From edb13ca36259a5a298766e27e9d0c3e625138ff5 Mon Sep 17 00:00:00 2001
From: Annick Renevey <47788523+rannick@users.noreply.github.com>
Date: Thu, 17 Oct 2024 12:06:25 +0200
Subject: [PATCH] Add upload for raredisease to genotype (#3784)

### Added

- Upload of raredisease cases to genotype

### Changed

- Add _mip_dna suffix to MIP-DNA related genotype functions
---
 cg/cli/upload/genotype.py                     |   5 +-
 cg/constants/constants.py                     |   2 +
 cg/constants/housekeeper_tags.py              |   8 +
 cg/constants/nf_analysis.py                   |   4 +-
 cg/meta/upload/gt.py                          | 191 ++++++++++++------
 tests/conftest.py                             |   6 +
 .../case_metrics_deliverables.yaml            | 128 ++++++++++++
 tests/meta/upload/balsamic/test_balsamic.py   |   6 +-
 .../meta/upload/test_upload_genotypes_api.py  |  93 +++++++--
 9 files changed, 354 insertions(+), 89 deletions(-)
 create mode 100644 tests/fixtures/apps/raredisease/case_metrics_deliverables.yaml

diff --git a/cg/cli/upload/genotype.py b/cg/cli/upload/genotype.py
index f7e2c01f38..ee79a25641 100644
--- a/cg/cli/upload/genotype.py
+++ b/cg/cli/upload/genotype.py
@@ -31,15 +31,14 @@ def upload_genotypes(context: CGConfig, re_upload: bool, family_id: str | None):
     status_db: Store = context.status_db
     housekeeper_api: HousekeeperAPI = context.housekeeper_api
     genotype_api: GenotypeAPI = context.genotype_api
-
     click.echo(click.style("----------------- GENOTYPES -------------------"))
 
     if not family_id:
         suggest_cases_to_upload(status_db=status_db)
         raise click.Abort
-    case_obj: Case = status_db.get_case_by_internal_id(internal_id=family_id)
+    case: Case = status_db.get_case_by_internal_id(internal_id=family_id)
     upload_genotypes_api = UploadGenotypesAPI(hk_api=housekeeper_api, gt_api=genotype_api)
-    results: dict = upload_genotypes_api.data(case_obj.analyses[0])
+    results: dict = upload_genotypes_api.get_genotype_data(case.analyses[0])
 
     if not results:
         LOG.warning("Could not find any results to upload")
diff --git a/cg/constants/constants.py b/cg/constants/constants.py
index ed03ebd6d9..594a229324 100644
--- a/cg/constants/constants.py
+++ b/cg/constants/constants.py
@@ -197,6 +197,7 @@ class HastaSlurmPartitions(StrEnum):
 
 class FileExtensions(StrEnum):
     BAM: str = ".bam"
+    BCF: str = ".bcf"
     BED: str = ".bed"
     COMPLETE: str = ".complete"
     CONFIG: str = ".config"
@@ -224,6 +225,7 @@ class FileExtensions(StrEnum):
     TSV: str = ".tsv"
     TXT: str = ".txt"
     VCF: str = ".vcf"
+    VCF_GZ: str = ".vcf.gz"
     XLSX: str = ".xlsx"
     XML: str = ".xml"
     YAML: str = ".yaml"
diff --git a/cg/constants/housekeeper_tags.py b/cg/constants/housekeeper_tags.py
index 338700a7f6..d6d9a07d56 100644
--- a/cg/constants/housekeeper_tags.py
+++ b/cg/constants/housekeeper_tags.py
@@ -96,11 +96,19 @@ class BalsamicAnalysisTag:
     QC_METRICS: list[str] = ["qc-metrics", "deliverable"]
 
 
+class HkAnalysisMetricsTag:
+    QC_METRICS: set[str] = {"qc-metrics", "deliverable"}
+
+
 class GensAnalysisTag:
     COVERAGE: list[str] = ["gens", "coverage", "bed"]
     FRACSNP: list[str] = ["gens", "fracsnp", "bed"]
 
 
+class GenotypeAnalysisTag:
+    GENOTYPE: str = "genotype"
+
+
 class BalsamicProtectedTags:
     """Balsamic workflow protected tags by type."""
 
diff --git a/cg/constants/nf_analysis.py b/cg/constants/nf_analysis.py
index 09e5111b6b..24a3a11458 100644
--- a/cg/constants/nf_analysis.py
+++ b/cg/constants/nf_analysis.py
@@ -15,13 +15,15 @@ class NfTowerStatus(StrEnum):
     UNKNOWN: str = "UNKNOWN"
 
 
+RAREDISEASE_PREDICTED_SEX_METRIC = "predicted_sex_sex_check"
+
 RAREDISEASE_METRIC_CONDITIONS: dict[str, dict[str, Any]] = {
     "percent_duplicates": {"norm": "lt", "threshold": 20},
     "PCT_PF_UQ_READS_ALIGNED": {"norm": "gt", "threshold": 0.95},
     "MEDIAN_TARGET_COVERAGE": {"norm": "gt", "threshold": 25},
     "PCT_TARGET_BASES_10X": {"norm": "gt", "threshold": 0.95},
     "PCT_EXC_ADAPTER": {"norm": "lt", "threshold": 0.0005},
-    "predicted_sex_sex_check": {"norm": "eq", "threshold": None},
+    RAREDISEASE_PREDICTED_SEX_METRIC: {"norm": "eq", "threshold": None},
     "gender": {"norm": "eq", "threshold": None},
 }
 
diff --git a/cg/meta/upload/gt.py b/cg/meta/upload/gt.py
index 6f2685fb85..05c6b989f0 100644
--- a/cg/meta/upload/gt.py
+++ b/cg/meta/upload/gt.py
@@ -1,14 +1,16 @@
 import logging
 from pathlib import Path
 
-from housekeeper.store.models import File, Version
+from housekeeper.store.models import File
 
 from cg.apps.gt import GenotypeAPI
 from cg.apps.housekeeper.hk import HousekeeperAPI
-from cg.constants.constants import FileFormat, PrepCategory, Workflow
-from cg.constants.housekeeper_tags import HkMipAnalysisTag
+from cg.constants.constants import FileExtensions, FileFormat, PrepCategory, Workflow
+from cg.constants.housekeeper_tags import GenotypeAnalysisTag, HkAnalysisMetricsTag
+from cg.constants.nf_analysis import RAREDISEASE_PREDICTED_SEX_METRIC
 from cg.constants.subject import Sex
 from cg.io.controller import ReadFile
+from cg.models.deliverables.metric_deliverables import MetricsBase
 from cg.models.mip.mip_metrics_deliverables import MIPMetricsDeliverables
 from cg.store.models import Analysis, Case, Sample
 
@@ -16,6 +18,8 @@
 
 
 class UploadGenotypesAPI(object):
+    """Genotype upload API."""
+
     def __init__(
         self,
         hk_api: HousekeeperAPI,
@@ -25,8 +29,8 @@ def __init__(
         self.hk = hk_api
         self.gt = gt_api
 
-    def data(self, analysis: Analysis) -> dict:
-        """Fetch data about an analysis to load genotypes.
+    def get_genotype_data(self, analysis: Analysis) -> dict[dict[str, dict[str, str]]]:
+        """Return data about an analysis to load genotypes.
 
         Returns: dict on form
 
@@ -43,93 +47,150 @@ def data(self, analysis: Analysis) -> dict:
         """
         case_id = analysis.case.internal_id
         LOG.info(f"Fetching upload genotype data for {case_id}")
-        hk_version = self.hk.last_version(case_id)
-        hk_bcf = self.get_bcf_file(hk_version)
-        data = {"bcf": hk_bcf.full_path}
+        hk_bcf: File = self._get_genotype_file(case_id=case_id)
+        genotype_load_config: dict = {"bcf": hk_bcf.full_path}
         if analysis.workflow in [Workflow.BALSAMIC, Workflow.BALSAMIC_UMI]:
-            data["samples_sex"] = self._get_samples_sex_balsamic(case_obj=analysis.case)
+            genotype_load_config["samples_sex"] = self._get_samples_sex_balsamic(case=analysis.case)
         elif analysis.workflow == Workflow.MIP_DNA:
-            data["samples_sex"] = self._get_samples_sex_mip(
-                case_obj=analysis.case, hk_version=hk_version
+            genotype_load_config["samples_sex"] = self._get_samples_sex_mip_dna(case=analysis.case)
+        elif analysis.workflow == Workflow.RAREDISEASE:
+            genotype_load_config["samples_sex"] = self._get_samples_sex_raredisease(
+                case=analysis.case
             )
         else:
             raise ValueError(f"Workflow {analysis.workflow} does not support Genotype upload")
-        return data
-
-    def _get_samples_sex_mip(self, case_obj: Case, hk_version: Version) -> dict:
-        qc_metrics_file = self.get_qcmetrics_file(hk_version)
-        analysis_sexes = self.analysis_sex(qc_metrics_file)
-        samples_sex = {}
-        for link_obj in case_obj.links:
-            sample_id = link_obj.sample.internal_id
+        return genotype_load_config
+
+    def upload(self, genotype_load_config: dict, replace: bool = False):
+        """Upload a genotype config for a case."""
+        self.gt.upload(
+            str(genotype_load_config["bcf"]), genotype_load_config["samples_sex"], force=replace
+        )
+
+    @staticmethod
+    def _is_suitable_for_genotype_upload(case: Case) -> bool:
+        """Returns True if there are any non-tumor WGS samples in the case."""
+        samples: list[Sample] = case.samples
+        return any(
+            (not sample.is_tumour and PrepCategory.WHOLE_GENOME_SEQUENCING == sample.prep_category)
+            for sample in samples
+        )
+
+    def _get_genotype_file(self, case_id: str) -> File:
+        "Returns latest genotype file in housekeeper for given case, raises FileNotFoundError if not found."
+        LOG.debug("Get Genotype files from Housekeeper")
+        tags: set[str] = {GenotypeAnalysisTag.GENOTYPE}
+        hk_genotype_files: list[File] = self.hk.get_files_from_latest_version(
+            bundle_name=case_id, tags=tags
+        )
+        hk_genotype: File = self._get_single_genotype_file(hk_genotype_files)
+        if not hk_genotype:
+            raise FileNotFoundError(f"Genotype file not found for {case_id}")
+        LOG.debug(f"Found genotype file {hk_genotype.full_path}")
+        return hk_genotype
+
+    def _get_samples_sex_balsamic(self, case: Case) -> dict[str, dict[str, str]]:
+        """Return sex information from StatusDB; the analysis sex is set to unknown."""
+        samples_sex: dict[str, dict[str, str]] = {}
+        for sample in case.samples:
+            if sample.is_tumour:
+                continue
+            sample_id: str = sample.internal_id
             samples_sex[sample_id] = {
-                "pedigree": link_obj.sample.sex,
-                "analysis": analysis_sexes[sample_id],
+                "pedigree": sample.sex,
+                "analysis": Sex.UNKNOWN,
             }
         return samples_sex
 
-    def _get_samples_sex_balsamic(self, case_obj: Case) -> dict:
-        samples_sex = {}
-        for link_obj in case_obj.links:
-            if link_obj.sample.is_tumour:
-                continue
-            sample_id = link_obj.sample.internal_id
+    def _get_samples_sex_mip_dna(self, case: Case) -> dict[str, dict[str, str]]:
+        """Return sex information from StatusDB and from analysis prediction"""
+        qc_metrics_file: Path = self._get_qcmetrics_file(case_id=case.internal_id)
+        analysis_sex: dict = self._get_analysis_sex_mip_dna(qc_metrics_file)
+        samples_sex: dict[str, dict[str, str]] = {}
+        for sample in case.samples:
+            sample_id: str = sample.internal_id
             samples_sex[sample_id] = {
-                "pedigree": link_obj.sample.sex,
-                "analysis": Sex.UNKNOWN,
+                "pedigree": sample.sex,
+                "analysis": analysis_sex[sample_id],
             }
         return samples_sex
 
-    def analysis_sex(self, qc_metrics_file: Path) -> dict:
-        """Fetch analysis sex for each sample of an analysis."""
-        qc_metrics: MIPMetricsDeliverables = self.get_parsed_qc_metrics_data(qc_metrics_file)
+    @staticmethod
+    def _get_analysis_sex_mip_dna(qc_metrics_file: Path) -> dict:
+        """Return analysis sex for each sample of an analysis."""
+        qc_metrics: MIPMetricsDeliverables = (
+            UploadGenotypesAPI._get_parsed_qc_metrics_deliverables_mip_dna(qc_metrics_file)
+        )
         return {
             sample_id_metric.sample_id: sample_id_metric.predicted_sex
             for sample_id_metric in qc_metrics.sample_id_metrics
         }
 
-    def get_bcf_file(self, hk_version_obj: Version) -> File:
-        """Fetch a bcf file and return the file object"""
-        genotype_files: list = self._get_genotype_files(version_id=hk_version_obj.id)
-        for genotype_file in genotype_files:
-            if self._is_variant_file(genotype_file=genotype_file):
-                LOG.debug(f"Found bcf file {genotype_file.full_path}")
-                return genotype_file
-        raise FileNotFoundError(f"No vcf or bcf file found for bundle {hk_version_obj.bundle_id}")
-
-    def get_qcmetrics_file(self, hk_version_obj: Version) -> Path:
-        """Fetch a qc_metrics file and return the path"""
-        hk_qcmetrics = self.hk.files(
-            version=hk_version_obj.id, tags=HkMipAnalysisTag.QC_METRICS
-        ).first()
-        LOG.debug(f"Found qc metrics file {hk_qcmetrics.full_path}")
+    def _get_qcmetrics_file(self, case_id: str) -> Path:
+        """Return a QC metrics file path.
+        Raises: FileNotFoundError if nothing is found in the Housekeeper bundle."""
+        tags: set[str] = HkAnalysisMetricsTag.QC_METRICS
+        hk_qcmetrics: File = self.hk.get_file_from_latest_version(bundle_name=case_id, tags=tags)
+        if not hk_qcmetrics:
+            raise FileNotFoundError("QC metrics file not found for the given Housekeeper version.")
+        LOG.debug(f"Found QC metrics file {hk_qcmetrics.full_path}")
         return Path(hk_qcmetrics.full_path)
 
     @staticmethod
-    def get_parsed_qc_metrics_data(qc_metrics: Path) -> MIPMetricsDeliverables:
-        """Parse the information from a qc metrics file"""
+    def _get_parsed_qc_metrics_deliverables_mip_dna(
+        qc_metrics_file: Path,
+    ) -> MIPMetricsDeliverables:
+        """Parse and return a QC metrics file."""
         qcmetrics_raw: dict = ReadFile.get_content_from_file(
-            file_format=FileFormat.YAML, file_path=qc_metrics
+            file_format=FileFormat.YAML, file_path=qc_metrics_file
         )
         return MIPMetricsDeliverables(**qcmetrics_raw)
 
-    def upload(self, data: dict, replace: bool = False):
-        """Upload data about genotypes for a family of samples."""
-        self.gt.upload(str(data["bcf"]), data["samples_sex"], force=replace)
+    def _get_samples_sex_raredisease(self, case: Case) -> dict[str, dict[str, str]]:
+        """Return sex information from StatusDB and from analysis prediction."""
+        qc_metrics_file: Path = self._get_qcmetrics_file(case_id=case.internal_id)
+        samples_sex: dict[str, dict[str, str]] = {}
+        for sample in case.samples:
+            sample_id: str = sample.internal_id
+            samples_sex[sample_id] = {
+                "pedigree": sample.sex,
+                "analysis": self._get_analysis_sex_raredisease(
+                    qc_metrics_file=qc_metrics_file, sample_id=sample_id
+                ),
+            }
+        return samples_sex
 
-    @staticmethod
-    def _is_variant_file(genotype_file: File):
-        return genotype_file.full_path.endswith("vcf.gz") or genotype_file.full_path.endswith("bcf")
+    def _get_analysis_sex_raredisease(self, qc_metrics_file: Path, sample_id: str) -> str:
+        """Return the analysis-predicted sex of the given sample, or None if it is not found."""
+        qc_metrics: list[MetricsBase] = self._get_parsed_qc_metrics_deliverables_raredisease(
+            qc_metrics_file
+        )
+        for metric in qc_metrics:
+            if metric.name == RAREDISEASE_PREDICTED_SEX_METRIC and metric.id == sample_id:
+                return str(metric.value)
 
-    def _get_genotype_files(self, version_id: int) -> list:
-        return self.hk.files(version=version_id, tags=["genotype"]).all()
+    @staticmethod
+    def _get_parsed_qc_metrics_deliverables_raredisease(qc_metrics: Path) -> list[MetricsBase]:
+        """Parse and return a QC metrics file."""
+        qcmetrics_raw: dict = ReadFile.get_content_from_file(
+            file_format=FileFormat.YAML, file_path=qc_metrics
+        )
+        return [MetricsBase(**metric) for metric in qcmetrics_raw["metrics"]]
 
     @staticmethod
-    def is_suitable_for_genotype_upload(case_obj: Case) -> bool:
-        """Check if a cancer case is contains WGS and normal sample."""
+    def _is_variant_file(genotype_file: File):
+        return genotype_file.full_path.endswith(
+            FileExtensions.VCF_GZ
+        ) or genotype_file.full_path.endswith(FileExtensions.BCF)
 
-        samples: list[Sample] = case_obj.samples
-        return any(
-            (not sample.is_tumour and PrepCategory.WHOLE_GENOME_SEQUENCING == sample.prep_category)
-            for sample in samples
-        )
+    def _get_single_genotype_file(self, hk_genotype_files: list[File]) -> File:
+        """
+        Returns the single .bcf or .vcf.gz file expected to be found in the provided list. Raises an error if the amount of such files is different from 1.
+        """
+        filtered_files = [file for file in hk_genotype_files if self._is_variant_file(file)]
+        if len(filtered_files) != 1:
+            raise ValueError(
+                f"Error: Expecte one genotype file, but found {len(filtered_files)} "
+                f"({', '.join(map(str, filtered_files))})."
+            )
+        return filtered_files[0]
diff --git a/tests/conftest.py b/tests/conftest.py
index 4fc03bfeed..68f7225c24 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -830,6 +830,12 @@ def case_qc_metrics_deliverables(apps_dir: Path) -> Path:
     return Path(apps_dir, "mip", "case_metrics_deliverables.yaml")
 
 
+@pytest.fixture
+def case_qc_metrics_deliverables_raredisease(apps_dir: Path) -> Path:
+    """Return the path to a qc metrics deliverables file with case data."""
+    return Path(apps_dir, "raredisease", "case_metrics_deliverables.yaml")
+
+
 @pytest.fixture
 def mip_analysis_dir(analysis_dir: Path) -> Path:
     """Return the path to the directory with mip analysis files."""
diff --git a/tests/fixtures/apps/raredisease/case_metrics_deliverables.yaml b/tests/fixtures/apps/raredisease/case_metrics_deliverables.yaml
new file mode 100644
index 0000000000..11ba777add
--- /dev/null
+++ b/tests/fixtures/apps/raredisease/case_metrics_deliverables.yaml
@@ -0,0 +1,128 @@
+---
+metrics:
+  - header: ~
+    id: ADM1
+    input: ADM1_lanes_1_sorted_md
+    name: fraction_duplicates
+    step: markduplicates
+    value: '0.040068546369646'
+  - header: ~
+    id: ADM2
+    input: ADM2_lanes_1_sorted_md
+    name: fraction_duplicates
+    step: markduplicates
+    value: '0.0406761321866496'
+  - header: ~
+    id: ADM3
+    input: ADM3_lanes_1_sorted_md
+    name: fraction_duplicates
+    step: markduplicates
+    value: '0.0379522720687161'
+  - header: ~
+    id: ADM1
+    input: ADM1_171015_HHT5NDSXX_XXXXXX_lane1_sorted
+    name: raw_total_sequences
+    step: bamstats
+    value: '599990660'
+  - header: ~
+    id: ADM2
+    input: ADM2_171015_HHT5NDSXX_XXXXXX_lane1_sorted
+    name: raw_total_sequences
+    step: bamstats
+    value: '600049404'
+  - header: ~
+    id: ADM3
+    input: ADM3_171015_HHT5NDSXX_XXXXXX_lane1_sorted
+    name: raw_total_sequences
+    step: bamstats
+    value: '600006004'
+  - header: ~
+    id: ADM1
+    input: ADM1_lanes_1_sorted_md_sex
+    name: predicted_sex_sex_check
+    step: multiqc
+    value: male
+  - header: ~
+    id: ADM2
+    input: ADM2_lanes_1_sorted_md_sex
+    name: predicted_sex_sex_check
+    step: multiqc
+    value: male
+  - header: ~
+    id: ADM3
+    input: ADM3_lanes_1_sorted_md_sex
+    name: predicted_sex_sex_check
+    step: multiqc
+    value: female
+  - header: ~
+    id: ADM1
+    input: ADM1_171015_HHT5NDSXX_XXXXXX_lane1_sorted
+    name: reads_mapped
+    step: bamstats
+    value: '598626360'
+  - header: ~
+    id: ADM2
+    input: ADM2_171015_HHT5NDSXX_XXXXXX_lane1_sorted
+    name: reads_mapped
+    step: bamstats
+    value: '598568878'
+  - header: ~
+    id: ADM3
+    input: ADM3_171015_HHT5NDSXX_XXXXXX_lane1_sorted
+    name: reads_mapped
+    step: bamstats
+    value: '598456583'
+  - header: ~
+    id: ADM1
+    input: ADM1_171015_HHT5NDSXX_XXXXXX_lane1_sorted
+    name: percentage_mapped_reads
+    step: bamstats
+    value: '99.772613127011'
+  - header: ~
+    id: ADM2
+    input: ADM2_171015_HHT5NDSXX_XXXXXX_lane1_sorted
+    name: percentage_mapped_reads
+    step: bamstats
+    value: '99.7532659827456'
+  - header: ~
+    id: ADM3
+    input: ADM3_171015_HHT5NDSXX_XXXXXX_lane1_sorted
+    name: percentage_mapped_reads
+    step: bamstats
+    value: '99.7417657507307'
+  - header: data
+    id: ADM1
+    input: ADM1_lanes_1_sorted_md_collectmultiplemetrics
+    name: MEAN_INSERT_SIZE
+    step: collectmultiplemetricsinsertsize
+    value: '399.191095'
+  - header: data
+    id: ADM2
+    input: ADM2_lanes_1_sorted_md_collectmultiplemetrics
+    name: MEAN_INSERT_SIZE
+    step: collectmultiplemetricsinsertsize
+    value: '421.846775'
+  - header: data
+    id: ADM3
+    input: ADM3_lanes_1_sorted_md_collectmultiplemetrics
+    name: MEAN_INSERT_SIZE
+    step: collectmultiplemetricsinsertsize
+    value: '415.673545'
+  - header: data
+    id: ADM1
+    input: ADM1_lanes_1_sorted_md_collecthsmetrics
+    name: MEDIAN_TARGET_COVERAGE
+    step: collecthsmetrics
+    value: '29'
+  - header: data
+    id: ADM2
+    input: ADM2_lanes_1_sorted_md_collecthsmetrics
+    name: MEDIAN_TARGET_COVERAGE
+    step: collecthsmetrics
+    value: '28'
+  - header: data
+    id: ADM3
+    input: ADM3_lanes_1_sorted_md_collecthsmetrics
+    name: MEDIAN_TARGET_COVERAGE
+    step: collecthsmetrics
+    value: '28'
diff --git a/tests/meta/upload/balsamic/test_balsamic.py b/tests/meta/upload/balsamic/test_balsamic.py
index dd7865cc65..ad8f75d801 100644
--- a/tests/meta/upload/balsamic/test_balsamic.py
+++ b/tests/meta/upload/balsamic/test_balsamic.py
@@ -27,7 +27,7 @@ def test_genotype_check_wgs_normal(balsamic_context: CGConfig):
     case: Case = balsamic_context.status_db.get_case_by_internal_id(internal_id=internal_id)
 
     # WHEN checking if the case is Genotype upload compatible
-    passed_check = UploadGenotypesAPI.is_suitable_for_genotype_upload(case)
+    passed_check = UploadGenotypesAPI._is_suitable_for_genotype_upload(case)
 
     # THEN it should return True
     assert passed_check
@@ -40,7 +40,7 @@ def test_genotype_check_non_wgs_normal(balsamic_context: CGConfig):
     case: Case = balsamic_context.status_db.get_case_by_internal_id(internal_id=internal_id)
 
     # WHEN checking if the case is Genotype upload compatible
-    passed_check = UploadGenotypesAPI.is_suitable_for_genotype_upload(case)
+    passed_check = UploadGenotypesAPI._is_suitable_for_genotype_upload(case)
 
     # THEN it should return False
     assert not passed_check
@@ -53,7 +53,7 @@ def test_genotype_check_only_tumour(balsamic_context: CGConfig):
     case: Case = balsamic_context.status_db.get_case_by_internal_id(internal_id=internal_id)
 
     # WHEN checking if the case is Genotype upload compatible
-    passed_check = UploadGenotypesAPI.is_suitable_for_genotype_upload(case)
+    passed_check = UploadGenotypesAPI._is_suitable_for_genotype_upload(case)
 
     # THEN it should return False
     assert not passed_check
diff --git a/tests/meta/upload/test_upload_genotypes_api.py b/tests/meta/upload/test_upload_genotypes_api.py
index 4c0c988eea..bffb0b7ad8 100644
--- a/tests/meta/upload/test_upload_genotypes_api.py
+++ b/tests/meta/upload/test_upload_genotypes_api.py
@@ -1,9 +1,12 @@
-"""Tests for the upload genotypes api"""
+"""Tests for the upload Genotypes API."""
 
-from datetime import datetime
 from pathlib import Path
 
+from cg.constants import Workflow
+from cg.constants.constants import SexOptions
+from cg.constants.observations import MipDNAObservationsAnalysisTag
 from cg.meta.upload.gt import UploadGenotypesAPI
+from cg.models.deliverables.metric_deliverables import MetricsBase
 from cg.models.mip.mip_metrics_deliverables import MIPMetricsDeliverables
 from cg.store.models import Analysis
 
@@ -13,40 +16,71 @@ def test_get_analysis_sex(case_qc_metrics_deliverables: Path, genotype_analysis_
     # GIVEN a UploadGenotypesAPI some qcmetrics data
 
     # WHEN fetching the predicted sex by the analysis
-    sex: dict = UploadGenotypesAPI.analysis_sex(
-        self=UploadGenotypesAPI, qc_metrics_file=case_qc_metrics_deliverables
+    sex: dict = UploadGenotypesAPI._get_analysis_sex_mip_dna(
+        qc_metrics_file=case_qc_metrics_deliverables
     )
 
     # THEN assert that the the predicted sex per sample_id is returned
     assert sex == genotype_analysis_sex
 
 
-def test_get_parsed_qc_metrics_data(case_qc_metrics_deliverables: Path):
+def test_get_analysis_sex_raredisease(
+    case_qc_metrics_deliverables_raredisease: Path, sample_id: str
+):
+    """Test to get the predicted sex from a raredisease run using the upload Genotypes API"""
+    # GIVEN a UploadGenotypesAPI some qcmetrics data
+
+    # WHEN fetching the predicted sex by the analysis
+    sex: str = UploadGenotypesAPI._get_analysis_sex_raredisease(
+        UploadGenotypesAPI,
+        qc_metrics_file=case_qc_metrics_deliverables_raredisease,
+        sample_id=sample_id,
+    )
+
+    # THEN assert that the predicted sex for the sample_id is returned
+    assert sex == SexOptions.MALE
+
+
+def test_get_parsed_qc_metrics_deliverables_mip(case_qc_metrics_deliverables: Path):
     """Test to get the predicted sex from a MIP run using the upload genotypes API"""
     # GIVEN a UploadGenotypesAPI and the path to a qc_metrics file with case data
 
     # WHEN fetching the predicted sex
-    metrics_object: MIPMetricsDeliverables = UploadGenotypesAPI.get_parsed_qc_metrics_data(
-        case_qc_metrics_deliverables
+    metrics_object: MIPMetricsDeliverables = (
+        UploadGenotypesAPI._get_parsed_qc_metrics_deliverables_mip_dna(case_qc_metrics_deliverables)
     )
-
     # THEN assert that it was successfully created
     assert isinstance(metrics_object, MIPMetricsDeliverables)
 
 
-def test_get_bcf_file(upload_genotypes_api: UploadGenotypesAPI, case_id: str, timestamp: datetime):
+def test_get_parsed_qc_metrics_deliverables_raredisease(case_qc_metrics_deliverables: Path):
+    """Test to parse a QC metrics deliverables file into a list of metrics using the upload genotypes API"""
+    # GIVEN a UploadGenotypesAPI and the path to a QC metrics file with case data
+
+    # WHEN parsing the QC metrics file
+    metrics_object = UploadGenotypesAPI._get_parsed_qc_metrics_deliverables_raredisease(
+        case_qc_metrics_deliverables
+    )
+
+    # First, check if the object is a list
+    assert isinstance(metrics_object, list)
+
+    # Then, check if all items in the list are instances of MetricsBase
+    assert all(isinstance(item, MetricsBase) for item in metrics_object)
+
+
+def test_get_genotype_file(upload_genotypes_api: UploadGenotypesAPI, case_id: str):
     """Test to get the predicted sex from a MIP run using the upload genotypes API"""
     # GIVEN a UploadGenotypesAPI populated with some data in housekeeper
-    hk_version = upload_genotypes_api.hk.version(case_id, timestamp)
 
     # WHEN fetching the gbcf file with the api
-    gbcf = upload_genotypes_api.get_bcf_file(hk_version)
+    gbcf = upload_genotypes_api._get_genotype_file(case_id)
 
     # THEN assert that the file has the correct tag
-    assert "snv-gbcf" in (tag.name for tag in gbcf.tags)
+    assert MipDNAObservationsAnalysisTag.PROFILE_GBCF in (tag.name for tag in gbcf.tags)
 
 
-def test_get_data(
+def test_get_data_mip(
     analysis_obj: Analysis,
     genotype_analysis_sex: dict,
     mocker,
@@ -57,11 +91,36 @@ def test_get_data(
     # GIVEN an analysis object with a trio
 
     # GIVEN analysis sex were generated and could be found
-    mocker.patch.object(UploadGenotypesAPI, "analysis_sex")
-    UploadGenotypesAPI.analysis_sex.return_value = genotype_analysis_sex
+    mocker.patch.object(UploadGenotypesAPI, "_get_analysis_sex_mip_dna")
+    UploadGenotypesAPI._get_analysis_sex_mip_dna.return_value = genotype_analysis_sex
+
+    analysis_obj.workflow = Workflow.MIP_DNA
+
+    # WHEN parsing the data
+    result = upload_genotypes_api.get_genotype_data(analysis=analysis_obj)
+
+    # THEN assert that the sex is set for every sample
+    assert len(result["samples_sex"]) == 3
+
+
+def test_get_data_raredisease(
+    analysis_obj: Analysis,
+    genotype_analysis_sex: dict,
+    mocker,
+    upload_genotypes_api: UploadGenotypesAPI,
+):
+    """Test to get data from the UploadGenotypesAPI"""
+    # GIVEN a UploadGenotypeAPI populated with some data
+    # GIVEN an analysis object with a trio
+
+    # GIVEN analysis sex were generated and could be found
+    mocker.patch.object(UploadGenotypesAPI, "_get_analysis_sex_raredisease")
+    UploadGenotypesAPI._get_analysis_sex_raredisease.return_value = genotype_analysis_sex
+
+    analysis_obj.workflow = Workflow.RAREDISEASE
 
     # WHEN parsing the data
-    result = upload_genotypes_api.data(analysis=analysis_obj)
+    result = upload_genotypes_api.get_genotype_data(analysis=analysis_obj)
 
-    # THEN assert that the result looks like expected
+    # THEN assert that the sex is set for every sample
     assert len(result["samples_sex"]) == 3