Skip to content

Commit

Permalink
Fix RNA uploads/delivery message (#4145) (patch)
Browse files Browse the repository at this point in the history
### Changed

- The RNA uploads to Scout now use the query logic found in the ReadHandler.
- The ReadHandler raises errors more robustly when there are multiple matching DNA samples or when no subject id is passed.
  • Loading branch information
islean authored Jan 30, 2025
1 parent aabb8f7 commit eb104d8
Show file tree
Hide file tree
Showing 6 changed files with 164 additions and 257 deletions.
99 changes: 22 additions & 77 deletions cg/meta/upload/scout/uploadscoutapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from pathlib import Path

from housekeeper.store.models import File, Version
from pydantic.dataclasses import dataclass

from cg.apps.housekeeper.hk import HousekeeperAPI
from cg.apps.lims import LimsAPI
Expand All @@ -26,22 +25,13 @@
from cg.meta.workflow.analysis import AnalysisAPI
from cg.meta.workflow.utils.genome_build_helpers import genome_to_scout_format, get_genome_build
from cg.models.scout.scout_load_config import ScoutLoadConfig
from cg.store.api.data_classes import RNADNACollection
from cg.store.models import Analysis, Case, Customer, Sample
from cg.store.store import Store

LOG = logging.getLogger(__name__)


@dataclass
class RNADNACollection:
"""Contains the id for an RNA sample, the name of its connected DNA sample,
and a list of connected, uploaded DNA cases."""

rna_sample_internal_id: str
dna_sample_name: str
dna_case_ids: list[str]


class UploadScoutAPI:
"""Class that handles everything that has to do with uploading to Scout."""

Expand Down Expand Up @@ -182,15 +172,6 @@ def get_rna_omics_outrider(self, case_id: str) -> File | None:
tags: set[str] = {AnalysisTag.OUTRIDER, case_id, AnalysisTag.CLINICAL}
return self.housekeeper.get_file_from_latest_version(bundle_name=case_id, tags=tags)

def get_unique_dna_cases_related_to_rna_case(self, case_id: str) -> set[str]:
"""Return a set of unique DNA cases related to an RNA case."""
case: Case = self.status_db.get_case_by_internal_id(case_id)
rna_dna_collections: list[RNADNACollection] = self.create_rna_dna_collections(case)
unique_dna_cases_related_to_rna_case: set[str] = set()
for rna_dna_collection in rna_dna_collections:
unique_dna_cases_related_to_rna_case.update(rna_dna_collection.dna_case_ids)
return unique_dna_cases_related_to_rna_case

def get_rna_alignment_cram(self, case_id: str, sample_id: str) -> File | None:
"""Return an RNA alignment CRAM file for a case in Housekeeper."""
tags: set[str] = {AlignmentFileTag.CRAM, sample_id}
Expand All @@ -206,9 +187,11 @@ def get_rna_alignment_cram(self, case_id: str, sample_id: str) -> File | None:
def upload_rna_alignment_file(self, case_id: str, dry_run: bool) -> None:
"""Upload RNA alignment file to Scout."""
rna_case: Case = self.status_db.get_case_by_internal_id(case_id)
rna_dna_collections: list[RNADNACollection] = self.create_rna_dna_collections(rna_case)
rna_dna_collections: list[RNADNACollection] = (
self.status_db.get_related_dna_cases_with_samples(rna_case)
)
for rna_dna_collection in rna_dna_collections:
rna_sample_internal_id: str = rna_dna_collection.rna_sample_internal_id
rna_sample_internal_id: str = rna_dna_collection.rna_sample_id
dna_sample_name: str = rna_dna_collection.dna_sample_name
rna_alignment_cram: File | None = self.get_rna_alignment_cram(
case_id=case_id, sample_id=rna_sample_internal_id
Expand Down Expand Up @@ -346,9 +329,11 @@ def upload_rna_coverage_bigwig_to_scout(self, case_id: str, dry_run: bool) -> No

status_db: Store = self.status_db
rna_case = status_db.get_case_by_internal_id(case_id)
rna_dna_collections: list[RNADNACollection] = self.create_rna_dna_collections(rna_case)
rna_dna_collections: list[RNADNACollection] = (
self.status_db.get_related_dna_cases_with_samples(rna_case)
)
for rna_dna_collection in rna_dna_collections:
rna_sample_internal_id: str = rna_dna_collection.rna_sample_internal_id
rna_sample_internal_id: str = rna_dna_collection.rna_sample_id
dna_sample_name: str = rna_dna_collection.dna_sample_name
rna_coverage_bigwig: File | None = self.get_rna_coverage_bigwig(
case_id=case_id, sample_id=rna_sample_internal_id
Expand Down Expand Up @@ -382,7 +367,7 @@ def upload_omics_sample_id_to_scout(
self, dry_run: bool, rna_dna_collections: list[RNADNACollection]
) -> None:
for rna_dna_collection in rna_dna_collections:
rna_sample_internal_id: str = rna_dna_collection.rna_sample_internal_id
rna_sample_internal_id: str = rna_dna_collection.rna_sample_id
dna_sample_name: str = rna_dna_collection.dna_sample_name
for dna_case_id in rna_dna_collection.dna_case_ids:
LOG.info(
Expand All @@ -406,7 +391,7 @@ def upload_rna_fraser_outrider_to_scout(
"""Upload omics fraser and outrider file for a case to Scout."""
status_db: Store = self.status_db
for rna_dna_collection in rna_dna_collections:
rna_sample_internal_id: str = rna_dna_collection.rna_sample_internal_id
rna_sample_internal_id: str = rna_dna_collection.rna_sample_id
dna_sample_name: str = rna_dna_collection.dna_sample_name
rna_fraser: File | None = self.get_rna_omics_fraser(case_id=case_id)
rna_outrider: File | None = self.get_rna_omics_outrider(case_id=case_id)
Expand Down Expand Up @@ -442,7 +427,7 @@ def upload_rna_fraser_outrider_to_scout(
def upload_rna_genome_build_to_scout(
self,
dry_run: bool,
rna_case: str,
rna_case: Case,
rna_dna_collections: list[RNADNACollection],
) -> None:
"""Upload RNA genome built for a RNA/DNA case to Scout."""
Expand Down Expand Up @@ -502,9 +487,11 @@ def upload_splice_junctions_bed_to_scout(self, dry_run: bool, case_id: str) -> N
status_db: Store = self.status_db
rna_case: Case = status_db.get_case_by_internal_id(case_id)

rna_dna_collections: list[RNADNACollection] = self.create_rna_dna_collections(rna_case)
rna_dna_collections: list[RNADNACollection] = (
self.status_db.get_related_dna_cases_with_samples(rna_case)
)
for rna_dna_collection in rna_dna_collections:
rna_sample_internal_id: str = rna_dna_collection.rna_sample_internal_id
rna_sample_internal_id: str = rna_dna_collection.rna_sample_id
dna_sample_name: str = rna_dna_collection.dna_sample_name
splice_junctions_bed: File | None = self.get_splice_junctions_bed(
case_id=case_id, sample_id=rna_sample_internal_id
Expand Down Expand Up @@ -615,7 +602,9 @@ def upload_rna_omics_to_scout(self, dry_run: bool, case_id: str) -> None:
"""Upload RNA omics files to Scout."""
status_db: Store = self.status_db
rna_case = status_db.get_case_by_internal_id(case_id)
rna_dna_collections: list[RNADNACollection] = self.create_rna_dna_collections(rna_case)
rna_dna_collections: list[RNADNACollection] = (
self.status_db.get_related_dna_cases_with_samples(rna_case)
)
self.upload_omics_sample_id_to_scout(
dry_run=dry_run, rna_dna_collections=rna_dna_collections
)
Expand Down Expand Up @@ -675,45 +664,6 @@ def get_config_builder(self, analysis, hk_version) -> ScoutConfigBuilder:

return config_builders[analysis.workflow]

def create_rna_dna_collections(self, rna_case: Case) -> list[RNADNACollection]:
return [self.create_rna_dna_collection(link.sample) for link in rna_case.links]

def create_rna_dna_collection(self, rna_sample: Sample) -> RNADNACollection:
"""Creates a collection containing the given RNA sample id, its related DNA sample name, and
a list of ids for the DNA cases connected to the DNA sample."""
if not rna_sample.subject_id:
raise CgDataError(
f"Failed to link RNA sample {rna_sample.internal_id} to DNA samples - subject_id field is empty."
)

collaborators: set[Customer] = rna_sample.customer.collaborators
subject_id_samples: list[Sample] = (
self.status_db.get_samples_by_customer_ids_and_subject_id_and_is_tumour(
customer_ids=[customer.id for customer in collaborators],
subject_id=rna_sample.subject_id,
is_tumour=rna_sample.is_tumour,
)
)

subject_id_dna_samples: list[Sample] = self._get_application_prep_category(
subject_id_samples
)

if len(subject_id_dna_samples) != 1:
raise CgDataError(
f"Failed to upload files for RNA case: unexpected number of DNA sample matches for subject_id: "
f"{rna_sample.subject_id}. Number of matches: {len(subject_id_dna_samples)} "
)
dna_sample: Sample = subject_id_dna_samples[0]
dna_cases: list[str] = self._dna_cases_related_to_dna_sample(
dna_sample=dna_sample, collaborators=collaborators
)
return RNADNACollection(
rna_sample_internal_id=rna_sample.internal_id,
dna_sample_name=dna_sample.name,
dna_case_ids=dna_cases,
)

def _dna_cases_related_to_dna_sample(
self, dna_sample: Sample, collaborators: set[Customer]
) -> list[str]:
Expand Down Expand Up @@ -768,11 +718,6 @@ def _get_application_prep_category(

def get_related_uploaded_dna_cases(self, rna_case_id: str) -> set[str]:
"""Returns all uploaded DNA cases related to the specified RNA case."""
unique_dna_case_ids: set[str] = self.get_unique_dna_cases_related_to_rna_case(rna_case_id)
uploaded_dna_cases: set[str] = set()
for dna_case_id in unique_dna_case_ids:
if self.status_db.get_case_by_internal_id(dna_case_id).is_uploaded:
uploaded_dna_cases.add(dna_case_id)
else:
LOG.warning(f"Related DNA case {dna_case_id} has not been completed.")
return uploaded_dna_cases
rna_case: Case = self.status_db.get_case_by_internal_id(rna_case_id)
dna_cases: list[Case] = self.status_db.get_uploaded_related_dna_cases(rna_case)
return {dna_case.internal_id for dna_case in dna_cases}
11 changes: 11 additions & 0 deletions cg/store/api/data_classes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from pydantic.dataclasses import dataclass


@dataclass
class RNADNACollection:
"""Group one RNA sample with its connected DNA sample and DNA cases.

Contains the id for an RNA sample, the name of its connected DNA sample,
and a list of ids of the connected, uploaded DNA cases."""

# Id of the RNA sample.
rna_sample_id: str
# Name of the DNA sample connected to the RNA sample.
dna_sample_name: str
# Ids of the DNA cases connected to the DNA sample.
dna_case_ids: list[str]
Loading

0 comments on commit eb104d8

Please sign in to comment.