Refactor get cases for analysis (#3926)

### Changed - Refactor cases_to_analyse
Clinical-Genomics · Nov 20, 2024 · e118d2e · e118d2e
1 parent 69046d6
commit e118d2e
Show file tree

Hide file tree

Showing 13 changed files with 542 additions and 234 deletions.
diff --git a/cg/cli/workflow/raw_data/base.py b/cg/cli/workflow/raw_data/base.py
@@ -44,7 +44,7 @@ def store_raw_data_analysis(context: click.Context, case_id: str, dry_run: bool
 def store_available_raw_data_analysis(context: click.Context, dry_run: bool = False):
     """Creates an analysis object in status-db for all raw data cases to be delivered."""
     status_db: Store = context.obj.status_db
-    for case in status_db.cases_to_analyse(workflow=Workflow.RAW_DATA):
+    for case in status_db.get_cases_to_analyze(workflow=Workflow.RAW_DATA):
         LOG.info(f"Creating an analysis for case {case.internal_id}")
         if SequencingQCService.case_pass_sequencing_qc(case):
             if dry_run:

diff --git a/cg/meta/workflow/analysis.py b/cg/meta/workflow/analysis.py
@@ -121,9 +121,9 @@ def is_case_ready_for_analysis(self, case: Case) -> bool:
     def get_cases_ready_for_analysis(self):
         """
         Return cases that are ready for analysis. The case is ready if it passes the logic in the
-        get_cases_to_analyse method, and it has passed the pre-analysis quality check.
+        get_cases_to_analyze method, and it has passed the pre-analysis quality check.
         """
-        cases_to_analyse: list[Case] = self.get_cases_to_analyse()
+        cases_to_analyse: list[Case] = self.get_cases_to_analyze()
         cases_passing_quality_check: list[Case] = [
             case for case in cases_to_analyse if SequencingQCService.case_pass_sequencing_qc(case)
         ]
@@ -366,8 +366,8 @@ def get_analyses_to_clean(self, before: datetime) -> list[Analysis]:
         )
         return analyses_to_clean
 
-    def get_cases_to_analyse(self) -> list[Case]:
-        return self.status_db.cases_to_analyse(workflow=self.workflow)
+    def get_cases_to_analyze(self) -> list[Case]:
+        return self.status_db.get_cases_to_analyze(workflow=self.workflow)
 
     def get_cases_to_store(self) -> list[Case]:
         """Return cases where analysis finished successfully,

diff --git a/cg/meta/workflow/balsamic.py b/cg/meta/workflow/balsamic.py
@@ -95,7 +95,7 @@ def get_case_path(self, case_id: str) -> Path:
 
     def get_cases_ready_for_analysis(self) -> list[Case]:
         """Returns a list of cases that are ready for analysis."""
-        cases_to_analyse: list[Case] = self.get_cases_to_analyse()
+        cases_to_analyse: list[Case] = self.get_cases_to_analyze()
         cases_ready_for_analysis: list[Case] = [
             case for case in cases_to_analyse if self.is_case_ready_for_analysis(case)
         ]

diff --git a/cg/meta/workflow/mip.py b/cg/meta/workflow/mip.py
@@ -251,7 +251,7 @@ def get_skip_evaluation_flag(self, case_id: str, skip_evaluation: bool) -> bool:
 
     def get_cases_ready_for_analysis(self) -> list[Case]:
         """Return cases to analyze."""
-        cases_to_analyse: list[Case] = self.get_cases_to_analyse()
+        cases_to_analyse: list[Case] = self.get_cases_to_analyze()
         cases_ready_for_analysis: list[Case] = [
             case for case in cases_to_analyse if self.is_case_ready_for_analysis(case)
         ]

diff --git a/cg/store/crud/read.py b/cg/store/crud/read.py
@@ -11,12 +11,20 @@
 from cg.constants.constants import CaseActions, CustomerId, PrepCategory, SampleType
 from cg.exc import CaseNotFoundError, CgError, OrderNotFoundError, SampleNotFoundError
 from cg.models.orders.constants import OrderType
-from cg.server.dto.samples.collaborator_samples_request import CollaboratorSamplesRequest
+from cg.server.dto.samples.collaborator_samples_request import (
+    CollaboratorSamplesRequest,
+)
 from cg.services.orders.order_service.models import OrderQueryParams
 from cg.store.base import BaseHandler
 from cg.store.exc import EntryNotFoundError
-from cg.store.filters.status_analysis_filters import AnalysisFilter, apply_analysis_filter
-from cg.store.filters.status_application_filters import ApplicationFilter, apply_application_filter
+from cg.store.filters.status_analysis_filters import (
+    AnalysisFilter,
+    apply_analysis_filter,
+)
+from cg.store.filters.status_application_filters import (
+    ApplicationFilter,
+    apply_application_filter,
+)
 from cg.store.filters.status_application_limitations_filters import (
     ApplicationLimitationsFilter,
     apply_application_limitations_filter,
@@ -26,14 +34,23 @@
     apply_application_versions_filter,
 )
 from cg.store.filters.status_bed_filters import BedFilter, apply_bed_filter
-from cg.store.filters.status_bed_version_filters import BedVersionFilter, apply_bed_version_filter
+from cg.store.filters.status_bed_version_filters import (
+    BedVersionFilter,
+    apply_bed_version_filter,
+)
 from cg.store.filters.status_case_filters import CaseFilter, apply_case_filter
-from cg.store.filters.status_case_sample_filters import CaseSampleFilter, apply_case_sample_filter
+from cg.store.filters.status_case_sample_filters import (
+    CaseSampleFilter,
+    apply_case_sample_filter,
+)
 from cg.store.filters.status_collaboration_filters import (
     CollaborationFilter,
     apply_collaboration_filter,
 )
-from cg.store.filters.status_customer_filters import CustomerFilter, apply_customer_filter
+from cg.store.filters.status_customer_filters import (
+    CustomerFilter,
+    apply_customer_filter,
+)
 from cg.store.filters.status_illumina_flow_cell_filters import (
     IlluminaFlowCellFilter,
     apply_illumina_flow_cell_filters,
@@ -52,7 +69,10 @@
     OrderTypeApplicationFilter,
     apply_order_type_application_filter,
 )
-from cg.store.filters.status_organism_filters import OrganismFilter, apply_organism_filter
+from cg.store.filters.status_organism_filters import (
+    OrganismFilter,
+    apply_organism_filter,
+)
 from cg.store.filters.status_pacbio_smrt_cell_filters import (
     PacBioSMRTCellFilter,
     apply_pac_bio_smrt_cell_filters,
@@ -1019,8 +1039,23 @@ def get_families_with_samples(self) -> Query:
         """Return all cases in the database with samples."""
         return self._get_join_cases_with_samples_query()
 
-    def cases_to_analyse(self, workflow: Workflow = None, limit: int = None) -> list[Case]:
-        """Returns a list if cases ready to be analyzed or set to be reanalyzed."""
+    def _is_case_set_to_analyse_or_not_analyzed(self, case: Case) -> bool:
+        return case.action == CaseActions.ANALYZE or not case.latest_analyzed
+
+    def _is_latest_analysis_done_on_all_sequences(self, case: Case) -> bool:
+        return case.latest_analyzed < case.latest_sequenced
+
+    def _is_case_to_be_analyzed(self, case: Case) -> bool:
+        if not case.latest_sequenced:
+            return False
+        if self._is_case_set_to_analyse_or_not_analyzed(case):
+            return True
+        return bool(self._is_latest_analysis_done_on_all_sequences(case))
+
+    def get_cases_to_analyze(self, workflow: Workflow = None, limit: int = None) -> list[Case]:
+        """Returns a list if cases ready to be analyzed or set to be reanalyzed.
+        1. Get cases to be analyzed using BE query
+        2. Use the latest analysis for case to determine if the case is to be analyzed"""
         case_filter_functions: list[CaseFilter] = [
             CaseFilter.HAS_SEQUENCE,
             CaseFilter.WITH_WORKFLOW,
@@ -1032,19 +1067,11 @@ def cases_to_analyse(self, workflow: Workflow = None, limit: int = None) -> list
             workflow=workflow,
         )
 
-        families: list[Query] = list(cases.order_by(Case.ordered_at))
-        families = [
-            case_obj
-            for case_obj in families
-            if case_obj.latest_sequenced
-            and (
-                case_obj.action == CaseActions.ANALYZE
-                or not case_obj.latest_analyzed
-                or case_obj.latest_analyzed < case_obj.latest_sequenced
-            )
+        sorted_cases: list[Case] = list(cases.order_by(Case.ordered_at))
+        cases_to_analyze: list[Case] = [
+            case for case in sorted_cases if self._is_case_to_be_analyzed(case)
         ]
-
-        return families[:limit]
+        return cases_to_analyze[:limit]
 
     def set_case_action(
         self, action: Literal[CaseActions.actions()], case_internal_id: str

diff --git a/tests/cli/workflow/mip/conftest.py b/tests/cli/workflow/mip/conftest.py
@@ -157,15 +157,15 @@ def mip_dna_context(
 def setup_mocks(
     mocker,
     can_at_least_one_sample_be_decompressed: bool = False,
-    case_to_analyze: Case = None,
+    get_case_to_analyze: Case = None,
     decompress_spring: bool = False,
     has_latest_analysis_started: bool = False,
     is_spring_decompression_needed: bool = False,
     is_spring_decompression_running: bool = False,
 ) -> None:
     """Helper function to set up the necessary mocks for the decompression logics."""
-    mocker.patch.object(ReadHandler, "cases_to_analyse")
-    ReadHandler.cases_to_analyse.return_value = [case_to_analyze]
+    mocker.patch.object(ReadHandler, "get_cases_to_analyze")
+    ReadHandler.get_cases_to_analyze.return_value = [get_case_to_analyze]
 
     mocker.patch.object(PrepareFastqAPI, "is_spring_decompression_needed")
     PrepareFastqAPI.is_spring_decompression_needed.return_value = is_spring_decompression_needed

diff --git a/tests/cli/workflow/mip/test_cli_mip_base.py b/tests/cli/workflow/mip/test_cli_mip_base.py
@@ -13,7 +13,6 @@
 from cg.models.cg_config import CGConfig
 from cg.store.models import Case
 from tests.cli.workflow.mip.conftest import setup_mocks
-from tests.store.conftest import case_obj
 
 
 def test_spring_decompression_needed_and_started(
@@ -34,7 +33,7 @@ def test_spring_decompression_needed_and_started(
     # GIVEN there are flow cells for the case
     setup_mocks(
         can_at_least_one_sample_be_decompressed=True,
-        case_to_analyze=case,
+        get_case_to_analyze=case,
         decompress_spring=True,
         has_latest_analysis_started=False,
         is_spring_decompression_needed=True,
@@ -69,7 +68,7 @@ def test_spring_decompression_needed_and_start_failed(
     # GIVEN there are flow cells for the case
     setup_mocks(
         can_at_least_one_sample_be_decompressed=True,
-        case_to_analyze=case,
+        get_case_to_analyze=case,
         decompress_spring=False,
         has_latest_analysis_started=False,
         is_spring_decompression_needed=True,
@@ -105,7 +104,7 @@ def test_spring_decompression_needed_and_cant_start(
     # GIVEN there are flow cells for the case
     setup_mocks(
         can_at_least_one_sample_be_decompressed=False,
-        case_to_analyze=case,
+        get_case_to_analyze=case,
         decompress_spring=False,
         has_latest_analysis_started=False,
         is_spring_decompression_needed=True,
@@ -142,7 +141,7 @@ def test_decompression_cant_start_and_is_running(
     # GIVEN there are flow cells for the case
     setup_mocks(
         can_at_least_one_sample_be_decompressed=False,
-        case_to_analyze=case,
+        get_case_to_analyze=case,
         decompress_spring=False,
         has_latest_analysis_started=False,
         is_spring_decompression_needed=True,
@@ -180,7 +179,7 @@ def test_case_needs_to_be_stored(
     # GIVEN there are flow cells for the case
     setup_mocks(
         can_at_least_one_sample_be_decompressed=False,
-        case_to_analyze=case,
+        get_case_to_analyze=case,
         decompress_spring=False,
         has_latest_analysis_started=False,
         is_spring_decompression_needed=False,

diff --git a/tests/conftest.py b/tests/conftest.py
@@ -1260,7 +1260,7 @@ def crunchy_api():
 # Store fixtures
 
 
-@pytest.fixture(name="analysis_store")
+@pytest.fixture
 def analysis_store(
     base_store: Store,
     analysis_family: dict,
@@ -4248,3 +4248,9 @@ def libary_sequencing_method() -> str:
 @pytest.fixture
 def capture_kit() -> str:
     return "panel.bed"
+
+
+@pytest.fixture
+def case(analysis_store: Store) -> Case:
+    """Return a case models object."""
+    return analysis_store.get_cases()[0]
diff --git a/tests/services/file_delivery/rsync/test_rsync_service.py b/tests/services/file_delivery/rsync/test_rsync_service.py
@@ -9,12 +9,9 @@
 from cg.constants import Workflow
 from cg.constants.priority import SlurmAccount, SlurmQos
 from cg.exc import CgError
-from cg.services.deliver_files.rsync.service import (
-    DeliveryRsyncService,
-)
+from cg.services.deliver_files.rsync.service import DeliveryRsyncService
 from cg.store.models import Case
 from cg.store.store import Store
-from tests.store.conftest import case_obj
 
 
 def test_get_source_and_destination_paths(
@@ -241,7 +238,6 @@ def test_slurm_rsync_single_case(
 
     # WHEN the destination path is created
     sbatch_number: int
-    is_complete_delivery: bool
     sbatch_number: int = delivery_rsync_service.run_rsync_for_case(
         case=case,
         dry_run=True,

diff --git a/tests/store/conftest.py b/tests/store/conftest.py
@@ -12,7 +12,9 @@
 from cg.constants.priority import PriorityTerms
 from cg.constants.subject import PhenotypeStatus, Sex
 from cg.services.illumina.data_transfer.models import IlluminaFlowCellDTO
-from cg.services.orders.store_order_services.store_pool_order import StorePoolOrderService
+from cg.services.orders.store_order_services.store_pool_order import (
+    StorePoolOrderService,
+)
 from cg.store.models import (
     Analysis,
     Application,
@@ -173,12 +175,6 @@ def microbial_store(
     yield base_store
 
 
-@pytest.fixture(name="case")
-def case_obj(analysis_store: Store) -> Case:
-    """Return a case models object."""
-    return analysis_store.get_cases()[0]
-
-
 @pytest.fixture(name="sample")
 def sample_obj(analysis_store) -> Sample:
     """Return a sample models object."""