diff --git a/cg/exc.py b/cg/exc.py index b0cfda9317..60b5a1ec88 100644 --- a/cg/exc.py +++ b/cg/exc.py @@ -138,7 +138,7 @@ class MicrosaltError(CgError): """ -class MissingAnalysisDir(CgError): +class MissingAnalysisRunDirectory(CgError): """ Error related to missing analysis. """ diff --git a/cg/meta/workflow/microsalt/microsalt.py b/cg/meta/workflow/microsalt/microsalt.py index 8745100bc1..c918ffd645 100644 --- a/cg/meta/workflow/microsalt/microsalt.py +++ b/cg/meta/workflow/microsalt/microsalt.py @@ -1,5 +1,4 @@ import logging -import os import re import shutil from datetime import datetime @@ -16,6 +15,7 @@ from cg.meta.workflow.fastq import MicrosaltFastqHandler from cg.meta.workflow.microsalt.quality_controller import QualityController from cg.meta.workflow.microsalt.quality_controller.models import QualityResult +from cg.meta.workflow.microsalt.utils import get_most_recent_project_directory from cg.models.cg_config import CGConfig from cg.store.models import Case, Sample from cg.utils import Process @@ -301,16 +301,7 @@ def get_project_id(self, case_id: str) -> str: def get_results_dir(self) -> Path: return Path(self.root_dir, "results") - def get_analyses_result_dirs(self, case_id: str) -> list[str]: - project_id: str = self.get_project_id(case_id) - results_dir: Path = self.get_results_dir() - matches: list[str] = [d for d in os.listdir(results_dir) if d.startswith(project_id)] - if not matches: - LOG.error(f"No result directory found for {case_id} with project id {project_id}") - return matches - def get_case_path(self, case_id: str) -> Path: + project_id: str = self.get_project_id(case_id) results_dir: Path = self.get_results_dir() - matching_cases: list[str] = self.get_analyses_result_dirs(case_id) - case_dir: str = max(matching_cases, default="") - return Path(results_dir, case_dir) + return get_most_recent_project_directory(project_id=project_id, directory=results_dir) diff --git a/cg/meta/workflow/microsalt/utils.py 
"""Helpers for locating the analysis run directories of a microSALT project."""

from datetime import datetime
import os
from pathlib import Path

# Layout of the "<date>_<time>" suffix of a run directory name.
RUN_DIRECTORY_DATE_FORMAT = "%Y.%m.%d_%H.%M.%S"


def get_project_directory_date(dir_name: str) -> datetime:
    """Return the timestamp encoded in a run directory name.

    The name is expected to look like
    ``<project id>_<year.month.day>_<hour.minute.second>``,
    for example ``ACC13796_2024.2.5_15.58.22``.

    Raises:
        ValueError: if the name does not end in a parsable date and time.
    """
    # Split from the right so underscores inside the project id part cannot
    # shift the date and time fields.
    name_parts: list[str] = dir_name.rsplit("_", 2)
    if len(name_parts) != 3:
        raise ValueError(f"Directory name {dir_name} does not match <project>_<date>_<time>.")
    _, date, time = name_parts
    return datetime.strptime(f"{date}_{time}", RUN_DIRECTORY_DATE_FORMAT)


def _has_parsable_date(dir_name: str) -> bool:
    """Tell whether a directory name carries a valid run timestamp."""
    try:
        get_project_directory_date(dir_name)
    except ValueError:
        return False
    return True


def get_project_directories(project_id: str, directory: Path) -> list[str]:
    """Return the names of the entries in directory that belong to the project.

    An entry belongs to the project when its name starts with the project id
    followed by an underscore. Requiring the separator keeps a project such as
    ``ACC13796`` from also picking up the runs of ``ACC137960``.
    """
    prefix: str = f"{project_id}_"
    return [sub_dir for sub_dir in os.listdir(directory) if sub_dir.startswith(prefix)]


def sort_project_directories_by_date(project_directories: list[str]) -> list[str]:
    """Return the directory names ordered from the most recent run to the oldest.

    Raises:
        ValueError: if any name does not contain a parsable date and time.
    """
    return sorted(project_directories, key=get_project_directory_date, reverse=True)


def get_most_recent_project_directory(project_id: str, directory: Path) -> Path:
    """Return the path of the most recent run directory of the project.

    Entries that match the project prefix but whose name has no parsable
    timestamp are ignored, so a stray file or folder cannot break the lookup.

    Raises:
        MissingAnalysisRunDirectory: if no run directory exists for the project.
    """
    project_directories: list[str] = [
        sub_dir
        for sub_dir in get_project_directories(project_id=project_id, directory=directory)
        if _has_parsable_date(sub_dir)
    ]

    if not project_directories:
        # Imported here because the exception is only needed on the failure path.
        from cg.exc import MissingAnalysisRunDirectory

        raise MissingAnalysisRunDirectory(f"No analysis directory found for project {project_id}.")

    most_recent: str = sort_project_directories_by_date(project_directories)[0]
    return Path(directory, most_recent)
test_get_cases_to_store_pass( # THEN cases should returned assert cases_to_store + + +def test_get_date_from_project_directory(): + # GIVEN a microsalt analysis run directory name + run_dir_name = "ACC13796_2024.2.5_15.58.22" + + # WHEN parsing the project directory date + run_date: datetime = get_project_directory_date(run_dir_name) + + # THEN the date should be parsed correctly + assert run_date == datetime(2024, 2, 5, 15, 58, 22)