diff --git a/cg/services/fastq_concatenation_service/utils.py b/cg/services/fastq_concatenation_service/utils.py index 01de27a199..49582a5716 100644 --- a/cg/services/fastq_concatenation_service/utils.py +++ b/cg/services/fastq_concatenation_service/utils.py @@ -4,7 +4,7 @@ import uuid from cg.services.fastq_concatenation_service.exceptions import ConcatenationError -from cg.constants.constants import ReadDirection +from cg.constants.constants import ReadDirection, FileFormat from cg.constants import FileExtensions @@ -73,7 +73,11 @@ def sort_files_by_name(files: list[Path]) -> list[Path]: def file_can_be_removed(file: Path, forward_file: Path, reverse_file: Path) -> bool: - return file.suffix == FileExtensions.GZIP and file != forward_file and file != reverse_file + return ( + f"{FileFormat.FASTQ}{FileExtensions.GZIP}" in file.name + and file != forward_file + and file != reverse_file + ) def remove_raw_fastqs(fastq_directory: Path, forward_file: Path, reverse_file: Path) -> None: diff --git a/cg/services/file_delivery/abstract_classes.py b/cg/services/file_delivery/abstract_classes.py index 69eb9c2e24..0a4b6ec818 100644 --- a/cg/services/file_delivery/abstract_classes.py +++ b/cg/services/file_delivery/abstract_classes.py @@ -3,22 +3,16 @@ from cg.services.file_delivery.fetch_file_service.fetch_delivery_files_service import ( FetchDeliveryFilesService, ) + +from cg.services.file_delivery.file_formatter_service.delivery_file_formatting_service import ( + DeliveryFileFormattingService, +) + from cg.services.file_delivery.move_files_service.move_delivery_files_service import ( MoveDeliveryFilesService, ) -class FormatDeliveryFilesService(ABC): - """ - Abstract class that encapsulates the logic required for formatting files to deliver. 
- """ - - @abstractmethod - def format_files(self, case_id: str) -> None: - """Format the files to deliver.""" - pass - - class DeliverFilesService(ABC): """ Abstract class that encapsulates the logic required for delivering files to the customer. @@ -35,9 +29,11 @@ def __init__( self, delivery_file_manager_service: FetchDeliveryFilesService, move_file_service: MoveDeliveryFilesService, + file_formatter_service: DeliveryFileFormattingService, ): self.file_manager = delivery_file_manager_service self.file_mover = move_file_service + self.file_formatter = file_formatter_service @abstractmethod def deliver_files_for_case(self, case_id: str) -> None: diff --git a/cg/services/file_delivery/fetch_file_service/fetch_analysis_files_service.py b/cg/services/file_delivery/fetch_file_service/fetch_analysis_files_service.py index 691b9c705b..ef74b86268 100644 --- a/cg/services/file_delivery/fetch_file_service/fetch_analysis_files_service.py +++ b/cg/services/file_delivery/fetch_file_service/fetch_analysis_files_service.py @@ -51,12 +51,17 @@ def _get_sample_files_from_case_bundle( """Return a list of files from a case bundle with a sample id as tag.""" sample_tags: list[set[str]] = self.tags_fetcher.fetch_tags(workflow).sample_tags sample_tags_with_sample_id: list[set[str]] = [tag | {sample_id} for tag in sample_tags] - sample_files: list[File] = self.hk_api.get_files_from_latest_version_containing_tags( bundle_name=case_id, tags=sample_tags_with_sample_id ) + sample_name: str = self.status_db.get_sample_by_internal_id(sample_id).name return [ - SampleFile(case_id=case_id, sample_id=sample_id, file_path=sample_file.full_path) + SampleFile( + case_id=case_id, + sample_id=sample_id, + sample_name=sample_name, + file_path=sample_file.full_path, + ) for sample_file in sample_files ] @@ -82,6 +87,10 @@ def get_analysis_case_delivery_files(self, case: Case) -> list[CaseFile]: bundle_name=case.internal_id, tags=case_tags, excluded_tags=sample_id_tags ) return [ - 
CaseFile(case_id=case.internal_id, file_path=case_file.full_path) + CaseFile( + case_id=case.internal_id, + case_name=case.name, + file_path=case_file.full_path, + ) for case_file in case_files ] diff --git a/cg/services/file_delivery/fetch_file_service/fetch_fastq_analysis_files_service.py b/cg/services/file_delivery/fetch_file_service/fetch_fastq_analysis_files_service.py index 9a1c811469..fb99fc0c17 100644 --- a/cg/services/file_delivery/fetch_file_service/fetch_fastq_analysis_files_service.py +++ b/cg/services/file_delivery/fetch_file_service/fetch_fastq_analysis_files_service.py @@ -33,13 +33,14 @@ def get_files_to_deliver(self, case_id: str) -> DeliveryFiles: self.hk_api, tags_fetcher=FetchSampleAndCaseDeliveryFileTagsService(), ) - fastq_files = fetch_fastq_service.get_files_to_deliver(case_id) - analysis_files = fetch_analysis_service.get_files_to_deliver(case_id) + + fastq_files: DeliveryFiles = fetch_fastq_service.get_files_to_deliver(case_id) + analysis_files: DeliveryFiles = fetch_analysis_service.get_files_to_deliver(case_id) delivery_data = DeliveryMetaData( customer_internal_id=case.customer.internal_id, ticket_id=case.latest_ticket ) return DeliveryFiles( delivery_data=delivery_data, - case_files=analysis_case_files, - sample_files=analysis_sample_files, + case_files=analysis_files.case_files, + sample_files=analysis_files.sample_files + fastq_files.sample_files, ) diff --git a/cg/services/file_delivery/fetch_file_service/fetch_fastq_files_service.py b/cg/services/file_delivery/fetch_file_service/fetch_fastq_files_service.py index e029f5623c..ef64ccc200 100644 --- a/cg/services/file_delivery/fetch_file_service/fetch_fastq_files_service.py +++ b/cg/services/file_delivery/fetch_file_service/fetch_fastq_files_service.py @@ -59,7 +59,13 @@ def _get_fastq_files_for_sample(self, case_id: str, sample_id: str) -> list[Samp fastq_files: list[File] = self.hk_api.get_files_from_latest_version_containing_tags( bundle_name=sample_id, tags=fastq_tags ) + 
sample_name: str = self.status_db.get_sample_by_internal_id(sample_id).name return [ - SampleFile(case_id=case_id, sample_id=sample_id, file_path=fastq_file.full_path) + SampleFile( + case_id=case_id, + sample_id=sample_id, + sample_name=sample_name, + file_path=fastq_file.full_path, + ) for fastq_file in fastq_files ] diff --git a/cg/services/file_delivery/fetch_file_service/models.py b/cg/services/file_delivery/fetch_file_service/models.py index 99380e69f7..aa75c12b08 100644 --- a/cg/services/file_delivery/fetch_file_service/models.py +++ b/cg/services/file_delivery/fetch_file_service/models.py @@ -10,12 +10,14 @@ class DeliveryMetaData(BaseModel): class CaseFile(BaseModel): case_id: str + case_name: str file_path: Path class SampleFile(BaseModel): case_id: str sample_id: str + sample_name: str file_path: Path diff --git a/cg/services/file_delivery/file_formatter_service/__init__.py b/cg/services/file_delivery/file_formatter_service/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/cg/services/file_delivery/file_formatter_service/delivery_file_formatter.py b/cg/services/file_delivery/file_formatter_service/delivery_file_formatter.py new file mode 100644 index 0000000000..552635b10b --- /dev/null +++ b/cg/services/file_delivery/file_formatter_service/delivery_file_formatter.py @@ -0,0 +1,75 @@ +import os +from pathlib import Path +from cg.constants.delivery import INBOX_NAME +from cg.services.file_delivery.fetch_file_service.models import DeliveryFiles, SampleFile, CaseFile +from cg.services.file_delivery.file_formatter_service.utils.case_file_formatter import ( + CaseFileFormatter, +) +from cg.services.file_delivery.file_formatter_service.delivery_file_formatting_service import ( + DeliveryFileFormattingService, +) +from cg.services.file_delivery.file_formatter_service.models import FormattedFiles, FormattedFile +from cg.services.file_delivery.file_formatter_service.utils.sample_file_concatenation_formatter import ( + 
SampleFileConcatenationFormatter, +) +from cg.services.file_delivery.file_formatter_service.utils.sample_file_formatter import ( + SampleFileFormatter, +) + + +class DeliveryFileFormatter(DeliveryFileFormattingService): + """ + Format the files to be delivered in the generic format. + Expected structure: + /inbox/// + /inbox/// + """ + + def __init__( + self, + case_file_formatter: CaseFileFormatter, + sample_file_formatter: SampleFileFormatter | SampleFileConcatenationFormatter, + ): + self.case_file_formatter = case_file_formatter + self.sample_file_formatter = sample_file_formatter + + def format_files(self, delivery_files: DeliveryFiles) -> FormattedFiles: + """Format the files to be delivered and return the formatted files in the generic format.""" + ticket_dir_path: Path = self.get_folder_under_inbox( + delivery_files.sample_files[0].file_path + ) + self._create_ticket_dir(ticket_dir_path) + formatted_files: list[FormattedFile] = self._format_sample_and_case_files( + sample_files=delivery_files.sample_files, + case_files=delivery_files.case_files, + ticket_dir_path=ticket_dir_path, + ) + return FormattedFiles(files=formatted_files) + + def _format_sample_and_case_files( + self, sample_files: list[SampleFile], case_files: list[CaseFile], ticket_dir_path: Path + ) -> list[FormattedFile]: + """Helper method to format both sample and case files.""" + formatted_files: list[FormattedFile] = self.sample_file_formatter.format_files( + moved_files=sample_files, + ticket_dir_path=ticket_dir_path, + ) + if case_files: + formatted_case_files: list[FormattedFile] = self.case_file_formatter.format_files( + moved_files=case_files, + ticket_dir_path=ticket_dir_path, + ) + formatted_files.extend(formatted_case_files) + return formatted_files + + @staticmethod + def get_folder_under_inbox(file_path: Path) -> Path: + try: + inbox_index: int = file_path.parts.index(INBOX_NAME) + return Path(*file_path.parts[: inbox_index + 2]) + except ValueError: + raise ValueError(f"Could not 
find the inbox directory in the path: {file_path}") + + @staticmethod + def _create_ticket_dir(ticket_dir_path: Path) -> None: + os.makedirs(ticket_dir_path, exist_ok=True) diff --git a/cg/services/file_delivery/file_formatter_service/delivery_file_formatting_service.py b/cg/services/file_delivery/file_formatter_service/delivery_file_formatting_service.py new file mode 100644 index 0000000000..b74d513c0c --- /dev/null +++ b/cg/services/file_delivery/file_formatter_service/delivery_file_formatting_service.py @@ -0,0 +1,16 @@ +from abc import abstractmethod, ABC + +from cg.services.file_delivery.fetch_file_service.models import DeliveryFiles +from cg.services.file_delivery.file_formatter_service.models import FormattedFiles +from cg.store.store import Store + + +class DeliveryFileFormattingService(ABC): + """ + Abstract class that encapsulates the logic required for formatting files to deliver. + """ + + @abstractmethod + def format_files(self, delivery_files: DeliveryFiles) -> FormattedFiles: + """Format the files to deliver.""" + pass diff --git a/cg/services/file_delivery/file_formatter_service/models.py b/cg/services/file_delivery/file_formatter_service/models.py new file mode 100644 index 0000000000..d4be95ff73 --- /dev/null +++ b/cg/services/file_delivery/file_formatter_service/models.py @@ -0,0 +1,11 @@ +from pathlib import Path +from pydantic import BaseModel + + +class FormattedFile(BaseModel): + original_path: Path + formatted_path: Path + + +class FormattedFiles(BaseModel): + files: list[FormattedFile] diff --git a/cg/services/file_delivery/file_formatter_service/utils/__init__.py b/cg/services/file_delivery/file_formatter_service/utils/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/cg/services/file_delivery/file_formatter_service/utils/case_file_formatter.py b/cg/services/file_delivery/file_formatter_service/utils/case_file_formatter.py new file mode 100644 index 0000000000..eeb522c1d3 --- /dev/null +++ 
b/cg/services/file_delivery/file_formatter_service/utils/case_file_formatter.py @@ -0,0 +1,48 @@ +import os +from pathlib import Path + +from cg.services.file_delivery.fetch_file_service.models import CaseFile +from cg.services.file_delivery.file_formatter_service.models import FormattedFile + + +class CaseFileFormatter: + + def format_files( + self, moved_files: list[CaseFile], ticket_dir_path: Path + ) -> list[FormattedFile]: + """Format the case files to deliver and return the formatted files..""" + self._create_case_name_folder( + ticket_path=ticket_dir_path, case_name=moved_files[0].case_name + ) + return self._format_case_files(moved_files) + + def _format_case_files(self, case_files: list[CaseFile]) -> list[FormattedFile]: + formatted_files: list[FormattedFile] = self._get_formatted_files(case_files) + for formatted_file in formatted_files: + os.rename(src=formatted_file.original_path, dst=formatted_file.formatted_path) + return formatted_files + + @staticmethod + def _create_case_name_folder(ticket_path: Path, case_name: str) -> None: + case_dir_path = Path(ticket_path, case_name) + case_dir_path.mkdir(exist_ok=True) + + @staticmethod + def _get_formatted_files(case_files: list[CaseFile]) -> list[FormattedFile]: + """ + Returns formatted files: + 1. Adds a folder with case name to the path of the case files. + 2. Replaces case id by case name. 
+ """ + formatted_files: list[FormattedFile] = [] + for case_file in case_files: + replaced_case_file_name: str = case_file.file_path.name.replace( + case_file.case_id, case_file.case_name + ) + formatted_file_path = Path( + case_file.file_path.parent, case_file.case_name, replaced_case_file_name + ) + formatted_files.append( + FormattedFile(original_path=case_file.file_path, formatted_path=formatted_file_path) + ) + return formatted_files diff --git a/cg/services/file_delivery/file_formatter_service/utils/sample_file_concatenation_formatter.py b/cg/services/file_delivery/file_formatter_service/utils/sample_file_concatenation_formatter.py new file mode 100644 index 0000000000..fe3701668c --- /dev/null +++ b/cg/services/file_delivery/file_formatter_service/utils/sample_file_concatenation_formatter.py @@ -0,0 +1,110 @@ +from pathlib import Path + +from cg.constants.constants import ReadDirection, FileFormat, FileExtensions +from cg.meta.deliver.fastq_path_generator import generate_concatenated_fastq_delivery_path +from cg.services.fastq_concatenation_service.fastq_concatenation_service import ( + FastqConcatenationService, +) +from cg.services.file_delivery.fetch_file_service.models import SampleFile +from cg.services.file_delivery.file_formatter_service.models import FormattedFile +from cg.services.file_delivery.file_formatter_service.utils.sample_file_formatter import ( + SampleFileFormatter, +) + + +class SampleFileConcatenationFormatter(SampleFileFormatter): + """ + Format the sample files to deliver, concatenate fastq files and return the formatted files. + Used for workflows: Microsalt and Mutant. 
+ """ + + def __init__(self, concatenation_service: FastqConcatenationService): + self.concatenation_service = concatenation_service + + def format_files( + self, moved_files: list[SampleFile], ticket_dir_path: Path + ) -> list[FormattedFile]: + """Format the sample files to deliver, concatenate fastq files and return the formatted files.""" + formatted_files: list[FormattedFile] = super().format_files( + moved_files=moved_files, ticket_dir_path=ticket_dir_path + ) + forward_paths, reverse_path = self._concatenate_fastq_files(formatted_files=formatted_files) + self._replace_fastq_paths( + reverse_paths=reverse_path, + forward_paths=forward_paths, + formatted_files=formatted_files, + ) + return formatted_files + + def _concatenate_fastq_files( + self, formatted_files: list[FormattedFile] + ) -> tuple[list[Path], list[Path]]: + unique_sample_dir_paths: set[Path] = self._get_unique_sample_paths( + sample_files=formatted_files + ) + forward_paths: list[Path] = [] + reverse_paths: list[Path] = [] + for fastq_directory in unique_sample_dir_paths: + sample_name: str = fastq_directory.name + + forward_path: Path = generate_concatenated_fastq_delivery_path( + fastq_directory=fastq_directory, + sample_name=sample_name, + direction=ReadDirection.FORWARD, + ) + forward_paths.append(forward_path) + reverse_path: Path = generate_concatenated_fastq_delivery_path( + fastq_directory=fastq_directory, + sample_name=sample_name, + direction=ReadDirection.REVERSE, + ) + reverse_paths.append(reverse_path) + self.concatenation_service.concatenate( + fastq_directory=fastq_directory, + forward_output_path=forward_path, + reverse_output_path=reverse_path, + remove_raw=True, + ) + return forward_paths, reverse_paths + + @staticmethod + def _get_unique_sample_paths(sample_files: list[FormattedFile]) -> set[Path]: + sample_paths: list[Path] = [] + for sample_file in sample_files: + sample_paths.append(sample_file.formatted_path.parent) + return set(sample_paths) + + @staticmethod + def 
_replace_fastq_formatted_file_path( + formatted_files: list[FormattedFile], + direction: ReadDirection, + new_path: Path, + ) -> None: + """Replace the formatted file path with the new path.""" + for formatted_file in formatted_files: + if ( + formatted_file.formatted_path.parent == new_path.parent + and f"{FileFormat.FASTQ}{FileExtensions.GZIP}" in formatted_file.formatted_path.name + and f"R{direction}" in formatted_file.formatted_path.name + ): + formatted_file.formatted_path = new_path + + def _replace_fastq_paths( + self, + forward_paths: list[Path], + reverse_paths: list[Path], + formatted_files: list[FormattedFile], + ) -> None: + """Replace the fastq file paths with the new concatenated fastq file paths.""" + for forward_path in forward_paths: + self._replace_fastq_formatted_file_path( + formatted_files=formatted_files, + direction=ReadDirection.FORWARD, + new_path=forward_path, + ) + for reverse_path in reverse_paths: + self._replace_fastq_formatted_file_path( + formatted_files=formatted_files, + direction=ReadDirection.REVERSE, + new_path=reverse_path, + ) diff --git a/cg/services/file_delivery/file_formatter_service/utils/sample_file_formatter.py b/cg/services/file_delivery/file_formatter_service/utils/sample_file_formatter.py new file mode 100644 index 0000000000..e1e70058b1 --- /dev/null +++ b/cg/services/file_delivery/file_formatter_service/utils/sample_file_formatter.py @@ -0,0 +1,57 @@ +import os +from pathlib import Path +from cg.services.file_delivery.fetch_file_service.models import SampleFile +from cg.services.file_delivery.file_formatter_service.models import FormattedFile + + +class SampleFileFormatter: + """ + Format the sample files to deliver. + Used for all workflows except Microsalt and Mutant. 
+ """ + + def format_files( + self, moved_files: list[SampleFile], ticket_dir_path: Path + ) -> list[FormattedFile]: + """Format the sample files to deliver and return the formatted files.""" + sample_names: set[str] = self._get_sample_names(moved_files) + self._create_sample_folders(ticket_dir_path=ticket_dir_path, sample_names=sample_names) + return self._format_sample_files(moved_files) + + @staticmethod + def _get_sample_names(sample_files: list[SampleFile]) -> set[str]: + return set(sample_file.sample_name for sample_file in sample_files) + + @staticmethod + def _create_sample_folders(ticket_dir_path: Path, sample_names: set[str]): + for sample_name in sample_names: + sample_dir_path = Path(ticket_dir_path, sample_name) + sample_dir_path.mkdir(exist_ok=True) + + def _format_sample_files(self, sample_files: list[SampleFile]) -> list[FormattedFile]: + formatted_files: list[FormattedFile] = self._get_formatted_files(sample_files) + for formatted_file in formatted_files: + os.rename(src=formatted_file.original_path, dst=formatted_file.formatted_path) + return formatted_files + + @staticmethod + def _get_formatted_files(sample_files: list[SampleFile]) -> list[FormattedFile]: + """ + Returns formatted files: + 1. Adds a folder with sample name to the path of the sample files. + 2. Replaces sample id by sample name. 
+ """ + formatted_files: list[FormattedFile] = [] + for sample_file in sample_files: + replaced_sample_file_name: str = sample_file.file_path.name.replace( + sample_file.sample_id, sample_file.sample_name + ) + formatted_file_path = Path( + sample_file.file_path.parent, sample_file.sample_name, replaced_sample_file_name + ) + formatted_files.append( + FormattedFile( + original_path=sample_file.file_path, formatted_path=formatted_file_path + ) + ) + return formatted_files diff --git a/tests/conftest.py b/tests/conftest.py index fa9ce37d75..38b17354e7 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -95,6 +95,7 @@ "tests.fixture_plugins.delivery_fixtures.path_fixtures", "tests.fixture_plugins.delivery_fixtures.delivery_files_models_fixtures", "tests.fixture_plugins.delivery_fixtures.delivery_services_fixtures", + "tests.fixture_plugins.delivery_fixtures.delivery_formatted_files_fixtures", "tests.fixture_plugins.demultiplex_fixtures.flow_cell_fixtures", "tests.fixture_plugins.demultiplex_fixtures.housekeeper_fixtures", "tests.fixture_plugins.demultiplex_fixtures.metrics_fixtures", diff --git a/tests/fixture_plugins/delivery_fixtures/delivery_files_models_fixtures.py b/tests/fixture_plugins/delivery_fixtures/delivery_files_models_fixtures.py index b093946c53..7414a87b58 100644 --- a/tests/fixture_plugins/delivery_fixtures/delivery_files_models_fixtures.py +++ b/tests/fixture_plugins/delivery_fixtures/delivery_files_models_fixtures.py @@ -24,20 +24,26 @@ def expected_fastq_delivery_files( delivery_housekeeper_api: HousekeeperAPI, case_id: str, sample_id: str, + sample_name: str, another_sample_id: str, + another_sample_name: str, delivery_store_microsalt: Store, ) -> DeliveryFiles: """Return the expected fastq delivery files.""" - hk_bundle_names: list[str] = [sample_id, another_sample_id] + sample_info: list[tuple[str, str]] = [ + (sample_id, sample_name), + (another_sample_id, another_sample_name), + ] sample_files: list[SampleFile] = [ SampleFile( 
case_id=case_id, - sample_id=sample, + sample_id=sample[0], + sample_name=sample[1], file_path=delivery_housekeeper_api.get_files_from_latest_version( - bundle_name=sample, tags=[SequencingFileTag.FASTQ] + bundle_name=sample[0], tags=[SequencingFileTag.FASTQ] )[0].full_path, ) - for sample in hk_bundle_names + for sample in sample_info ] case: Case = delivery_store_microsalt.get_case_by_internal_id(case_id) delivery_meta_data = DeliveryMetaData( @@ -52,29 +58,37 @@ def expected_fastq_delivery_files( def expected_analysis_delivery_files( delivery_housekeeper_api: HousekeeperAPI, case_id: str, + case_name: str, sample_id: str, + sample_name: str, another_sample_id: str, + another_sample_name: str, delivery_store_balsamic: Store, ) -> DeliveryFiles: """Return the expected analysis delivery files.""" - hk_bundle_names: list[str] = [sample_id, another_sample_id] + sample_info: list[tuple[str, str]] = [ + (sample_id, sample_name), + (another_sample_id, another_sample_name), + ] sample_files: list[SampleFile] = [] - for sample in hk_bundle_names: + for sample in sample_info: sample_files.extend( [ SampleFile( case_id=case_id, - sample_id=sample, + sample_id=sample[0], + sample_name=sample[1], file_path=file.full_path, ) for file in delivery_housekeeper_api.get_files_from_latest_version( - bundle_name=case_id, tags=[AlignmentFileTag.CRAM, sample] + bundle_name=case_id, tags=[AlignmentFileTag.CRAM, sample[0]] ) ] ) case_files: list[CaseFile] = [ CaseFile( case_id=case_id, + case_name=case_name, file_path=delivery_housekeeper_api.get_files_from_latest_version( bundle_name=case_id, tags=[HK_DELIVERY_REPORT_TAG] )[0].full_path, @@ -136,6 +150,40 @@ def expected_moved_analysis_delivery_files( ) +@pytest.fixture +def expected_moved_analysis_sample_delivery_files( + expected_moved_analysis_delivery_files: DeliveryFiles, +) -> list[SampleFile]: + return expected_moved_analysis_delivery_files.sample_files + + +@pytest.fixture +def expected_moved_analysis_case_delivery_files( + 
expected_moved_analysis_delivery_files: DeliveryFiles, +) -> list[CaseFile]: + return expected_moved_analysis_delivery_files.case_files + + +@pytest.fixture +def fastq_concatenation_sample_files(tmp_path: Path) -> list[SampleFile]: + some_ticket: str = "some_ticket" + fastq_paths: list[Path] = [ + Path(tmp_path, some_ticket, "S1_1_R1_1.fastq.gz"), + Path(tmp_path, some_ticket, "S1_2_R1_1.fastq.gz"), + Path(tmp_path, some_ticket, "S1_1_R2_1.fastq.gz"), + Path(tmp_path, some_ticket, "S1_2_R2_1.fastq.gz"), + ] + return [ + SampleFile( + sample_id="S1", + case_id="Case1", + sample_name="Sample1", + file_path=fastq_path, + ) + for fastq_path in fastq_paths + ] + + def swap_file_paths_with_inbox_paths( file_models: list[CaseFile | SampleFile], inbox_dir_path: Path ) -> list[CaseFile | SampleFile]: diff --git a/tests/fixture_plugins/delivery_fixtures/delivery_formatted_files_fixtures.py b/tests/fixture_plugins/delivery_fixtures/delivery_formatted_files_fixtures.py new file mode 100644 index 0000000000..45aacbefe9 --- /dev/null +++ b/tests/fixture_plugins/delivery_fixtures/delivery_formatted_files_fixtures.py @@ -0,0 +1,87 @@ +from pathlib import Path + +import pytest + +from cg.services.file_delivery.fetch_file_service.models import DeliveryFiles, SampleFile +from cg.services.file_delivery.file_formatter_service.models import FormattedFile + + +@pytest.fixture +def expected_formatted_analysis_case_files( + expected_moved_analysis_delivery_files: DeliveryFiles, +) -> list[FormattedFile]: + formatted_files: list[FormattedFile] = [] + for case_file in expected_moved_analysis_delivery_files.case_files: + replaced_case_file_name: str = case_file.file_path.name.replace( + case_file.case_id, case_file.case_name + ) + formatted_file_path = Path( + case_file.file_path.parent, case_file.case_name, replaced_case_file_name + ) + formatted_files.append( + FormattedFile(original_path=case_file.file_path, formatted_path=formatted_file_path) + ) + return formatted_files + + 
+@pytest.fixture +def expected_formatted_analysis_sample_files( + expected_moved_analysis_delivery_files: DeliveryFiles, +) -> list[FormattedFile]: + formatted_files: list[FormattedFile] = [] + for sample_file in expected_moved_analysis_delivery_files.sample_files: + replaced_sample_file_name: str = sample_file.file_path.name.replace( + sample_file.sample_id, sample_file.sample_name + ) + formatted_file_path = Path( + sample_file.file_path.parent, sample_file.sample_name, replaced_sample_file_name + ) + formatted_files.append( + FormattedFile(original_path=sample_file.file_path, formatted_path=formatted_file_path) + ) + return formatted_files + + +@pytest.fixture +def expected_formatted_fastq_sample_files( + expected_moved_fastq_delivery_files: DeliveryFiles, +) -> list[FormattedFile]: + formatted_files: list[FormattedFile] = [] + for sample_file in expected_moved_fastq_delivery_files.sample_files: + replaced_sample_file_name: str = sample_file.file_path.name.replace( + sample_file.sample_id, sample_file.sample_name + ) + formatted_file_path = Path( + sample_file.file_path.parent, sample_file.sample_name, replaced_sample_file_name + ) + formatted_files.append( + FormattedFile(original_path=sample_file.file_path, formatted_path=formatted_file_path) + ) + return formatted_files + + +@pytest.fixture +def expected_concatenated_fastq_formatted_files( + fastq_concatenation_sample_files, +) -> list[FormattedFile]: + formatted_files: list[FormattedFile] = [] + for sample_file in fastq_concatenation_sample_files: + replaced_sample_file_name: str = sample_file.file_path.name.replace( + sample_file.sample_id, sample_file.sample_name + ) + replaced_sample_file_name = replaced_sample_file_name.replace("1_R1_1", "1") + replaced_sample_file_name = replaced_sample_file_name.replace("2_R1_1", "1") + replaced_sample_file_name = replaced_sample_file_name.replace("1_R2_1", "2") + replaced_sample_file_name = replaced_sample_file_name.replace("2_R2_1", "2") + formatted_file_path = Path( 
+ sample_file.file_path.parent, sample_file.sample_name, replaced_sample_file_name + ) + formatted_files.append( + FormattedFile(original_path=sample_file.file_path, formatted_path=formatted_file_path) + ) + return formatted_files + + +@pytest.fixture +def empty_case_files() -> None: + return None diff --git a/tests/fixture_plugins/delivery_fixtures/delivery_services_fixtures.py b/tests/fixture_plugins/delivery_fixtures/delivery_services_fixtures.py index 9f89406f6b..15f30d6169 100644 --- a/tests/fixture_plugins/delivery_fixtures/delivery_services_fixtures.py +++ b/tests/fixture_plugins/delivery_fixtures/delivery_services_fixtures.py @@ -1,6 +1,9 @@ import pytest from cg.apps.housekeeper.hk import HousekeeperAPI +from cg.services.fastq_concatenation_service.fastq_concatenation_service import ( + FastqConcatenationService, +) from cg.services.file_delivery.fetch_delivery_files_tags.fetch_sample_and_case_delivery_file_tags_service import ( FetchSampleAndCaseDeliveryFileTagsService, ) @@ -10,6 +13,18 @@ from cg.services.file_delivery.fetch_file_service.fetch_fastq_files_service import ( FetchFastqDeliveryFilesService, ) +from cg.services.file_delivery.file_formatter_service.delivery_file_formatter import ( + DeliveryFileFormatter, +) +from cg.services.file_delivery.file_formatter_service.utils.case_file_formatter import ( + CaseFileFormatter, +) +from cg.services.file_delivery.file_formatter_service.utils.sample_file_concatenation_formatter import ( + SampleFileConcatenationFormatter, +) +from cg.services.file_delivery.file_formatter_service.utils.sample_file_formatter import ( + SampleFileFormatter, +) from cg.store.store import Store @@ -39,3 +54,11 @@ def analysis_delivery_service( status_db=delivery_store_balsamic, tags_fetcher=tag_service, ) + + +@pytest.fixture +def generic_delivery_file_formatter() -> DeliveryFileFormatter: + """Fixture to get an instance of GenericDeliveryFileFormatter.""" + return DeliveryFileFormatter( + 
sample_file_formatter=SampleFileFormatter(), case_file_formatter=CaseFileFormatter() + ) diff --git a/tests/fixture_plugins/delivery_fixtures/path_fixtures.py b/tests/fixture_plugins/delivery_fixtures/path_fixtures.py index f9300b42f8..ad0e0bff1b 100644 --- a/tests/fixture_plugins/delivery_fixtures/path_fixtures.py +++ b/tests/fixture_plugins/delivery_fixtures/path_fixtures.py @@ -54,3 +54,8 @@ def delivery_another_cram_file(tmp_path: Path, another_sample_id: str) -> Path: file = Path(tmp_path, f"{another_sample_id}{FileExtensions.CRAM}") file.touch() return file + + +@pytest.fixture +def delivery_ticket_dir_path(tmp_path: Path, ticket_id: str) -> Path: + return Path(tmp_path, ticket_id) diff --git a/tests/services/file_delivery/format_deliver_files_service/test_format_delivery_files_service.py b/tests/services/file_delivery/format_deliver_files_service/test_format_delivery_files_service.py new file mode 100644 index 0000000000..3dbfd7d37d --- /dev/null +++ b/tests/services/file_delivery/format_deliver_files_service/test_format_delivery_files_service.py @@ -0,0 +1,72 @@ +import mock + +from cg.services.file_delivery.fetch_file_service.models import ( + DeliveryFiles, + SampleFile, + CaseFile, + DeliveryMetaData, +) +from cg.services.file_delivery.file_formatter_service.delivery_file_formatting_service import ( + DeliveryFileFormattingService, +) +import pytest + +from cg.services.file_delivery.file_formatter_service.models import FormattedFiles, FormattedFile + + +@pytest.mark.parametrize( + "formatter_service, formatted_case_files, formatted_sample_files, case_files, sample_files", + [ + ( + "generic_delivery_file_formatter", + "empty_case_files", + "expected_formatted_analysis_sample_files", + "empty_case_files", + "expected_moved_analysis_sample_delivery_files", + ), + ( + "generic_delivery_file_formatter", + "expected_formatted_analysis_case_files", + "expected_formatted_analysis_sample_files", + "expected_moved_analysis_case_delivery_files", + 
"expected_moved_analysis_sample_delivery_files", + ), + ], +) +def test_reformat_files( + formatter_service: DeliveryFileFormattingService, + formatted_case_files: list[FormattedFile], + formatted_sample_files: list[FormattedFile], + case_files: list[CaseFile], + sample_files: list[SampleFile], + request, +): + # GIVEN a delivery file formatter, mocked delivery files and expected formatted files + formatter_service = request.getfixturevalue(formatter_service) + formatted_case_files = request.getfixturevalue(formatted_case_files) + formatted_sample_files = request.getfixturevalue(formatted_sample_files) + case_files = request.getfixturevalue(case_files) + sample_files = request.getfixturevalue(sample_files) + + delivery_data = DeliveryMetaData(customer_internal_id="cust_id", ticket_id="ticket_id") + mock_delivery_files = DeliveryFiles( + delivery_data=delivery_data, case_files=case_files, sample_files=sample_files + ) + files = [] + files.extend(formatted_sample_files) + if case_files: + files.extend(formatted_case_files) + + expected_formatted_files = FormattedFiles(files=files) + with mock.patch( + "cg.services.file_delivery.file_formatter_service.utils.sample_file_formatter.SampleFileFormatter.format_files", + return_value=formatted_sample_files, + ), mock.patch( + "cg.services.file_delivery.file_formatter_service.utils.case_file_formatter.CaseFileFormatter.format_files", + return_value=formatted_case_files, + ): + # WHEN reformatting the delivery files + formatted_files: FormattedFiles = formatter_service.format_files(mock_delivery_files) + + # THEN the delivery files should be reformatted + assert formatted_files == expected_formatted_files diff --git a/tests/services/file_delivery/format_deliver_files_service/utils/test_formatter_utils.py b/tests/services/file_delivery/format_deliver_files_service/utils/test_formatter_utils.py new file mode 100644 index 0000000000..9890cce0bc --- /dev/null +++ 
# File: tests/services/file_delivery/format_deliver_files_service/utils/test_formatter_utils.py
import os
import pytest
from pathlib import Path

from cg.services.fastq_concatenation_service.fastq_concatenation_service import (
    FastqConcatenationService,
)
from cg.services.file_delivery.fetch_file_service.models import DeliveryFiles, CaseFile, SampleFile
from cg.services.file_delivery.file_formatter_service.models import FormattedFile
from cg.services.file_delivery.file_formatter_service.utils.case_file_formatter import (
    CaseFileFormatter,
)
from cg.services.file_delivery.file_formatter_service.utils.sample_file_concatenation_formatter import (
    SampleFileConcatenationFormatter,
)
from cg.services.file_delivery.file_formatter_service.utils.sample_file_formatter import (
    SampleFileFormatter,
)


@pytest.mark.parametrize(
    "moved_files,expected_formatted_files,file_formatter",
    [
        (
            "expected_moved_analysis_case_delivery_files",
            "expected_formatted_analysis_case_files",
            CaseFileFormatter(),
        ),
        (
            "expected_moved_analysis_sample_delivery_files",
            "expected_formatted_analysis_sample_files",
            SampleFileFormatter(),
        ),
        (
            "fastq_concatenation_sample_files",
            "expected_concatenated_fastq_formatted_files",
            SampleFileConcatenationFormatter(FastqConcatenationService()),
        ),
    ],
)
def test_file_formatter_utils(
    moved_files: list[CaseFile | SampleFile],
    expected_formatted_files: list[FormattedFile],
    file_formatter: CaseFileFormatter | SampleFileFormatter | SampleFileConcatenationFormatter,
    request,
):
    """Test that each file formatter formats the moved files on disk.

    ``moved_files`` and ``expected_formatted_files`` arrive as fixture names and are
    resolved through ``request.getfixturevalue``; ``file_formatter`` is passed as an
    instance. The parent directory of the first moved file is used as the ticket directory.
    """
    # GIVEN moved case or sample files that exist on disk, a file formatter and a ticket directory
    moved_files = request.getfixturevalue(moved_files)
    expected_formatted_files = request.getfixturevalue(expected_formatted_files)
    ticket_dir_path: Path = moved_files[0].file_path.parent

    os.makedirs(ticket_dir_path, exist_ok=True)

    for delivery_file in moved_files:
        delivery_file.file_path.touch()

    # WHEN formatting the moved files
    formatted_files: list[FormattedFile] = file_formatter.format_files(
        moved_files=moved_files,
        ticket_dir_path=ticket_dir_path,
    )

    # THEN the formatter should return the expected formatted files
    assert formatted_files == expected_formatted_files

    # THEN each formatted path should exist and each original path should be gone
    for formatted_file in formatted_files:
        assert formatted_file.formatted_path.exists()
        assert not formatted_file.original_path.exists()