From 5a95f628a0534d0bf29a6e6ae8eb95ec50d1c537 Mon Sep 17 00:00:00 2001 From: ChristianOertlin Date: Fri, 20 Dec 2024 15:26:12 +0100 Subject: [PATCH] fix a bug (#4040) (patch) # Description fix concatenation bug --- .../files/concatenation_service.py | 15 ++++++- .../delivery_files_models_fixtures.py | 16 +++---- .../delivery_formatted_files_fixtures.py | 6 +++ .../delivery_fixtures/path_fixtures.py | 4 +- .../files/test_formatter_utils.py | 42 +++++++++++++++++++ 5 files changed, 72 insertions(+), 11 deletions(-) diff --git a/cg/services/deliver_files/file_formatter/files/concatenation_service.py b/cg/services/deliver_files/file_formatter/files/concatenation_service.py index b2f20b6e57..4ae1c1a442 100644 --- a/cg/services/deliver_files/file_formatter/files/concatenation_service.py +++ b/cg/services/deliver_files/file_formatter/files/concatenation_service.py @@ -183,7 +183,9 @@ def _get_unique_sample_fastq_paths( list_of_files: list[Path] = get_all_files_in_directory_tree(delivery_path) for sample_name in sample_names: for file in list_of_files: - if sample_name in file.as_posix() and self._is_lane_fastq_file(file): + if self._has_expected_sample_name_format_match( + sample_name=sample_name, file_path=file + ) and self._is_lane_fastq_file(file): LOG.debug( f"[CONCATENATION SERVICE] Found fastq file: {file} for sample: {sample_name}" ) @@ -200,6 +202,17 @@ def _get_unique_sample_fastq_paths( ) return sample_paths + @staticmethod + def _has_expected_sample_name_format_match(sample_name: str, file_path: Path) -> bool: + """ + Check if the sample name is an exact match in the file path. + Fastq files are expected to have the sample name in the file path formatted as such: _{sample_name}_ + args: + sample_name: str: The sample name to match. + file_path: Path: The file path to check. + """ + return f"_{sample_name}_" in file_path.as_posix() + @staticmethod def _get_concatenation_map( forward_path: Path, reverse_path: Path, fastq_files: list[FastqFile] diff --git a/tests/fixture_plugins/delivery_fixtures/delivery_files_models_fixtures.py b/tests/fixture_plugins/delivery_fixtures/delivery_files_models_fixtures.py index ea2b2e8337..2d69ac742c 100644 --- a/tests/fixture_plugins/delivery_fixtures/delivery_files_models_fixtures.py +++ b/tests/fixture_plugins/delivery_fixtures/delivery_files_models_fixtures.py @@ -303,10 +303,10 @@ def fastq_concatenation_sample_files( sample_files = [] for sample_id, sample_name in sample_data: fastq_paths: list[Path] = [ - Path(tmp_path, inbox, f"{sample_id}_L001_R1_001.fastq.gz"), - Path(tmp_path, inbox, f"{sample_id}_L002_R1_001.fastq.gz"), - Path(tmp_path, inbox, f"{sample_id}_L001_R2_001.fastq.gz"), - Path(tmp_path, inbox, f"{sample_id}_L002_R2_001.fastq.gz"), + Path(tmp_path, inbox, f"FC_{sample_id}_L001_R1_001.fastq.gz"), + Path(tmp_path, inbox, f"FC_{sample_id}_L002_R1_001.fastq.gz"), + Path(tmp_path, inbox, f"FC_{sample_id}_L001_R2_001.fastq.gz"), + Path(tmp_path, inbox, f"FC_{sample_id}_L002_R2_001.fastq.gz"), ] sample_files.extend( @@ -329,10 +329,10 @@ def fastq_concatenation_sample_files_flat(tmp_path: Path) -> list[SampleFile]: sample_files = [] for sample_id, sample_name in sample_data: fastq_paths: list[Path] = [ - Path(tmp_path, f"{sample_id}_L001_R1_001.fastq.gz"), - Path(tmp_path, f"{sample_id}_L002_R1_001.fastq.gz"), - Path(tmp_path, f"{sample_id}_L001_R2_001.fastq.gz"), - Path(tmp_path, f"{sample_id}_L002_R2_001.fastq.gz"), + Path(tmp_path, f"FC_{sample_id}_L001_R1_001.fastq.gz"), + Path(tmp_path, f"FC_{sample_id}_L002_R1_001.fastq.gz"), + Path(tmp_path, f"FC_{sample_id}_L001_R2_001.fastq.gz"), + Path(tmp_path, f"FC_{sample_id}_L002_R2_001.fastq.gz"), ] sample_files.extend( diff --git a/tests/fixture_plugins/delivery_fixtures/delivery_formatted_files_fixtures.py b/tests/fixture_plugins/delivery_fixtures/delivery_formatted_files_fixtures.py index 2e90df0f80..872d39dd36 100644 --- a/tests/fixture_plugins/delivery_fixtures/delivery_formatted_files_fixtures.py +++ b/tests/fixture_plugins/delivery_fixtures/delivery_formatted_files_fixtures.py @@ -1,9 +1,13 @@ from pathlib import Path +from unittest.mock import Mock import pytest from cg.services.deliver_files.file_fetcher.models import DeliveryFiles from cg.services.deliver_files.file_formatter.destination.models import FormattedFile +from cg.services.deliver_files.file_formatter.files.concatenation_service import ( + SampleFileConcatenationFormatter, +) @pytest.fixture @@ -89,6 +93,7 @@ def expected_concatenated_fastq_formatted_files( replaced_sample_file_name = replaced_sample_file_name.replace("L002_R1_001", "1") replaced_sample_file_name = replaced_sample_file_name.replace("L001_R2_001", "2") replaced_sample_file_name = replaced_sample_file_name.replace("L002_R2_001", "2") + replaced_sample_file_name = replaced_sample_file_name.replace("FC_", "") formatted_file_path = Path( sample_file.file_path.parent, sample_file.sample_name, replaced_sample_file_name ) @@ -111,6 +116,7 @@ def expected_concatenated_fastq_flat_formatted_files( replaced_sample_file_name = replaced_sample_file_name.replace("L002_R1_001", "1") replaced_sample_file_name = replaced_sample_file_name.replace("L001_R2_001", "2") replaced_sample_file_name = replaced_sample_file_name.replace("L002_R2_001", "2") + replaced_sample_file_name = replaced_sample_file_name.replace("FC_", "") formatted_file_path = Path(sample_file.file_path.parent, replaced_sample_file_name) formatted_files.append( FormattedFile(original_path=sample_file.file_path, formatted_path=formatted_file_path) diff --git a/tests/fixture_plugins/delivery_fixtures/path_fixtures.py b/tests/fixture_plugins/delivery_fixtures/path_fixtures.py index 22d682b014..b114d0dccf 100644 --- a/tests/fixture_plugins/delivery_fixtures/path_fixtures.py +++ b/tests/fixture_plugins/delivery_fixtures/path_fixtures.py @@ -9,7 +9,7 @@ @pytest.fixture def delivery_fastq_file(tmp_path: Path, sample_id: str) -> Path: - file = Path(tmp_path, f"{sample_id}_L001_R1_001{FileExtensions.FASTQ_GZ}") + file = Path(tmp_path, f"FC_{sample_id}_L001_R1_001{FileExtensions.FASTQ_GZ}") file.touch() return file @@ -34,7 +34,7 @@ def delivery_bam_file(tmp_path: Path, sample_id: str) -> Path: @pytest.fixture def delivery_another_fastq_file(tmp_path: Path, another_sample_id: str) -> Path: - file = Path(tmp_path, f"{another_sample_id}L001_R1_001{FileExtensions.FASTQ_GZ}") + file = Path(tmp_path, f"FC_{another_sample_id}L001_R1_001{FileExtensions.FASTQ_GZ}") file.touch() return file diff --git a/tests/services/file_delivery/file_formatter/files/test_formatter_utils.py b/tests/services/file_delivery/file_formatter/files/test_formatter_utils.py index ce440e10d7..4d7b241b57 100644 --- a/tests/services/file_delivery/file_formatter/files/test_formatter_utils.py +++ b/tests/services/file_delivery/file_formatter/files/test_formatter_utils.py @@ -137,3 +137,45 @@ def test_mutant_file_formatter( for file in formatted_files: assert file.formatted_path.exists() assert not file.original_path.exists() + + +def test_concatenation_sample_name_match(): + # GIVEN a concatenation service and a list of file paths and a sample name that is a number + sample_name = "12" + concatentation_formatter = SampleFileConcatenationFormatter( + file_manager=Mock(), + path_name_formatter=Mock(), + concatenation_service=Mock(), + ) + # GIVEN two sets of file paths that should match and not match the sample name + should_match_file_paths = [ + Path("path/to/FC_12_L001_R1_001.fastq.gz"), + Path("path/to/FC_12_L002_R1_001.fastq.gz"), + Path("path/to/FC_12_L001_R2_001.fastq.gz"), + Path("path/to/FC_12_L002_R2_001.fastq.gz"), + ] + should_not_match_file_paths = [ + Path("path/to/FC_123_L001_R1_001.fastq.gz"), + Path("path/to/FC_123_L002_R1_001.fastq.gz"), + Path("path/to/FC_123_L001_R2_001.fastq.gz"), + Path("path/to/FC_123_L002_R2_001.fastq.gz"), + ] + + # WHEN checking if the file paths match the sample name + + # THEN the file paths that should match should return True and the file paths that should not match should return False + for file_path in should_match_file_paths: + assert ( + concatentation_formatter._has_expected_sample_name_format_match( + file_path=file_path, sample_name=sample_name + ) + is True + ) + + for file_path in should_not_match_file_paths: + assert ( + concatentation_formatter._has_expected_sample_name_format_match( + file_path=file_path, sample_name=sample_name + ) + is False + )