Skip to content

Commit

Permalink
fix a bug (#4040) (patch)
Browse files Browse the repository at this point in the history
# Description

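Fix concatenation bug: a sample's fastq files are now matched on the exact `_{sample_name}_` pattern in the file path, so that e.g. sample `12` no longer picks up files belonging to sample `123`.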
  • Loading branch information
ChrOertlin authored Dec 20, 2024
1 parent eaed26d commit 5a95f62
Show file tree
Hide file tree
Showing 5 changed files with 72 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,9 @@ def _get_unique_sample_fastq_paths(
list_of_files: list[Path] = get_all_files_in_directory_tree(delivery_path)
for sample_name in sample_names:
for file in list_of_files:
if sample_name in file.as_posix() and self._is_lane_fastq_file(file):
if self._has_expected_sample_name_format_match(
sample_name=sample_name, file_path=file
) and self._is_lane_fastq_file(file):
LOG.debug(
f"[CONCATENATION SERVICE] Found fastq file: {file} for sample: {sample_name}"
)
Expand All @@ -200,6 +202,17 @@ def _get_unique_sample_fastq_paths(
)
return sample_paths

@staticmethod
def _has_expected_sample_name_format_match(sample_name: str, file_path: Path) -> bool:
    """
    Check if the sample name is an exact, underscore-delimited match in the file path.

    Fastq files are expected to have the sample name in the file path formatted as such:
    _{sample_name}_ (for example FC_12_L001_R1_001.fastq.gz for sample "12"). Requiring the
    surrounding underscores keeps sample "12" from matching files that belong to sample "123".

    args:
        sample_name: str: The sample name to match.
        file_path: Path: The file path to check.
    returns:
        bool: True if _{sample_name}_ occurs in the POSIX form of the path, otherwise False.
    """
    if not sample_name:
        # An empty name would turn the pattern into "__", which matches any path that merely
        # contains two consecutive underscores.
        return False
    # NOTE(review): the whole path is searched, so a directory component containing
    # _{sample_name}_ also counts as a match - confirm this is intended.
    return f"_{sample_name}_" in file_path.as_posix()

@staticmethod
def _get_concatenation_map(
forward_path: Path, reverse_path: Path, fastq_files: list[FastqFile]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -303,10 +303,10 @@ def fastq_concatenation_sample_files(
sample_files = []
for sample_id, sample_name in sample_data:
fastq_paths: list[Path] = [
Path(tmp_path, inbox, f"{sample_id}_L001_R1_001.fastq.gz"),
Path(tmp_path, inbox, f"{sample_id}_L002_R1_001.fastq.gz"),
Path(tmp_path, inbox, f"{sample_id}_L001_R2_001.fastq.gz"),
Path(tmp_path, inbox, f"{sample_id}_L002_R2_001.fastq.gz"),
Path(tmp_path, inbox, f"FC_{sample_id}_L001_R1_001.fastq.gz"),
Path(tmp_path, inbox, f"FC_{sample_id}_L002_R1_001.fastq.gz"),
Path(tmp_path, inbox, f"FC_{sample_id}_L001_R2_001.fastq.gz"),
Path(tmp_path, inbox, f"FC_{sample_id}_L002_R2_001.fastq.gz"),
]

sample_files.extend(
Expand All @@ -329,10 +329,10 @@ def fastq_concatenation_sample_files_flat(tmp_path: Path) -> list[SampleFile]:
sample_files = []
for sample_id, sample_name in sample_data:
fastq_paths: list[Path] = [
Path(tmp_path, f"{sample_id}_L001_R1_001.fastq.gz"),
Path(tmp_path, f"{sample_id}_L002_R1_001.fastq.gz"),
Path(tmp_path, f"{sample_id}_L001_R2_001.fastq.gz"),
Path(tmp_path, f"{sample_id}_L002_R2_001.fastq.gz"),
Path(tmp_path, f"FC_{sample_id}_L001_R1_001.fastq.gz"),
Path(tmp_path, f"FC_{sample_id}_L002_R1_001.fastq.gz"),
Path(tmp_path, f"FC_{sample_id}_L001_R2_001.fastq.gz"),
Path(tmp_path, f"FC_{sample_id}_L002_R2_001.fastq.gz"),
]

sample_files.extend(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
from pathlib import Path
from unittest.mock import Mock

import pytest

from cg.services.deliver_files.file_fetcher.models import DeliveryFiles
from cg.services.deliver_files.file_formatter.destination.models import FormattedFile
from cg.services.deliver_files.file_formatter.files.concatenation_service import (
SampleFileConcatenationFormatter,
)


@pytest.fixture
Expand Down Expand Up @@ -89,6 +93,7 @@ def expected_concatenated_fastq_formatted_files(
replaced_sample_file_name = replaced_sample_file_name.replace("L002_R1_001", "1")
replaced_sample_file_name = replaced_sample_file_name.replace("L001_R2_001", "2")
replaced_sample_file_name = replaced_sample_file_name.replace("L002_R2_001", "2")
replaced_sample_file_name = replaced_sample_file_name.replace("FC_", "")
formatted_file_path = Path(
sample_file.file_path.parent, sample_file.sample_name, replaced_sample_file_name
)
Expand All @@ -111,6 +116,7 @@ def expected_concatenated_fastq_flat_formatted_files(
replaced_sample_file_name = replaced_sample_file_name.replace("L002_R1_001", "1")
replaced_sample_file_name = replaced_sample_file_name.replace("L001_R2_001", "2")
replaced_sample_file_name = replaced_sample_file_name.replace("L002_R2_001", "2")
replaced_sample_file_name = replaced_sample_file_name.replace("FC_", "")
formatted_file_path = Path(sample_file.file_path.parent, replaced_sample_file_name)
formatted_files.append(
FormattedFile(original_path=sample_file.file_path, formatted_path=formatted_file_path)
Expand Down
4 changes: 2 additions & 2 deletions tests/fixture_plugins/delivery_fixtures/path_fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

@pytest.fixture
def delivery_fastq_file(tmp_path: Path, sample_id: str) -> Path:
    """Create a file named FC_{sample_id}_L001_R1_001 with the FASTQ_GZ extension in tmp_path.

    The file is touched on disk and its path is returned.
    """
    # The FC_ prefix places the sample id between two underscores in the file name.
    file = Path(tmp_path, f"FC_{sample_id}_L001_R1_001{FileExtensions.FASTQ_GZ}")
    file.touch()
    return file

Expand All @@ -34,7 +34,7 @@ def delivery_bam_file(tmp_path: Path, sample_id: str) -> Path:

@pytest.fixture
def delivery_another_fastq_file(tmp_path: Path, another_sample_id: str) -> Path:
    """Create a file named FC_{another_sample_id}L001_R1_001 with the FASTQ_GZ extension.

    The file is touched in tmp_path and its path is returned.
    """
    # NOTE(review): there is no underscore between the sample id and "L001" here, unlike
    # delivery_fastq_file, so this name does not contain "_{another_sample_id}_".
    # Confirm whether that is intentional before changing it.
    file = Path(tmp_path, f"FC_{another_sample_id}L001_R1_001{FileExtensions.FASTQ_GZ}")
    file.touch()
    return file

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -137,3 +137,45 @@ def test_mutant_file_formatter(
for file in formatted_files:
assert file.formatted_path.exists()
assert not file.original_path.exists()


def test_concatenation_sample_name_match():
    """Check that sample name "12" matches FC_12_* fastq paths but not FC_123_* ones."""
    # GIVEN a concatenation service and a sample name that is a number
    sample_name = "12"
    formatter = SampleFileConcatenationFormatter(
        file_manager=Mock(),
        path_name_formatter=Mock(),
        concatenation_service=Mock(),
    )

    # GIVEN two sets of file paths that should match and not match the sample name
    should_match_file_paths = [
        Path("path/to/FC_12_L001_R1_001.fastq.gz"),
        Path("path/to/FC_12_L002_R1_001.fastq.gz"),
        Path("path/to/FC_12_L001_R2_001.fastq.gz"),
        Path("path/to/FC_12_L002_R2_001.fastq.gz"),
    ]
    should_not_match_file_paths = [
        Path("path/to/FC_123_L001_R1_001.fastq.gz"),
        Path("path/to/FC_123_L002_R1_001.fastq.gz"),
        Path("path/to/FC_123_L001_R2_001.fastq.gz"),
        Path("path/to/FC_123_L002_R2_001.fastq.gz"),
    ]
    expectations = (
        (should_match_file_paths, True),
        (should_not_match_file_paths, False),
    )

    # WHEN checking if the file paths match the sample name
    for file_paths, expected in expectations:
        for file_path in file_paths:
            is_match = formatter._has_expected_sample_name_format_match(
                file_path=file_path, sample_name=sample_name
            )

            # THEN matching paths give exactly True and non-matching paths exactly False
            assert is_match is expected

0 comments on commit 5a95f62

Please sign in to comment.