Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor(decompose sample file formatter) #3960

Merged
merged 6 commits into from
Nov 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,11 @@
from cg.services.deliver_files.file_formatter.utils.sample_concatenation_service import (
SampleFileConcatenationFormatter,
)
from cg.services.deliver_files.file_formatter.utils.sample_service import SampleFileFormatter
from cg.services.deliver_files.file_formatter.utils.sample_service import (
SampleFileFormatter,
FileManagingService,
SampleFileNameFormatter,
)
from cg.services.deliver_files.file_mover.service import DeliveryFilesMover
from cg.services.deliver_files.rsync.service import DeliveryRsyncService
from cg.services.deliver_files.tag_fetcher.abstract import FetchDeliveryFileTagsService
Expand Down Expand Up @@ -135,8 +139,14 @@ def _get_sample_file_formatter(
"""Get the file formatter service based on the workflow."""
converted_workflow: Workflow = self._convert_workflow(case)
if converted_workflow in [Workflow.MICROSALT]:
return SampleFileConcatenationFormatter(FastqConcatenationService())
return SampleFileFormatter()
return SampleFileConcatenationFormatter(
file_manager=FileManagingService(),
file_formatter=SampleFileNameFormatter(),
concatenation_service=FastqConcatenationService(),
islean marked this conversation as resolved.
Show resolved Hide resolved
)
return SampleFileFormatter(
file_manager=FileManagingService(), file_name_formatter=SampleFileNameFormatter()
ChrOertlin marked this conversation as resolved.
Show resolved Hide resolved
)

def build_delivery_service(
self, case: Case, delivery_type: DataDelivery | None = None
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,26 +9,43 @@
from cg.services.deliver_files.file_fetcher.models import SampleFile
from cg.services.deliver_files.file_formatter.models import FormattedFile
from cg.services.deliver_files.file_formatter.utils.sample_service import (
SampleFileFormatter,
SampleFileNameFormatter,
FileManagingService,
)


class SampleFileConcatenationFormatter(SampleFileFormatter):
class SampleFileConcatenationFormatter:
"""
Format the sample files to deliver, concatenate fastq files and return the formatted files.
Used for workflows: Microsalt and Mutant.
Used for workflows: Microsalt.
"""

def __init__(self, concatenation_service: FastqConcatenationService):
def __init__(
self,
file_manager: FileManagingService,
file_formatter: SampleFileNameFormatter,
concatenation_service: FastqConcatenationService,
):
self.file_manager = file_manager
self.file_name_formatter = file_formatter
self.concatenation_service = concatenation_service

def format_files(
self, moved_files: list[SampleFile], ticket_dir_path: Path
) -> list[FormattedFile]:
"""Format the sample files to deliver, concatenate fastq files and return the formatted files."""
formatted_files: list[FormattedFile] = super().format_files(
moved_files=moved_files, ticket_dir_path=ticket_dir_path
sample_names: set[str] = self.file_name_formatter.get_sample_names(sample_files=moved_files)
for sample_name in sample_names:
self.file_manager.create_directories(
base_path=ticket_dir_path, directories={sample_name}
)
formatted_files: list[FormattedFile] = self.file_name_formatter.format_sample_file_names(
sample_files=moved_files
)
for formatted_file in formatted_files:
self.file_manager.rename_file(
src=formatted_file.original_path, dst=formatted_file.formatted_path
)
forward_paths, reverse_path = self._concatenate_fastq_files(formatted_files=formatted_files)
self._replace_fastq_paths(
reverse_paths=reverse_path,
Expand Down
89 changes: 58 additions & 31 deletions cg/services/deliver_files/file_formatter/utils/sample_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,54 +4,81 @@
from cg.services.deliver_files.file_formatter.models import FormattedFile


class SampleFileFormatter:
class FileManagingService:
"""
Format the sample files to deliver.
Used for all workflows except Microsalt and Mutant.
Service to manage files.
Handles operations that create or rename files and directories.
"""

def format_files(
self, moved_files: list[SampleFile], ticket_dir_path: Path
) -> list[FormattedFile]:
"""Format the sample files to deliver and return the formatted files."""
sample_names: set[str] = self._get_sample_names(moved_files)
self._create_sample_folders(ticket_dir_path=ticket_dir_path, sample_names=sample_names)
return self._format_sample_files(moved_files)

@staticmethod
def _get_sample_names(sample_files: list[SampleFile]) -> set[str]:
return set(sample_file.sample_name for sample_file in sample_files)
def create_directories(base_path: Path, directories: set[str]) -> None:
"""Create directories for given names under the base path."""
for directory in directories:
Path(base_path, directory).mkdir(exist_ok=True)

@staticmethod
def _create_sample_folders(ticket_dir_path: Path, sample_names: set[str]):
for sample_name in sample_names:
sample_dir_path = Path(ticket_dir_path, sample_name)
sample_dir_path.mkdir(exist_ok=True)
def rename_file(src: Path, dst: Path) -> None:
"""Rename a file from src to dst."""
os.rename(src, dst)

def _format_sample_files(self, sample_files: list[SampleFile]) -> list[FormattedFile]:
formatted_files: list[FormattedFile] = self._get_formatted_files(sample_files)
for formatted_file in formatted_files:
os.rename(src=formatted_file.original_path, dst=formatted_file.formatted_path)
return formatted_files

class SampleFileNameFormatter:
"""
Class to format sample file names.
"""

@staticmethod
def get_sample_names(sample_files: list[SampleFile]) -> set[str]:
"""Extract sample names from the sample files."""
return {sample_file.sample_name for sample_file in sample_files}

@staticmethod
def _get_formatted_files(sample_files: list[SampleFile]) -> list[FormattedFile]:
def format_sample_file_names(sample_files: list[SampleFile]) -> list[FormattedFile]:
"""
Returns formatted files:
Returns formatted files with original and formatted file names:
1. Adds a folder with sample name to the path of the sample files.
2. Replaces sample id by sample name.
"""
formatted_files: list[FormattedFile] = []
formatted_files = []
for sample_file in sample_files:
replaced_sample_file_name: str = sample_file.file_path.name.replace(
replaced_name = sample_file.file_path.name.replace(
sample_file.sample_id, sample_file.sample_name
)
formatted_file_path = Path(
sample_file.file_path.parent, sample_file.sample_name, replaced_sample_file_name
formatted_path = Path(
sample_file.file_path.parent, sample_file.sample_name, replaced_name
)
formatted_files.append(
FormattedFile(
original_path=sample_file.file_path, formatted_path=formatted_file_path
)
FormattedFile(original_path=sample_file.file_path, formatted_path=formatted_path)
)
return formatted_files


class SampleFileFormatter:
    """
    Format the sample files to deliver.
    Used for all workflows except Microsalt.

    File system operations (creating directories, renaming files) are delegated to the
    injected file manager, and name/path computation is delegated to the injected file
    name formatter.
    """

    def __init__(
        self, file_manager: FileManagingService, file_name_formatter: SampleFileNameFormatter
    ):
        # Collaborators are injected so the formatter itself holds no file system logic.
        self.file_manager = file_manager
        self.file_name_formatter = file_name_formatter

    def format_files(
        self, moved_files: list[SampleFile], ticket_dir_path: Path
    ) -> list[FormattedFile]:
        """
        Format the sample files to deliver and return the formatted files.

        Steps:
            1. Create one directory per sample name under the ticket directory.
            2. Compute the formatted path for every moved file.
            3. Rename every file from its original path to its formatted path.

        Args:
            moved_files: The sample files to format.
            ticket_dir_path: The directory under which the sample directories are created.

        Returns:
            The formatted files, in the order given by the file name formatter.
        """
        sample_names: set[str] = self.file_name_formatter.get_sample_names(sample_files=moved_files)
        # create_directories takes the whole set of names, so a single call is enough;
        # there is no need to wrap each name in its own one-element set.
        self.file_manager.create_directories(base_path=ticket_dir_path, directories=sample_names)
        formatted_files: list[FormattedFile] = self.file_name_formatter.format_sample_file_names(
            sample_files=moved_files
        )
        # The sample directories must exist before the files are renamed into them.
        for formatted_file in formatted_files:
            self.file_manager.rename_file(
                src=formatted_file.original_path, dst=formatted_file.formatted_path
            )
        return formatted_files
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
)
from cg.services.deliver_files.file_formatter.utils.sample_service import (
SampleFileFormatter,
FileManagingService,
SampleFileNameFormatter,
)
from cg.store.store import Store

Expand Down Expand Up @@ -119,5 +121,8 @@ def analysis_delivery_service_no_housekeeper_bundle(
def generic_delivery_file_formatter() -> DeliveryFileFormatter:
"""Fixture to get an instance of GenericDeliveryFileFormatter."""
return DeliveryFileFormatter(
sample_file_formatter=SampleFileFormatter(), case_file_formatter=CaseFileFormatter()
sample_file_formatter=SampleFileFormatter(
file_manager=FileManagingService(), file_name_formatter=SampleFileNameFormatter()
),
case_file_formatter=CaseFileFormatter(),
)
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ class DeliveryServiceScenario(BaseModel):
expected_tag_fetcher: type[FetchDeliveryFileTagsService]
expected_file_fetcher: type[FetchDeliveryFilesService]
expected_file_mover: type[DeliveryFilesMover]
expected_sample_file_formatter: type[SampleFileFormatter]
expected_sample_file_formatter: type[SampleFileFormatter | SampleFileConcatenationFormatter]
store_name: str


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
FastqConcatenationService,
)
from cg.services.deliver_files.file_fetcher.models import (
DeliveryFiles,
CaseFile,
SampleFile,
)
Expand All @@ -19,6 +18,8 @@
)
from cg.services.deliver_files.file_formatter.utils.sample_service import (
SampleFileFormatter,
FileManagingService,
SampleFileNameFormatter,
)


Expand All @@ -33,12 +34,18 @@
(
"expected_moved_analysis_sample_delivery_files",
"expected_formatted_analysis_sample_files",
SampleFileFormatter(),
SampleFileFormatter(
file_manager=FileManagingService(), file_name_formatter=SampleFileNameFormatter()
),
),
(
"fastq_concatenation_sample_files",
"expected_concatenated_fastq_formatted_files",
SampleFileConcatenationFormatter(FastqConcatenationService()),
SampleFileConcatenationFormatter(
file_manager=FileManagingService(),
file_formatter=SampleFileNameFormatter(),
concatenation_service=FastqConcatenationService(),
),
),
],
)
Expand Down
Loading