Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add(delivery file formatter) #3626

Merged
merged 30 commits into from
Aug 29, 2024
Merged
Show file tree
Hide file tree
Changes from 29 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
4dd94c8
initial commit
ChrOertlin Aug 21, 2024
c09db2f
fixes
ChrOertlin Aug 21, 2024
f449da4
fix
ChrOertlin Aug 21, 2024
33456ef
add test
ChrOertlin Aug 21, 2024
f175540
fix test
ChrOertlin Aug 22, 2024
511ed7f
add exists assertion
ChrOertlin Aug 22, 2024
9063fc8
add func call
ChrOertlin Aug 22, 2024
8a64d3b
initial commit
ChrOertlin Aug 22, 2024
83d6c9f
Update tests/fixture_plugins/delivery_fixtures/delivery_files_models_…
ChrOertlin Aug 22, 2024
bacaa11
Update tests/fixture_plugins/delivery_fixtures/delivery_files_models_…
ChrOertlin Aug 22, 2024
0533071
Apply suggestions from code review
ChrOertlin Aug 22, 2024
3fbc9ef
review
ChrOertlin Aug 22, 2024
4eca28e
Merge branch 'add-file-mover' into add-file-reformatters
ChrOertlin Aug 22, 2024
f83f694
conflicts
ChrOertlin Aug 22, 2024
652938e
refactor logic
ChrOertlin Aug 26, 2024
1aad85b
fix formatter
ChrOertlin Aug 26, 2024
6cabd68
fix concatenation
ChrOertlin Aug 26, 2024
56d7ec1
tests
ChrOertlin Aug 26, 2024
2845d05
add tests for formatters
ChrOertlin Aug 27, 2024
481c518
add test
ChrOertlin Aug 27, 2024
293ee09
review comments
ChrOertlin Aug 27, 2024
db99c05
make super call
ChrOertlin Aug 27, 2024
93056cd
simplify formatter
ChrOertlin Aug 27, 2024
c13ce34
add docstring
ChrOertlin Aug 27, 2024
5d1ddb9
solve imports
ChrOertlin Aug 27, 2024
3d605ac
review pt1
ChrOertlin Aug 28, 2024
1694efe
review pt2
ChrOertlin Aug 28, 2024
1c6e67c
small fix
ChrOertlin Aug 28, 2024
45f8777
missing typehints
ChrOertlin Aug 28, 2024
729de7a
typehint
ChrOertlin Aug 29, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions cg/services/fastq_concatenation_service/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import uuid

from cg.services.fastq_concatenation_service.exceptions import ConcatenationError
from cg.constants.constants import ReadDirection
from cg.constants.constants import ReadDirection, FileFormat
from cg.constants import FileExtensions


Expand Down Expand Up @@ -73,7 +73,11 @@ def sort_files_by_name(files: list[Path]) -> list[Path]:


def file_can_be_removed(file: Path, forward_file: Path, reverse_file: Path) -> bool:
    """Return True if the file is a gzipped FASTQ that is not one of the two kept files.

    Args:
        file: Candidate file to remove.
        forward_file: File that must be kept (forward reads).
        reverse_file: File that must be kept (reverse reads).

    Returns:
        True when the file name ends with the FASTQ format followed by the gzip
        extension and the file is neither the forward nor the reverse file.
    """
    if file in (forward_file, reverse_file):
        return False
    # Anchor the match to the end of the name: a plain substring check would also
    # match side files whose names merely contain the FASTQ/gzip ending.
    return file.name.endswith(f"{FileFormat.FASTQ}{FileExtensions.GZIP}")


def remove_raw_fastqs(fastq_directory: Path, forward_file: Path, reverse_file: Path) -> None:
Expand Down
18 changes: 7 additions & 11 deletions cg/services/file_delivery/abstract_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,16 @@
from cg.services.file_delivery.fetch_file_service.fetch_delivery_files_service import (
FetchDeliveryFilesService,
)

from cg.services.file_delivery.file_formatter_service.delivery_file_formatting_service import (
DeliveryFileFormattingService,
)

from cg.services.file_delivery.move_files_service.move_delivery_files_service import (
MoveDeliveryFilesService,
)


class FormatDeliveryFilesService(ABC):
    """Interface for services that format the files to deliver for a case."""

    @abstractmethod
    def format_files(self, case_id: str) -> None:
        """Format the files to deliver for the case with the given id."""


class DeliverFilesService(ABC):
"""
Abstract class that encapsulates the logic required for delivering files to the customer.
Expand All @@ -35,9 +29,11 @@ def __init__(
self,
delivery_file_manager_service: FetchDeliveryFilesService,
move_file_service: MoveDeliveryFilesService,
file_formatter_service: DeliveryFileFormattingService,
):
self.file_manager = delivery_file_manager_service
self.file_mover = move_file_service
self.file_formatter = file_formatter_service

@abstractmethod
def deliver_files_for_case(self, case_id: str) -> None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,17 @@ def _get_sample_files_from_case_bundle(
"""Return a list of files from a case bundle with a sample id as tag."""
sample_tags: list[set[str]] = self.tags_fetcher.fetch_tags(workflow).sample_tags
sample_tags_with_sample_id: list[set[str]] = [tag | {sample_id} for tag in sample_tags]

sample_files: list[File] = self.hk_api.get_files_from_latest_version_containing_tags(
bundle_name=case_id, tags=sample_tags_with_sample_id
)
sample_name: str = self.status_db.get_sample_by_internal_id(sample_id).name
return [
SampleFile(case_id=case_id, sample_id=sample_id, file_path=sample_file.full_path)
SampleFile(
case_id=case_id,
sample_id=sample_id,
sample_name=sample_name,
file_path=sample_file.full_path,
)
for sample_file in sample_files
]

Expand All @@ -82,6 +87,10 @@ def get_analysis_case_delivery_files(self, case: Case) -> list[CaseFile]:
bundle_name=case.internal_id, tags=case_tags, excluded_tags=sample_id_tags
)
return [
CaseFile(case_id=case.internal_id, file_path=case_file.full_path)
CaseFile(
case_id=case.internal_id,
case_name=case.name,
file_path=case_file.full_path,
)
for case_file in case_files
]
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,14 @@ def get_files_to_deliver(self, case_id: str) -> DeliveryFiles:
self.hk_api,
tags_fetcher=FetchSampleAndCaseDeliveryFileTagsService(),
)
fastq_files = fetch_fastq_service.get_files_to_deliver(case_id)
analysis_files = fetch_analysis_service.get_files_to_deliver(case_id)

fastq_files: DeliveryFiles = fetch_fastq_service.get_files_to_deliver(case_id)
analysis_files: DeliveryFiles = fetch_analysis_service.get_files_to_deliver(case_id)
delivery_data = DeliveryMetaData(
customer_internal_id=case.customer.internal_id, ticket_id=case.latest_ticket
)
return DeliveryFiles(
delivery_data=delivery_data,
case_files=analysis_case_files,
sample_files=analysis_sample_files,
case_files=analysis_files.case_files,
sample_files=analysis_files.sample_files + fastq_files.sample_files,
)
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,13 @@ def _get_fastq_files_for_sample(self, case_id: str, sample_id: str) -> list[Samp
fastq_files: list[File] = self.hk_api.get_files_from_latest_version_containing_tags(
bundle_name=sample_id, tags=fastq_tags
)
sample_name: str = self.status_db.get_sample_by_internal_id(sample_id).name
return [
SampleFile(case_id=case_id, sample_id=sample_id, file_path=fastq_file.full_path)
SampleFile(
case_id=case_id,
sample_id=sample_id,
sample_name=sample_name,
file_path=fastq_file.full_path,
)
for fastq_file in fastq_files
]
2 changes: 2 additions & 0 deletions cg/services/file_delivery/fetch_file_service/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,14 @@ class DeliveryMetaData(BaseModel):

class CaseFile(BaseModel):
    """A file to deliver that belongs to a case."""

    # Internal id of the case the file belongs to.
    case_id: str
    # Name of the case; the case file formatter substitutes it for the case id in file names.
    case_name: str
    # Path to the file.
    file_path: Path


class SampleFile(BaseModel):
    """A file to deliver that belongs to a sample within a case."""

    # Id of the case the sample file is delivered for.
    case_id: str
    # Internal id of the sample the file belongs to.
    sample_id: str
    # Name of the sample, looked up from its internal id when the file is fetched.
    sample_name: str
    # Path to the file.
    file_path: Path


Expand Down
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import os
from pathlib import Path

from cg.constants.delivery import INBOX_NAME
from cg.services.file_delivery.fetch_file_service.models import (
    CaseFile,
    DeliveryFiles,
    SampleFile,
)
from cg.services.file_delivery.file_formatter_service.delivery_file_formatting_service import (
    DeliveryFileFormattingService,
)
from cg.services.file_delivery.file_formatter_service.models import FormattedFile, FormattedFiles
from cg.services.file_delivery.file_formatter_service.utils.case_file_formatter import (
    CaseFileFormatter,
)
from cg.services.file_delivery.file_formatter_service.utils.sample_file_concatenation_formatter import (
    SampleFileConcatenationFormatter,
)
from cg.services.file_delivery.file_formatter_service.utils.sample_file_formatter import (
    SampleFileFormatter,
)


class DeliveryFileFormatter(DeliveryFileFormattingService):
    """
    Format the files to be delivered in the generic format.
    Expected structure:
    <customer>/inbox/<ticket_id>/<case_name>/<case_files>
    <customer>/inbox/<ticket_id>/<sample_name>/<sample_files>
    """

    def __init__(
        self,
        case_file_formatter: CaseFileFormatter,
        sample_file_formatter: SampleFileFormatter | SampleFileConcatenationFormatter,
    ):
        self.case_file_formatter = case_file_formatter
        self.sample_file_formatter = sample_file_formatter

    def format_files(self, delivery_files: DeliveryFiles) -> FormattedFiles:
        """Format the files to be delivered and return the formatted files in the generic format.

        Raises:
            ValueError: If there are no files to format, or if the ticket directory
                cannot be derived from the path of the first file.
        """
        ticket_dir_path: Path = self.get_folder_under_inbox(
            self._get_reference_file_path(delivery_files)
        )
        self._create_ticket_dir(ticket_dir_path)
        formatted_files: list[FormattedFile] = self._format_sample_and_case_files(
            sample_files=delivery_files.sample_files,
            case_files=delivery_files.case_files,
            ticket_dir_path=ticket_dir_path,
        )
        return FormattedFiles(files=formatted_files)

    @staticmethod
    def _get_reference_file_path(delivery_files: DeliveryFiles) -> Path:
        """Return the path of the first available file, preferring sample files over case files."""
        # A delivery without sample files must not fail with an IndexError when
        # case files are available to locate the ticket directory.
        for files in (delivery_files.sample_files, delivery_files.case_files):
            if files:
                return files[0].file_path
        raise ValueError("Could not find any sample or case files to format.")

    def _format_sample_and_case_files(
        self,
        sample_files: list[SampleFile],
        case_files: list[CaseFile] | None,
        ticket_dir_path: Path,
    ) -> list[FormattedFile]:
        """Helper method to format both sample and case files."""
        formatted_files: list[FormattedFile] = []
        if sample_files:
            formatted_files.extend(
                self.sample_file_formatter.format_files(
                    moved_files=sample_files,
                    ticket_dir_path=ticket_dir_path,
                )
            )
        if case_files:
            formatted_files.extend(
                self.case_file_formatter.format_files(
                    moved_files=case_files,
                    ticket_dir_path=ticket_dir_path,
                )
            )
        return formatted_files

    @staticmethod
    def get_folder_under_inbox(file_path: Path) -> Path:
        """Return the path up to and including the folder directly under the inbox.

        Raises:
            ValueError: If the inbox directory is not part of the path, or if the
                path ends at the inbox directory.
        """
        path_parts: tuple[str, ...] = file_path.parts
        try:
            inbox_index: int = path_parts.index(INBOX_NAME)
        except ValueError as error:
            raise ValueError(
                f"Could not find the inbox directory in the path: {file_path}"
            ) from error
        # The slice silently stops at the inbox when nothing follows it, which would
        # make the inbox itself pass for the folder underneath it.
        if len(path_parts) < inbox_index + 2:
            raise ValueError(
                f"Could not find a folder under the inbox directory in the path: {file_path}"
            )
        return Path(*path_parts[: inbox_index + 2])

    @staticmethod
    def _create_ticket_dir(ticket_dir_path: Path) -> None:
        """Create the ticket directory, including missing parents, if it does not exist."""
        ticket_dir_path.mkdir(parents=True, exist_ok=True)
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from abc import abstractmethod, ABC

from cg.services.file_delivery.fetch_file_service.models import DeliveryFiles
from cg.services.file_delivery.file_formatter_service.models import FormattedFiles
from cg.store.store import Store


class DeliveryFileFormattingService(ABC):
    """Interface for services that format the files to deliver."""

    @abstractmethod
    def format_files(self, delivery_files: DeliveryFiles) -> FormattedFiles:
        """Format the given delivery files and return the formatted files."""
11 changes: 11 additions & 0 deletions cg/services/file_delivery/file_formatter_service/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from pathlib import Path
from pydantic import BaseModel


class FormattedFile(BaseModel):
    """Pairs a file's path before formatting with its path after formatting."""

    # Path of the file before formatting.
    original_path: Path
    # Path of the file after formatting.
    formatted_path: Path


class FormattedFiles(BaseModel):
    """Collection of formatted files."""

    # One entry per formatted file.
    files: list[FormattedFile]
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import os
from pathlib import Path

from cg.services.file_delivery.fetch_file_service.models import CaseFile
from cg.services.file_delivery.file_formatter_service.models import FormattedFile


class CaseFileFormatter:
    """Move case files into a folder named after the case and use the case name in file names."""

    def format_files(
        self, moved_files: list[CaseFile], ticket_dir_path: Path
    ) -> list[FormattedFile]:
        """Format the case files to deliver and return the formatted files.

        Returns an empty list when there are no case files to format.
        """
        if not moved_files:
            return []
        self._create_case_name_folder(
            ticket_path=ticket_dir_path, case_name=moved_files[0].case_name
        )
        return self._format_case_files(moved_files)

    def _format_case_files(self, case_files: list[CaseFile]) -> list[FormattedFile]:
        """Rename every case file to its formatted path and return the formatted files."""
        formatted_files: list[FormattedFile] = self._get_formatted_files(case_files)
        for formatted_file in formatted_files:
            # The destination is derived from the file's own parent directory, which
            # is not necessarily the ticket directory in which the case folder was
            # created, so make sure it exists before renaming.
            formatted_file.formatted_path.parent.mkdir(parents=True, exist_ok=True)
            os.rename(src=formatted_file.original_path, dst=formatted_file.formatted_path)
        return formatted_files

    @staticmethod
    def _create_case_name_folder(ticket_path: Path, case_name: str) -> None:
        """Create the folder named after the case in the ticket directory."""
        case_dir_path = Path(ticket_path, case_name)
        case_dir_path.mkdir(parents=True, exist_ok=True)

    @staticmethod
    def _get_formatted_files(case_files: list[CaseFile]) -> list[FormattedFile]:
        """
        Returns formatted files:
        1. Adds a folder with case name to the path of the case files.
        2. Replaces case id by case name.
        """
        formatted_files: list[FormattedFile] = []
        for case_file in case_files:
            replaced_case_file_name: str = case_file.file_path.name.replace(
                case_file.case_id, case_file.case_name
            )
            formatted_file_path = Path(
                case_file.file_path.parent, case_file.case_name, replaced_case_file_name
            )
            formatted_files.append(
                FormattedFile(original_path=case_file.file_path, formatted_path=formatted_file_path)
            )
        return formatted_files
Loading
Loading