diff --git a/cg/apps/lims/api.py b/cg/apps/lims/api.py index 71a66b799e..33b6f3130d 100644 --- a/cg/apps/lims/api.py +++ b/cg/apps/lims/api.py @@ -555,3 +555,11 @@ def _get_negative_controls_from_list(samples: list[Sample]) -> list[Sample]: ): negative_controls.append(sample) return negative_controls + + def get_sample_region_and_lab_code(self, sample_id: str) -> str: + """Return the region code and lab code for a sample formatted as a prefix string.""" + region_code: str = self.get_sample_attribute(lims_id=sample_id, key="region_code").split( + " " + )[0] + lab_code: str = self.get_sample_attribute(lims_id=sample_id, key="lab_code").split(" ")[0] + return f"{region_code}_{lab_code}_" diff --git a/cg/cli/deliver/base.py b/cg/cli/deliver/base.py index 265fba2f8f..8762c8c555 100644 --- a/cg/cli/deliver/base.py +++ b/cg/cli/deliver/base.py @@ -15,7 +15,7 @@ from cg.services.deliver_files.deliver_files_service.deliver_files_service import ( DeliverFilesService, ) -from cg.services.deliver_files.deliver_files_service.deliver_files_service_factory import ( +from cg.services.deliver_files.factory import ( DeliveryServiceFactory, ) from cg.services.deliver_files.rsync.service import DeliveryRsyncService @@ -88,8 +88,7 @@ def deliver_case( LOG.error(f"Could not find case with id {case_id}") return delivery_service: DeliverFilesService = service_builder.build_delivery_service( - case=case, - delivery_type=delivery_type, + case=case, delivery_type=delivery_type ) delivery_service.deliver_files_for_case( case=case, delivery_base_path=Path(inbox), dry_run=dry_run @@ -124,8 +123,7 @@ def deliver_ticket( LOG.error(f"Could not find case connected to ticket {ticket}") return delivery_service: DeliverFilesService = service_builder.build_delivery_service( - case=cases[0], - delivery_type=delivery_type, + case=cases[0], delivery_type=delivery_type ) delivery_service.deliver_files_for_ticket( ticket_id=ticket, delivery_base_path=Path(inbox), dry_run=dry_run @@ -172,8 +170,7 @@ def 
deliver_sample_raw_data( LOG.error(f"Could not find case with id {case_id}") return delivery_service: DeliverFilesService = service_builder.build_delivery_service( - case=case, - delivery_type=delivery_type, + case=case, delivery_type=delivery_type ) delivery_service.deliver_files_for_sample( case=case, sample_id=sample_id, delivery_base_path=Path(inbox), dry_run=dry_run diff --git a/cg/cli/deliver/utils.py b/cg/cli/deliver/utils.py index 14e8255c51..f4b0040112 100644 --- a/cg/cli/deliver/utils.py +++ b/cg/cli/deliver/utils.py @@ -5,7 +5,7 @@ from cg.services.deliver_files.deliver_files_service.deliver_files_service import ( DeliverFilesService, ) -from cg.services.deliver_files.deliver_files_service.deliver_files_service_factory import ( +from cg.services.deliver_files.factory import ( DeliveryServiceFactory, ) from cg.store.models import Analysis, Case @@ -26,8 +26,7 @@ def deliver_raw_data_for_analyses( try: case: Case = analysis.case delivery_service: DeliverFilesService = service_builder.build_delivery_service( - case=case, - delivery_type=case.data_delivery, + case=case, delivery_type=case.data_delivery ) delivery_service.deliver_files_for_case( diff --git a/cg/cli/upload/base.py b/cg/cli/upload/base.py index 4bf46d6303..46ffa1ac0d 100644 --- a/cg/cli/upload/base.py +++ b/cg/cli/upload/base.py @@ -39,6 +39,7 @@ from cg.meta.upload.microsalt.microsalt_upload_api import MicrosaltUploadAPI from cg.meta.upload.mip.mip_dna import MipDNAUploadAPI from cg.meta.upload.mip.mip_rna import MipRNAUploadAPI +from cg.meta.upload.mutant.mutant import MutantUploadAPI from cg.meta.upload.nf_analysis import NfAnalysisUploadAPI from cg.meta.upload.tomte.tomte import TomteUploadAPI from cg.meta.upload.raredisease.raredisease import RarediseaseUploadAPI @@ -94,6 +95,8 @@ def upload(context: click.Context, case_id: str | None, restart: bool): Workflow.TAXPROFILER, }: upload_api = NfAnalysisUploadAPI(config_object, case.data_analysis) + elif case.data_analysis == Workflow.MUTANT: + 
upload_api = MutantUploadAPI(config_object) context.obj.meta_apis["upload_api"] = upload_api upload_api.upload(ctx=context, case=case, restart=restart) diff --git a/cg/cli/upload/fohm.py b/cg/cli/upload/fohm.py index 6571e0be8c..34caf6ba9f 100644 --- a/cg/cli/upload/fohm.py +++ b/cg/cli/upload/fohm.py @@ -41,7 +41,11 @@ def aggregate_delivery( context: CGConfig, cases: list, dry_run: bool = False, datestr: str | None = None ): """Re-aggregates delivery files for FOHM and saves them to default working directory.""" - fohm_api = FOHMUploadAPI(config=context, dry_run=dry_run, datestr=datestr) + fohm_api = FOHMUploadAPI( + config=context, + dry_run=dry_run, + datestr=datestr, + ) try: fohm_api.aggregate_delivery(cases) except (ValidationError, TypeError) as error: @@ -57,7 +61,11 @@ def create_komplettering( context: CGConfig, cases: list, dry_run: bool = False, datestr: str | None = None ): """Re-aggregates komplettering files for FOHM and saves them to default working directory.""" - fohm_api = FOHMUploadAPI(config=context, dry_run=dry_run, datestr=datestr) + fohm_api = FOHMUploadAPI( + config=context, + dry_run=dry_run, + datestr=datestr, + ) try: fohm_api.create_and_write_complementary_report(cases) except ValidationError as error: @@ -73,7 +81,11 @@ def preprocess_all( context: CGConfig, cases: list, dry_run: bool = False, datestr: str | None = None ): """Create all FOHM upload files, upload to GISAID, sync SFTP and mail reports for all provided cases.""" - fohm_api = FOHMUploadAPI(config=context, dry_run=dry_run, datestr=datestr) + fohm_api = FOHMUploadAPI( + config=context, + dry_run=dry_run, + datestr=datestr, + ) gisaid_api = GisaidAPI(config=context) cases = list(cases) upload_cases = [] @@ -105,7 +117,11 @@ def preprocess_all( @click.pass_obj def upload_rawdata(context: CGConfig, dry_run: bool = False, datestr: str | None = None): """Deliver files in daily upload directory via sftp.""" - fohm_api = FOHMUploadAPI(config=context, dry_run=dry_run, 
datestr=datestr) + fohm_api = FOHMUploadAPI( + config=context, + dry_run=dry_run, + datestr=datestr, + ) fohm_api.sync_files_sftp() @@ -115,5 +131,9 @@ def upload_rawdata(context: CGConfig, dry_run: bool = False, datestr: str | None @click.pass_obj def send_reports(context: CGConfig, dry_run: bool = False, datestr: str | None = None): """Send all komplettering reports found in the current daily directory to target recipients.""" - fohm_api = FOHMUploadAPI(config=context, dry_run=dry_run, datestr=datestr) + fohm_api = FOHMUploadAPI( + config=context, + dry_run=dry_run, + datestr=datestr, + ) fohm_api.send_mail_reports() diff --git a/cg/constants/delivery.py b/cg/constants/delivery.py index ec492f28f0..f914305e3e 100644 --- a/cg/constants/delivery.py +++ b/cg/constants/delivery.py @@ -144,7 +144,6 @@ ] MUTANT_ANALYSIS_SAMPLE_TAGS: list[set[str]] = [ - {"fastq"}, {"vcf", "vcf-report", "fohm-delivery"}, ] diff --git a/cg/constants/orderforms.py b/cg/constants/orderforms.py index b071904789..b02d088c3b 100644 --- a/cg/constants/orderforms.py +++ b/cg/constants/orderforms.py @@ -39,7 +39,7 @@ def get_current_orderform_version(order_form: str) -> str: Orderform.MIP_DNA: "33", Orderform.RML: "19", Orderform.MICROSALT: "11", - Orderform.SARS_COV_2: "9", + Orderform.SARS_COV_2: "10", Orderform.MICROBIAL_FASTQ: "1", Orderform.PACBIO_LONG_READ: "1", } diff --git a/cg/meta/upload/fohm/fohm.py b/cg/meta/upload/fohm/fohm.py index 8026debda8..f416777a48 100644 --- a/cg/meta/upload/fohm/fohm.py +++ b/cg/meta/upload/fohm/fohm.py @@ -3,22 +3,22 @@ import logging import os import re -import shutil from pathlib import Path - import paramiko -from housekeeper.store.models import Version - from cg.apps.housekeeper.hk import HousekeeperAPI from cg.apps.lims import LimsAPI from cg.constants import FileExtensions -from cg.constants.constants import SARS_COV_REGEX +from cg.constants.constants import SARS_COV_REGEX, DataDelivery from cg.constants.housekeeper_tags import FohmTag from cg.exc 
import CgError from cg.io.csv import read_csv, write_csv_from_dict from cg.models.cg_config import CGConfig from cg.models.email import EmailInfo from cg.models.fohm.reports import FohmComplementaryReport, FohmPangolinReport +from cg.services.deliver_files.constants import DeliveryDestination, DeliveryStructure +from cg.services.deliver_files.factory import ( + DeliveryServiceFactory, +) from cg.store.models import Case, Sample from cg.store.store import Store from cg.utils.dict import remove_duplicate_dicts @@ -28,7 +28,12 @@ class FOHMUploadAPI: - def __init__(self, config: CGConfig, dry_run: bool = False, datestr: str | None = None): + def __init__( + self, + config: CGConfig, + dry_run: bool = False, + datestr: str | None = None, + ): self.config: CGConfig = config self.housekeeper_api: HousekeeperAPI = config.housekeeper_api self.lims_api: LimsAPI = config.lims_api @@ -44,6 +49,7 @@ def __init__(self, config: CGConfig, dry_run: bool = False, datestr: str | None self._reports_dataframe = None self._pangolin_dataframe = None self._aggregation_dataframe = None + self._delivery_factory: DeliveryServiceFactory = config.delivery_service_factory @property def current_datestr(self) -> str: @@ -196,16 +202,16 @@ def link_sample_raw_data_files( sample: Sample = self.status_db.get_sample_by_internal_id( internal_id=report.internal_id ) - bundle_name: str = sample.links[0].case.internal_id - version: Version = self.housekeeper_api.last_version(bundle=bundle_name) - files = self.housekeeper_api.files(version=version.id, tags={report.internal_id}).all() - for file in files: - if self._dry_run: - LOG.info( - f"Would have copied {file.full_path} to {Path(self.daily_rawdata_path)}" - ) - continue - shutil.copy(file.full_path, Path(self.daily_rawdata_path)) + case: Case = sample.links[0].case + delivery_service = self._delivery_factory.build_delivery_service( + case=case, + delivery_type=DataDelivery.FASTQ_ANALYSIS, + delivery_destination=DeliveryDestination.FOHM, + 
delivery_structure=DeliveryStructure.FLAT, + ) + delivery_service.deliver_files_for_sample_no_rsync( + case=case, sample_id=sample.internal_id, delivery_base_path=self.daily_rawdata_path + ) def create_pangolin_report(self, reports: list[FohmPangolinReport]) -> None: LOG.info("Creating aggregate Pangolin report") @@ -362,9 +368,13 @@ def parse_and_write_pangolin_report(self) -> list[FohmPangolinReport]: self.create_pangolin_report(sars_cov_pangolin_reports) return sars_cov_pangolin_reports - def aggregate_delivery(self, cases: list[str]) -> None: - """Aggregate and hardlink reports.""" - self.set_cases_to_aggregate(cases) + def aggregate_delivery(self, case_ids: list[str]) -> None: + """ + Aggregate and hardlink reports. + args: + case_ids: The internal ids for cases to aggregate. + """ + self.set_cases_to_aggregate(case_ids) self.create_daily_delivery_folders() sars_cov_complementary_reports: list[FohmComplementaryReport] = ( self.parse_and_write_complementary_report() diff --git a/cg/meta/upload/mutant/mutant.py b/cg/meta/upload/mutant/mutant.py new file mode 100644 index 0000000000..a61398ebe1 --- /dev/null +++ b/cg/meta/upload/mutant/mutant.py @@ -0,0 +1,27 @@ +from click import Context + +from cg.meta.upload.fohm.fohm import FOHMUploadAPI +from cg.meta.upload.gisaid import GisaidAPI +from cg.meta.upload.upload_api import UploadAPI +from cg.meta.workflow.mutant import MutantAnalysisAPI +from cg.models.cg_config import CGConfig +from cg.store.models import Analysis, Case + + +class MutantUploadAPI(UploadAPI): + + def __init__(self, config: CGConfig): + self.analysis_api: MutantAnalysisAPI = MutantAnalysisAPI(config) + self.fohm_api = FOHMUploadAPI(config) + self.gsaid_api = GisaidAPI(config) + + super().__init__(config=config, analysis_api=self.analysis_api) + + def upload(self, ctx: Context, case: Case, restart: bool) -> None: + latest_analysis: Analysis = case.analyses[0] + self.update_upload_started_at(latest_analysis) + 
self.upload_files_to_customer_inbox(case) + self.gsaid_api.upload(case.internal_id) + self.fohm_api.aggregate_delivery(case_ids=[case.internal_id]) + self.fohm_api.sync_files_sftp() + self.update_uploaded_at(latest_analysis) diff --git a/cg/meta/upload/upload_api.py b/cg/meta/upload/upload_api.py index d455079f92..ce52c9d913 100644 --- a/cg/meta/upload/upload_api.py +++ b/cg/meta/upload/upload_api.py @@ -15,7 +15,7 @@ from cg.services.deliver_files.deliver_files_service.deliver_files_service import ( DeliverFilesService, ) -from cg.services.deliver_files.deliver_files_service.deliver_files_service_factory import ( +from cg.services.deliver_files.factory import ( DeliveryServiceFactory, ) from cg.store.models import Analysis, Case @@ -97,8 +97,7 @@ def upload_files_to_customer_inbox(self, case: Case) -> None: """Uploads the analysis files to the customer inbox.""" factory_service: DeliveryServiceFactory = self.config.delivery_service_factory delivery_service: DeliverFilesService = factory_service.build_delivery_service( - case=case, - delivery_type=case.data_delivery, + case=case, delivery_type=case.data_delivery ) delivery_service.deliver_files_for_case( case=case, delivery_base_path=Path(self.config.delivery_path) diff --git a/cg/models/cg_config.py b/cg/models/cg_config.py index 66673796a8..bab7e52a26 100644 --- a/cg/models/cg_config.py +++ b/cg/models/cg_config.py @@ -27,7 +27,7 @@ from cg.meta.delivery.delivery import DeliveryAPI from cg.services.analysis_service.analysis_service import AnalysisService from cg.services.decompression_service.decompressor import Decompressor -from cg.services.deliver_files.deliver_files_service.deliver_files_service_factory import ( +from cg.services.deliver_files.factory import ( DeliveryServiceFactory, ) from cg.services.deliver_files.rsync.models import RsyncDeliveryConfig @@ -748,6 +748,7 @@ def delivery_service_factory(self) -> DeliveryServiceFactory: LOG.debug("Instantiating delivery service factory") factory = 
DeliveryServiceFactory( store=self.status_db, + lims_api=self.lims_api, hk_api=self.housekeeper_api, tb_service=self.trailblazer_api, rsync_service=self.delivery_rsync_service, diff --git a/cg/services/deliver_files/constants.py b/cg/services/deliver_files/constants.py new file mode 100644 index 0000000000..b126b7cf09 --- /dev/null +++ b/cg/services/deliver_files/constants.py @@ -0,0 +1,23 @@ +from enum import Enum + + +class DeliveryDestination(Enum): + """Enum for the DeliveryDestination + BASE: Deliver to the base folder provided in the call + CUSTOMER: Deliver to the customer folder on hasta + FOHM: Deliver to the FOHM folder on hasta + """ + + BASE = "base" + CUSTOMER = "customer" + FOHM = "fohm" + + +class DeliveryStructure(Enum): + """Enum for the DeliveryStructure + FLAT: Deliver the files in a flat structure, i.e. all files in the same folder + NESTED: Deliver the files in a nested structure, i.e. files in folders for each sample/case + """ + + FLAT: str = "flat" + NESTED: str = "nested" diff --git a/cg/services/deliver_files/deliver_files_service/deliver_files_service.py b/cg/services/deliver_files/deliver_files_service/deliver_files_service.py index bc2f8a7ddd..46cfe38a65 100644 --- a/cg/services/deliver_files/deliver_files_service/deliver_files_service.py +++ b/cg/services/deliver_files/deliver_files_service/deliver_files_service.py @@ -12,10 +12,11 @@ ) from cg.services.deliver_files.file_fetcher.abstract import FetchDeliveryFilesService from cg.services.deliver_files.file_fetcher.models import DeliveryFiles -from cg.services.deliver_files.file_filter.abstract import FilterDeliveryFilesService -from cg.services.deliver_files.file_formatter.abstract import DeliveryFileFormattingService -from cg.services.deliver_files.file_formatter.models import FormattedFiles -from cg.services.deliver_files.file_mover.service import DeliveryFilesMover +from cg.services.deliver_files.file_formatter.destination.abstract import ( + DeliveryDestinationFormatter, +) +from 
cg.services.deliver_files.file_formatter.destination.models import FormattedFiles +from cg.services.deliver_files.file_mover.abstract import DestinationFilesMover from cg.services.deliver_files.rsync.service import DeliveryRsyncService from cg.store.exc import EntryNotFoundError from cg.store.models import Case @@ -26,27 +27,28 @@ class DeliverFilesService: """ - Deliver files to the customer inbox on the HPC and Rsync them to the inbox folder on the delivery server. - 1. Get the files to deliver from Housekeeper based on workflow and data delivery - 2. Create a delivery folder structure in the customer folder on Hasta and move the files there - 3. Reformatting of output / renaming of files - 4. Rsync the files to the customer inbox on the delivery server - 5. Add the rsync job to Trailblazer + Deliver files for a case, cases in a ticket or a sample to a specified destination or upload location. + Requires: + - FetchDeliveryFilesService: Service to fetch the files to deliver from housekeeper + - DestinationFilesMover: Service to move the files to the destination of delivery or upload + - DeliveryDestinationFormatter: Service to format the files to the destination format + - DeliveryRsyncService: Service to run rsync for the delivery + - TrailblazerAPI: Service to interact with Trailblazer + - AnalysisService: Service to interact with the analysis + - Store: Store to interact with the database """ def __init__( self, delivery_file_manager_service: FetchDeliveryFilesService, - file_filter: FilterDeliveryFilesService, - move_file_service: DeliveryFilesMover, - file_formatter_service: DeliveryFileFormattingService, + move_file_service: DestinationFilesMover, + file_formatter_service: DeliveryDestinationFormatter, rsync_service: DeliveryRsyncService, tb_service: TrailblazerAPI, analysis_service: AnalysisService, status_db: Store, ): self.file_manager = delivery_file_manager_service - self.file_filter = file_filter self.file_mover = move_file_service self.file_formatter 
= file_formatter_service self.status_db = status_db @@ -58,14 +60,22 @@ def __init__( def deliver_files_for_case( self, case: Case, delivery_base_path: Path, dry_run: bool = False ) -> None: - """Deliver the files for a case to the customer folder.""" + """Deliver the files for a case to the customer folder. + args: + case: The case to deliver files for + delivery_base_path: The base path to deliver the files to + dry_run: Whether to perform a dry run or not + """ delivery_files: DeliveryFiles = self.file_manager.get_files_to_deliver( case_id=case.internal_id ) moved_files: DeliveryFiles = self.file_mover.move_files( delivery_files=delivery_files, delivery_base_path=delivery_base_path ) - formatted_files: FormattedFiles = self.file_formatter.format_files(moved_files) + formatted_files: FormattedFiles = self.file_formatter.format_files( + delivery_files=moved_files + ) + folders_to_deliver: set[Path] = set( [formatted_file.formatted_path.parent for formatted_file in formatted_files.files] ) @@ -77,7 +87,12 @@ def deliver_files_for_case( def deliver_files_for_ticket( self, ticket_id: str, delivery_base_path: Path, dry_run: bool = False ) -> None: - """Deliver the files for all cases in a ticket to the customer folder.""" + """Deliver the files for all cases in a ticket to the customer folder. 
+ args: + ticket_id: The ticket id to deliver files for + delivery_base_path: The base path to deliver the files to + dry_run: Whether to perform a dry run or not + """ cases: list[Case] = self.status_db.get_cases_by_ticket_id(ticket_id) if not cases: raise EntryNotFoundError(f"No cases found for ticket {ticket_id}") @@ -91,15 +106,14 @@ def deliver_files_for_sample( ): """Deliver the files for a sample to the customer folder.""" delivery_files: DeliveryFiles = self.file_manager.get_files_to_deliver( - case_id=case.internal_id - ) - filtered_files: DeliveryFiles = self.file_filter.filter_delivery_files( - delivery_files=delivery_files, sample_id=sample_id + case_id=case.internal_id, sample_id=sample_id ) moved_files: DeliveryFiles = self.file_mover.move_files( - delivery_files=filtered_files, delivery_base_path=delivery_base_path + delivery_files=delivery_files, delivery_base_path=delivery_base_path + ) + formatted_files: FormattedFiles = self.file_formatter.format_files( + delivery_files=moved_files ) - formatted_files: FormattedFiles = self.file_formatter.format_files(moved_files) folders_to_deliver: set[Path] = set( [formatted_file.formatted_path.parent for formatted_file in formatted_files.files] ) @@ -108,7 +122,31 @@ def deliver_files_for_sample( ) self._add_trailblazer_tracking(case=case, job_id=job_id, dry_run=dry_run) + def deliver_files_for_sample_no_rsync( + self, case: Case, sample_id: str, delivery_base_path: Path + ): + """ + Deliver the files for a sample to the delivery base path. Does not perform rsync. 
+ args: + case: The case to deliver files for + sample_id: The sample to deliver files for + delivery_base_path: The base path to deliver the files to + """ + delivery_files: DeliveryFiles = self.file_manager.get_files_to_deliver( + case_id=case.internal_id, sample_id=sample_id + ) + moved_files: DeliveryFiles = self.file_mover.move_files( + delivery_files=delivery_files, delivery_base_path=delivery_base_path + ) + self.file_formatter.format_files(delivery_files=moved_files) + def _start_rsync_job(self, case: Case, dry_run: bool, folders_to_deliver: set[Path]) -> int: + """Start a rsync job for the case. + args: + case: The case to start the rsync job for + dry_run: Whether to perform a dry run or not + folders_to_deliver: The folders to deliver + """ LOG.debug(f"[RSYNC] Starting rsync job for case {case.internal_id}") job_id: int = self.rsync_service.run_rsync_for_case( case=case, @@ -123,6 +161,12 @@ def _start_rsync_job(self, case: Case, dry_run: bool, folders_to_deliver: set[Pa return job_id def _add_trailblazer_tracking(self, case: Case, job_id: int, dry_run: bool) -> None: + """Add the rsync job to Trailblazer for tracking. 
+ args: + case: The case to add the job for + job_id: The job id to add for trailblazer tracking + dry_run: Whether to perform a dry run or not + """ if dry_run: LOG.info(f"Would have added the analysis for case {case.internal_id} to Trailblazer") else: diff --git a/cg/services/deliver_files/deliver_files_service/deliver_files_service_factory.py b/cg/services/deliver_files/deliver_files_service/deliver_files_service_factory.py deleted file mode 100644 index 45854fce29..0000000000 --- a/cg/services/deliver_files/deliver_files_service/deliver_files_service_factory.py +++ /dev/null @@ -1,175 +0,0 @@ -"""Module for the factory of the deliver files service.""" - -from typing import Type - -from cg.apps.housekeeper.hk import HousekeeperAPI -from cg.apps.tb import TrailblazerAPI -from cg.constants import DataDelivery, Workflow -from cg.constants.sequencing import SeqLibraryPrepCategory -from cg.services.analysis_service.analysis_service import AnalysisService -from cg.services.deliver_files.deliver_files_service.deliver_files_service import ( - DeliverFilesService, -) -from cg.services.deliver_files.deliver_files_service.exc import DeliveryTypeNotSupported -from cg.services.deliver_files.file_fetcher.abstract import FetchDeliveryFilesService -from cg.services.deliver_files.file_fetcher.analysis_raw_data_service import ( - RawDataAndAnalysisDeliveryFileFetcher, -) -from cg.services.deliver_files.file_fetcher.analysis_service import AnalysisDeliveryFileFetcher -from cg.services.deliver_files.file_fetcher.raw_data_service import RawDataDeliveryFileFetcher -from cg.services.deliver_files.file_filter.sample_service import SampleFileFilter -from cg.services.deliver_files.file_formatter.abstract import DeliveryFileFormattingService -from cg.services.deliver_files.file_formatter.service import DeliveryFileFormatter -from cg.services.deliver_files.file_formatter.utils.case_service import CaseFileFormatter -from 
cg.services.deliver_files.file_formatter.utils.sample_concatenation_service import ( - SampleFileConcatenationFormatter, -) -from cg.services.deliver_files.file_formatter.utils.sample_service import ( - SampleFileFormatter, - FileManagingService, - SampleFileNameFormatter, -) -from cg.services.deliver_files.file_mover.service import DeliveryFilesMover -from cg.services.deliver_files.rsync.service import DeliveryRsyncService -from cg.services.deliver_files.tag_fetcher.abstract import FetchDeliveryFileTagsService -from cg.services.deliver_files.tag_fetcher.bam_service import BamDeliveryTagsFetcher -from cg.services.deliver_files.tag_fetcher.sample_and_case_service import ( - SampleAndCaseDeliveryTagsFetcher, -) -from cg.services.fastq_concatenation_service.fastq_concatenation_service import ( - FastqConcatenationService, -) -from cg.store.models import Case -from cg.store.store import Store - - -class DeliveryServiceFactory: - """Class to build the delivery services based on workflow and delivery type.""" - - def __init__( - self, - store: Store, - hk_api: HousekeeperAPI, - rsync_service: DeliveryRsyncService, - tb_service: TrailblazerAPI, - analysis_service: AnalysisService, - ): - self.store = store - self.hk_api = hk_api - self.rsync_service = rsync_service - self.tb_service = tb_service - self.analysis_service = analysis_service - - @staticmethod - def _sanitise_delivery_type(delivery_type: DataDelivery) -> DataDelivery: - """Sanitise the delivery type.""" - if delivery_type in [DataDelivery.FASTQ_QC, DataDelivery.FASTQ_SCOUT]: - return DataDelivery.FASTQ - if delivery_type in [DataDelivery.ANALYSIS_SCOUT]: - return DataDelivery.ANALYSIS_FILES - if delivery_type in [ - DataDelivery.FASTQ_ANALYSIS_SCOUT, - DataDelivery.FASTQ_QC_ANALYSIS, - ]: - return DataDelivery.FASTQ_ANALYSIS - return delivery_type - - @staticmethod - def _validate_delivery_type(delivery_type: DataDelivery): - """Check if the delivery type is supported. 
Raises DeliveryTypeNotSupported error.""" - if delivery_type in [ - DataDelivery.FASTQ, - DataDelivery.ANALYSIS_FILES, - DataDelivery.FASTQ_ANALYSIS, - DataDelivery.BAM, - ]: - return - raise DeliveryTypeNotSupported( - f"Delivery type {delivery_type} is not supported. Supported delivery types are" - f" {DataDelivery.FASTQ}, {DataDelivery.ANALYSIS_FILES}," - f" {DataDelivery.FASTQ_ANALYSIS}, {DataDelivery.BAM}." - ) - - @staticmethod - def _get_file_tag_fetcher(delivery_type: DataDelivery) -> FetchDeliveryFileTagsService: - """Get the file tag fetcher based on the delivery type.""" - service_map: dict[DataDelivery, Type[FetchDeliveryFileTagsService]] = { - DataDelivery.FASTQ: SampleAndCaseDeliveryTagsFetcher, - DataDelivery.ANALYSIS_FILES: SampleAndCaseDeliveryTagsFetcher, - DataDelivery.FASTQ_ANALYSIS: SampleAndCaseDeliveryTagsFetcher, - DataDelivery.BAM: BamDeliveryTagsFetcher, - } - return service_map[delivery_type]() - - def _get_file_fetcher(self, delivery_type: DataDelivery) -> FetchDeliveryFilesService: - """Get the file fetcher based on the delivery type.""" - service_map: dict[DataDelivery, Type[FetchDeliveryFilesService]] = { - DataDelivery.FASTQ: RawDataDeliveryFileFetcher, - DataDelivery.ANALYSIS_FILES: AnalysisDeliveryFileFetcher, - DataDelivery.FASTQ_ANALYSIS: RawDataAndAnalysisDeliveryFileFetcher, - DataDelivery.BAM: RawDataDeliveryFileFetcher, - } - file_tag_fetcher: FetchDeliveryFileTagsService = self._get_file_tag_fetcher(delivery_type) - return service_map[delivery_type]( - status_db=self.store, - hk_api=self.hk_api, - tags_fetcher=file_tag_fetcher, - ) - - def _convert_workflow(self, case: Case) -> Workflow: - """Converts a workflow with the introduction of the microbial-fastq delivery type an - unsupported combination of delivery type and workflow setup is required. 
This function - makes sure that a raw data workflow with microbial fastq delivery type is treated as a - microsalt workflow so that the microbial-fastq sample files can be concatenated.""" - tag: str = case.samples[0].application_version.application.tag - microbial_tags: list[str] = [ - application.tag - for application in self.store.get_active_applications_by_prep_category( - prep_category=SeqLibraryPrepCategory.MICROBIAL - ) - ] - if case.data_analysis == Workflow.RAW_DATA and tag in microbial_tags: - return Workflow.MICROSALT - return case.data_analysis - - def _get_sample_file_formatter( - self, - case: Case, - ) -> SampleFileFormatter | SampleFileConcatenationFormatter: - """Get the file formatter service based on the workflow.""" - converted_workflow: Workflow = self._convert_workflow(case) - if converted_workflow in [Workflow.MICROSALT]: - return SampleFileConcatenationFormatter( - file_manager=FileManagingService(), - file_formatter=SampleFileNameFormatter(), - concatenation_service=FastqConcatenationService(), - ) - return SampleFileFormatter( - file_manager=FileManagingService(), file_name_formatter=SampleFileNameFormatter() - ) - - def build_delivery_service( - self, case: Case, delivery_type: DataDelivery | None = None - ) -> DeliverFilesService: - """Build a delivery service based on a case.""" - delivery_type: DataDelivery = self._sanitise_delivery_type( - delivery_type if delivery_type else case.data_delivery - ) - self._validate_delivery_type(delivery_type) - file_fetcher: FetchDeliveryFilesService = self._get_file_fetcher(delivery_type) - sample_file_formatter: SampleFileFormatter | SampleFileConcatenationFormatter = ( - self._get_sample_file_formatter(case) - ) - file_formatter: DeliveryFileFormattingService = DeliveryFileFormatter( - case_file_formatter=CaseFileFormatter(), sample_file_formatter=sample_file_formatter - ) - return DeliverFilesService( - delivery_file_manager_service=file_fetcher, - move_file_service=DeliveryFilesMover(), - 
file_filter=SampleFileFilter(), - file_formatter_service=file_formatter, - status_db=self.store, - rsync_service=self.rsync_service, - tb_service=self.tb_service, - analysis_service=self.analysis_service, - ) diff --git a/cg/services/deliver_files/factory.py b/cg/services/deliver_files/factory.py new file mode 100644 index 0000000000..1701cba5fb --- /dev/null +++ b/cg/services/deliver_files/factory.py @@ -0,0 +1,323 @@ +"""Module for the factory of the deliver files service.""" + +from typing import Type + +from cg.apps.housekeeper.hk import HousekeeperAPI +from cg.apps.lims import LimsAPI +from cg.apps.tb import TrailblazerAPI +from cg.constants import DataDelivery, Workflow +from cg.constants.sequencing import SeqLibraryPrepCategory +from cg.services.analysis_service.analysis_service import AnalysisService +from cg.services.deliver_files.constants import DeliveryDestination, DeliveryStructure +from cg.services.deliver_files.deliver_files_service.deliver_files_service import ( + DeliverFilesService, +) +from cg.services.deliver_files.deliver_files_service.exc import DeliveryTypeNotSupported +from cg.services.deliver_files.file_fetcher.abstract import FetchDeliveryFilesService +from cg.services.deliver_files.file_fetcher.analysis_raw_data_service import ( + RawDataAndAnalysisDeliveryFileFetcher, +) +from cg.services.deliver_files.file_fetcher.analysis_service import AnalysisDeliveryFileFetcher +from cg.services.deliver_files.file_fetcher.raw_data_service import RawDataDeliveryFileFetcher +from cg.services.deliver_files.file_formatter.destination.abstract import ( + DeliveryDestinationFormatter, +) +from cg.services.deliver_files.file_formatter.destination.base_service import ( + BaseDeliveryFormatter, +) +from cg.services.deliver_files.file_formatter.files.case_service import CaseFileFormatter +from cg.services.deliver_files.file_formatter.files.mutant_service import ( + MutantFileFormatter, +) +from 
cg.services.deliver_files.file_formatter.files.concatenation_service import ( + SampleFileConcatenationFormatter, +) +from cg.services.deliver_files.file_formatter.files.sample_service import ( + SampleFileFormatter, + FileManager, +) +from cg.services.deliver_files.file_formatter.path_name.abstract import PathNameFormatter +from cg.services.deliver_files.file_formatter.path_name.flat_structure import ( + FlatStructurePathFormatter, +) +from cg.services.deliver_files.file_formatter.path_name.nested_structure import ( + NestedStructurePathFormatter, +) +from cg.services.deliver_files.file_mover.abstract import DestinationFilesMover +from cg.services.deliver_files.file_mover.customer_inbox_service import ( + CustomerInboxDestinationFilesMover, +) +from cg.services.deliver_files.file_mover.base_service import BaseDestinationFilesMover +from cg.services.deliver_files.rsync.service import DeliveryRsyncService +from cg.services.deliver_files.tag_fetcher.abstract import FetchDeliveryFileTagsService +from cg.services.deliver_files.tag_fetcher.bam_service import BamDeliveryTagsFetcher +from cg.services.deliver_files.tag_fetcher.fohm_upload_service import FOHMUploadTagsFetcher +from cg.services.deliver_files.tag_fetcher.sample_and_case_service import ( + SampleAndCaseDeliveryTagsFetcher, +) +from cg.services.deliver_files.utils import FileMover +from cg.services.fastq_concatenation_service.fastq_concatenation_service import ( + FastqConcatenationService, +) +from cg.store.models import Case +from cg.store.store import Store + + +class DeliveryServiceFactory: + """ + Class to build the delivery services based on case, workflow, delivery type, delivery destination and delivery structure. + The delivery destination is used to specify delivery to the customer or for external upload. + Workflow is used to specify the workflow of the case and is required for the tag fetcher. + Delivery type is used to specify the type of delivery to perform. 
+ Delivery structure is used to specify the structure of the delivery. + """ + + def __init__( + self, + store: Store, + lims_api: LimsAPI, + hk_api: HousekeeperAPI, + rsync_service: DeliveryRsyncService, + tb_service: TrailblazerAPI, + analysis_service: AnalysisService, + ): + self.store = store + self.lims_api = lims_api + self.hk_api = hk_api + self.rsync_service = rsync_service + self.tb_service = tb_service + self.analysis_service = analysis_service + + @staticmethod + def _sanitise_delivery_type(delivery_type: DataDelivery) -> DataDelivery: + """Sanitise the delivery type. + We have multiple delivery types that are a combination of other delivery types or uploads. + Here we make sure to convert unsupported delivery types to supported ones. + args: + delivery_type: The type of delivery to perform. + """ + if delivery_type in [DataDelivery.FASTQ_QC, DataDelivery.FASTQ_SCOUT]: + return DataDelivery.FASTQ + if delivery_type in [DataDelivery.ANALYSIS_SCOUT]: + return DataDelivery.ANALYSIS_FILES + if delivery_type in [ + DataDelivery.FASTQ_ANALYSIS_SCOUT, + DataDelivery.FASTQ_QC_ANALYSIS, + ]: + return DataDelivery.FASTQ_ANALYSIS + return delivery_type + + @staticmethod + def _validate_delivery_type(delivery_type: DataDelivery): + """ + Check if the delivery type is supported. Raises DeliveryTypeNotSupported error. + args: + delivery_type: The type of delivery to perform. + """ + if delivery_type in [ + DataDelivery.FASTQ, + DataDelivery.ANALYSIS_FILES, + DataDelivery.FASTQ_ANALYSIS, + DataDelivery.BAM, + ]: + return + raise DeliveryTypeNotSupported( + f"Delivery type {delivery_type} is not supported. Supported delivery types are" + f" {DataDelivery.FASTQ}, {DataDelivery.ANALYSIS_FILES}," + f" {DataDelivery.FASTQ_ANALYSIS}, {DataDelivery.BAM}." 
+ ) + + @staticmethod + def _get_file_tag_fetcher( + delivery_type: DataDelivery, delivery_destination: DeliveryDestination + ) -> FetchDeliveryFileTagsService: + """ + Get the file tag fetcher based on the delivery type or delivery destination. + NOTE: added complexity to handle the FOHM delivery type as it requires a special set of tags as compared to customer delivery. + It overrides the default behaviour of the delivery type given by the case. + args: + delivery_type: The type of delivery to perform. + delivery_destination: The destination of the delivery defaults to customer. + + """ + if delivery_destination == DeliveryDestination.FOHM: + return FOHMUploadTagsFetcher() + service_map: dict[DataDelivery, Type[FetchDeliveryFileTagsService]] = { + DataDelivery.FASTQ: SampleAndCaseDeliveryTagsFetcher, + DataDelivery.ANALYSIS_FILES: SampleAndCaseDeliveryTagsFetcher, + DataDelivery.FASTQ_ANALYSIS: SampleAndCaseDeliveryTagsFetcher, + DataDelivery.BAM: BamDeliveryTagsFetcher, + } + return service_map[delivery_type]() + + def _get_file_fetcher( + self, delivery_type: DataDelivery, delivery_destination: DeliveryDestination + ) -> FetchDeliveryFilesService: + """Get the file fetcher based on the delivery type. + args: + delivery_type: The type of delivery to perform. + delivery_destination: The destination of the delivery defaults to customer. See DeliveryDestination enum for explanation. 
+ + """ + service_map: dict[DataDelivery, Type[FetchDeliveryFilesService]] = { + DataDelivery.FASTQ: RawDataDeliveryFileFetcher, + DataDelivery.ANALYSIS_FILES: AnalysisDeliveryFileFetcher, + DataDelivery.FASTQ_ANALYSIS: RawDataAndAnalysisDeliveryFileFetcher, + DataDelivery.BAM: RawDataDeliveryFileFetcher, + } + file_tag_fetcher: FetchDeliveryFileTagsService = self._get_file_tag_fetcher( + delivery_type=delivery_type, delivery_destination=delivery_destination + ) + return service_map[delivery_type]( + status_db=self.store, + hk_api=self.hk_api, + tags_fetcher=file_tag_fetcher, + ) + + def _convert_workflow(self, case: Case) -> Workflow: + """Change the workflow of a Microbial Fastq case to Microsalt to allow the concatenation of fastq files. + With the introduction of the microbial-fastq delivery type, an unsupported combination of delivery type and + workflow setup is required. This function makes sure that a raw data workflow with microbial fastq delivery + type is treated as a microsalt workflow so that the microbial-fastq sample files can be concatenated. + args: + case: The case to convert the workflow for + """ + tag: str = case.samples[0].application_version.application.tag + microbial_tags: list[str] = [ + application.tag + for application in self.store.get_active_applications_by_prep_category( + prep_category=SeqLibraryPrepCategory.MICROBIAL + ) + ] + if case.data_analysis == Workflow.RAW_DATA and tag in microbial_tags: + return Workflow.MICROSALT + return case.data_analysis + + def _get_sample_file_formatter( + self, + case: Case, + delivery_structure: DeliveryStructure = DeliveryStructure.NESTED, + ) -> SampleFileFormatter | SampleFileConcatenationFormatter | MutantFileFormatter: + """Get the file formatter service based on the workflow. + Depending on the delivery structure the path name formatter will be different. + Args: + case: The case to deliver files for. + delivery_structure: The structure of the delivery. 
See DeliveryStructure enum for explanation. Defaults to nested. + """ + + converted_workflow: Workflow = self._convert_workflow(case) + if converted_workflow in [Workflow.MICROSALT]: + return SampleFileConcatenationFormatter( + file_manager=FileManager(), + path_name_formatter=self._get_path_name_formatter(delivery_structure), + concatenation_service=FastqConcatenationService(), + ) + if converted_workflow == Workflow.MUTANT: + return MutantFileFormatter( + lims_api=self.lims_api, + file_manager=FileManager(), + file_formatter=SampleFileConcatenationFormatter( + file_manager=FileManager(), + path_name_formatter=self._get_path_name_formatter(delivery_structure), + concatenation_service=FastqConcatenationService(), + ), + ) + return SampleFileFormatter( + file_manager=FileManager(), + path_name_formatter=self._get_path_name_formatter(delivery_structure), + ) + + def _get_case_file_formatter(self, delivery_structure: DeliveryStructure) -> CaseFileFormatter: + """ + Get the case file formatter based on the delivery structure. + args: + delivery_structure: The structure of the delivery. See DeliveryStructure enum for explanation. + """ + return CaseFileFormatter( + file_manager=FileManager(), + path_name_formatter=self._get_path_name_formatter(delivery_structure), + ) + + @staticmethod + def _get_path_name_formatter( + delivery_structure: DeliveryStructure, + ) -> PathNameFormatter: + """ + Get the path name formatter based on the delivery destination + args: + delivery_structure: The structure of the delivery. See DeliveryStructure enum for explanation. + """ + if delivery_structure == DeliveryStructure.FLAT: + return FlatStructurePathFormatter() + return NestedStructurePathFormatter() + + @staticmethod + def _get_file_mover( + delivery_destination: DeliveryDestination, + ) -> CustomerInboxDestinationFilesMover | BaseDestinationFilesMover: + """Get the file mover based on the delivery type. + args: + delivery_destination: The destination of the delivery. 
See DeliveryDestination enum for explanation. + """ + if delivery_destination in [DeliveryDestination.BASE, DeliveryDestination.FOHM]: + return BaseDestinationFilesMover(FileMover(FileManager())) + return CustomerInboxDestinationFilesMover(FileMover(FileManager())) + + def _get_file_formatter( + self, + delivery_structure: DeliveryStructure, + case: Case, + ) -> DeliveryDestinationFormatter: + """ + Get the file formatter service based on the delivery destination. + args: + delivery_structure: The structure of the delivery. See DeliveryStructure enum for explanation. + case: The case to deliver files for. + """ + sample_file_formatter: ( + SampleFileFormatter | SampleFileConcatenationFormatter | MutantFileFormatter + ) = self._get_sample_file_formatter(case=case, delivery_structure=delivery_structure) + case_file_formatter: CaseFileFormatter = self._get_case_file_formatter( + delivery_structure=delivery_structure + ) + return BaseDeliveryFormatter( + case_file_formatter=case_file_formatter, + sample_file_formatter=sample_file_formatter, + ) + + def build_delivery_service( + self, + case: Case, + delivery_type: DataDelivery | None = None, + delivery_destination: DeliveryDestination = DeliveryDestination.CUSTOMER, + delivery_structure: DeliveryStructure = DeliveryStructure.NESTED, + ) -> DeliverFilesService: + """Build a delivery service based on a case. + args: + case: The case to deliver files for. + delivery_type: The type of data delivery to perform. See DataDelivery enum for explanation. + delivery_destination: The destination of the delivery defaults to customer. See DeliveryDestination enum for explanation. + delivery_structure: The structure of the delivery defaults to nested. See DeliveryStructure enum for explanation. 
+ """ + delivery_type: DataDelivery = self._sanitise_delivery_type( + delivery_type if delivery_type else case.data_delivery + ) + self._validate_delivery_type(delivery_type) + file_fetcher: FetchDeliveryFilesService = self._get_file_fetcher( + delivery_type=delivery_type, delivery_destination=delivery_destination + ) + file_move_service: DestinationFilesMover = self._get_file_mover( + delivery_destination=delivery_destination + ) + file_formatter: DeliveryDestinationFormatter = self._get_file_formatter( + case=case, delivery_structure=delivery_structure + ) + return DeliverFilesService( + delivery_file_manager_service=file_fetcher, + move_file_service=file_move_service, + file_formatter_service=file_formatter, + status_db=self.store, + rsync_service=self.rsync_service, + tb_service=self.tb_service, + analysis_service=self.analysis_service, + ) diff --git a/cg/services/deliver_files/file_fetcher/abstract.py b/cg/services/deliver_files/file_fetcher/abstract.py index 95c5d78a98..65fe83e6d5 100644 --- a/cg/services/deliver_files/file_fetcher/abstract.py +++ b/cg/services/deliver_files/file_fetcher/abstract.py @@ -29,6 +29,6 @@ def __init__( self.tags_fetcher = tags_fetcher @abstractmethod - def get_files_to_deliver(self, case_id: str) -> DeliveryFiles: + def get_files_to_deliver(self, case_id: str, sample_id: str | None) -> DeliveryFiles: """Get the files to deliver.""" pass diff --git a/cg/services/deliver_files/file_fetcher/analysis_raw_data_service.py b/cg/services/deliver_files/file_fetcher/analysis_raw_data_service.py index 4eafa0210d..daceca0fb4 100644 --- a/cg/services/deliver_files/file_fetcher/analysis_raw_data_service.py +++ b/cg/services/deliver_files/file_fetcher/analysis_raw_data_service.py @@ -28,13 +28,19 @@ def __init__( self.hk_api = hk_api self.tags_fetcher = tags_fetcher - def get_files_to_deliver(self, case_id: str) -> DeliveryFiles: + def get_files_to_deliver(self, case_id: str, sample_id: str | None = None) -> DeliveryFiles: + """ + Get files to 
deliver for a case or sample for both analysis and raw data. + args: + case_id: The case id to deliver files for + sample_id: The sample id to deliver files for + """ case: Case = self.status_db.get_case_by_internal_id(internal_id=case_id) fastq_files: DeliveryFiles = self._fetch_files( - service_class=RawDataDeliveryFileFetcher, case_id=case_id + service_class=RawDataDeliveryFileFetcher, case_id=case_id, sample_id=sample_id ) analysis_files: DeliveryFiles = self._fetch_files( - service_class=AnalysisDeliveryFileFetcher, case_id=case_id + service_class=AnalysisDeliveryFileFetcher, case_id=case_id, sample_id=sample_id ) delivery_data = DeliveryMetaData( case_id=case.internal_id, @@ -48,7 +54,15 @@ def get_files_to_deliver(self, case_id: str) -> DeliveryFiles: sample_files=analysis_files.sample_files + fastq_files.sample_files, ) - def _fetch_files(self, service_class: type, case_id: str) -> DeliveryFiles: - """Fetch files using the provided service class.""" + def _fetch_files( + self, service_class: type, case_id: str, sample_id: str | None + ) -> DeliveryFiles: + """Fetch files using the provided service class. + Wrapper to fetch files using the provided service class. This is either the RawDataDeliveryFileFetcher or the AnalysisDeliveryFileFetcher. 
+ args: + service_class: The service class to use to fetch the files + case_id: The case id to fetch files for + sample_id: The sample id to fetch files for + """ service = service_class(self.status_db, self.hk_api, tags_fetcher=self.tags_fetcher) - return service.get_files_to_deliver(case_id) + return service.get_files_to_deliver(case_id=case_id, sample_id=sample_id) diff --git a/cg/services/deliver_files/file_fetcher/analysis_service.py b/cg/services/deliver_files/file_fetcher/analysis_service.py index ed0072cb2b..43e0b2f920 100644 --- a/cg/services/deliver_files/file_fetcher/analysis_service.py +++ b/cg/services/deliver_files/file_fetcher/analysis_service.py @@ -38,12 +38,23 @@ def __init__( self.hk_api = hk_api self.tags_fetcher = tags_fetcher - def get_files_to_deliver(self, case_id: str) -> DeliveryFiles: - """Return a list of analysis files to be delivered for a case.""" - LOG.debug(f"[FETCH SERVICE] Fetching analysis files for case: {case_id}") + def get_files_to_deliver(self, case_id: str, sample_id: str | None = None) -> DeliveryFiles: + """Return a list of analysis files to be delivered for a case. 
+ args: + case_id: The case id to deliver files for + sample_id: The sample id to deliver files for + """ + LOG.debug( + f"[FETCH SERVICE] Fetching analysis files for case: {case_id}, sample: {sample_id}" + ) case: Case = self.status_db.get_case_by_internal_id(internal_id=case_id) - analysis_case_files: list[CaseFile] = self._get_analysis_case_delivery_files(case) - analysis_sample_files: list[SampleFile] = self._get_analysis_sample_delivery_files(case) + analysis_case_files: list[CaseFile] = self._get_analysis_case_delivery_files( + case=case, sample_id=sample_id + ) + + analysis_sample_files: list[SampleFile] = self._get_analysis_sample_delivery_files( + case=case, sample_id=sample_id + ) delivery_data = DeliveryMetaData( case_id=case.internal_id, customer_internal_id=case.customer.internal_id, @@ -60,7 +71,13 @@ def get_files_to_deliver(self, case_id: str) -> DeliveryFiles: @staticmethod def _validate_delivery_has_content(delivery_files: DeliveryFiles) -> DeliveryFiles: - """Check if the delivery files has files to deliver.""" + """ + Check if the delivery files has files to deliver. + raises: + NoDeliveryFilesError if no files to deliver. + args: + delivery_files: The delivery files to check + """ if delivery_files.case_files or delivery_files.sample_files: return delivery_files LOG.info( @@ -71,9 +88,17 @@ def _validate_delivery_has_content(delivery_files: DeliveryFiles) -> DeliveryFil @handle_missing_bundle_errors def _get_sample_files_from_case_bundle( self, workflow: Workflow, sample_id: str, case_id: str - ) -> list[SampleFile]: - """Return a list of files from a case bundle with a sample id as tag.""" + ) -> list[SampleFile] | None: + """Return a list of files from a case bundle with a sample id as tag. + This is to fetch sample specific analysis files that are stored on the case level. 
+ args: + workflow: The workflow to fetch files for + sample_id: The sample id to fetch files for + case_id: The case id to fetch files for + """ sample_tags: list[set[str]] = self.tags_fetcher.fetch_tags(workflow).sample_tags + if not sample_tags: + return [] sample_tags_with_sample_id: list[set[str]] = [tag | {sample_id} for tag in sample_tags] sample_files: list[File] = self.hk_api.get_files_from_latest_version_containing_tags( bundle_name=case_id, tags=sample_tags_with_sample_id @@ -89,9 +114,16 @@ def _get_sample_files_from_case_bundle( for sample_file in sample_files ] - def _get_analysis_sample_delivery_files(self, case: Case) -> list[SampleFile] | None: - """Return a all sample files to deliver for a case.""" - sample_ids: list[str] = case.sample_ids + def _get_analysis_sample_delivery_files( + self, case: Case, sample_id: str | None + ) -> list[SampleFile]: + """Return all sample files to deliver for a case. + Write a list of sample files to deliver for a case. + args: + case: The case to deliver files for + sample_id: The sample id to deliver files for + """ + sample_ids: list[str] = [sample_id] if sample_id else case.sample_ids delivery_files: list[SampleFile] = [] for sample_id in sample_ids: sample_files: list[SampleFile] = self._get_sample_files_from_case_bundle( @@ -101,13 +133,20 @@ def _get_analysis_sample_delivery_files(self, case: Case) -> list[SampleFile] | return delivery_files @handle_missing_bundle_errors - def _get_analysis_case_delivery_files(self, case: Case) -> list[CaseFile]: + def _get_analysis_case_delivery_files( + self, case: Case, sample_id: str | None + ) -> list[CaseFile]: """ Return a complete list of analysis case files to be delivered and ignore analysis sample - files. + files. This is to ensure that only case level analysis files are delivered. 
+ args: + case: The case to deliver files for + sample_id: The sample id to deliver files for """ case_tags: list[set[str]] = self.tags_fetcher.fetch_tags(case.data_analysis).case_tags - sample_id_tags: list[str] = case.sample_ids + if not case_tags: + return [] + sample_id_tags: list[str] = [sample_id] if sample_id else case.sample_ids case_files: list[File] = self.hk_api.get_files_from_latest_version_containing_tags( bundle_name=case.internal_id, tags=case_tags, excluded_tags=sample_id_tags ) diff --git a/cg/services/deliver_files/file_fetcher/models.py b/cg/services/deliver_files/file_fetcher/models.py index ef38780862..f22a491d43 100644 --- a/cg/services/deliver_files/file_fetcher/models.py +++ b/cg/services/deliver_files/file_fetcher/models.py @@ -7,7 +7,7 @@ class DeliveryMetaData(BaseModel): case_id: str customer_internal_id: str ticket_id: str - customer_ticket_inbox: Path | None = None + delivery_path: Path | None = None class CaseFile(BaseModel): diff --git a/cg/services/deliver_files/file_fetcher/raw_data_service.py b/cg/services/deliver_files/file_fetcher/raw_data_service.py index bdc99cf1ca..a38dedd8d0 100644 --- a/cg/services/deliver_files/file_fetcher/raw_data_service.py +++ b/cg/services/deliver_files/file_fetcher/raw_data_service.py @@ -43,11 +43,16 @@ def __init__( self.hk_api = hk_api self.tags_fetcher = tags_fetcher - def get_files_to_deliver(self, case_id: str) -> DeliveryFiles: - """Return a list of raw data files to be delivered for a case and its samples.""" + def get_files_to_deliver(self, case_id: str, sample_id: str | None = None) -> DeliveryFiles: + """ + Return a list of raw data files to be delivered for a case and its samples. 
+ args: + case_id: The case id to deliver files for + sample_id: The sample id to deliver files for + """ LOG.debug(f"[FETCH SERVICE] Fetching raw data files for case: {case_id}") case: Case = self.status_db.get_case_by_internal_id(internal_id=case_id) - sample_ids: list[str] = case.sample_ids + sample_ids: list[str] = [sample_id] if sample_id else case.sample_ids raw_data_files: list[SampleFile] = [] for sample_id in sample_ids: raw_data_files.extend( @@ -68,7 +73,12 @@ def get_files_to_deliver(self, case_id: str) -> DeliveryFiles: @staticmethod def _validate_delivery_has_content(delivery_files: DeliveryFiles) -> DeliveryFiles: - """Check if the delivery files has files to deliver.""" + """Check if the delivery files has files to deliver. + raises: + NoDeliveryFilesError if no files to deliver. + args: + delivery_files: The delivery files to check + """ for sample_file in delivery_files.sample_files: LOG.debug( f"Found file to deliver: {sample_file.file_path} for sample: {sample_file.sample_id}" @@ -82,7 +92,12 @@ def _validate_delivery_has_content(delivery_files: DeliveryFiles) -> DeliveryFil @handle_missing_bundle_errors def _get_raw_data_files_for_sample(self, case_id: str, sample_id: str) -> list[SampleFile]: - """Get the RawData files for a sample.""" + """ + Get the RawData files for a sample. Hardcoded tags to fetch from the raw data workflow. 
+ args: + case_id: The case id to get the raw data files for + sample_id: The sample id to get the raw data files for + """ file_tags: list[set[str]] = self.tags_fetcher.fetch_tags(Workflow.RAW_DATA).sample_tags raw_data_files: list[File] = self.hk_api.get_files_from_latest_version_containing_tags( bundle_name=sample_id, tags=file_tags diff --git a/cg/services/deliver_files/file_filter/abstract.py b/cg/services/deliver_files/file_filter/abstract.py deleted file mode 100644 index a0d846b544..0000000000 --- a/cg/services/deliver_files/file_filter/abstract.py +++ /dev/null @@ -1,10 +0,0 @@ -from abc import abstractmethod, ABC - -from cg.services.deliver_files.file_fetcher.models import DeliveryFiles - - -class FilterDeliveryFilesService(ABC): - - @abstractmethod - def filter_delivery_files(self, delivery_files: DeliveryFiles, sample_id: str) -> DeliveryFiles: - pass diff --git a/cg/services/deliver_files/file_filter/sample_service.py b/cg/services/deliver_files/file_filter/sample_service.py deleted file mode 100644 index 3f4ed9e56c..0000000000 --- a/cg/services/deliver_files/file_filter/sample_service.py +++ /dev/null @@ -1,13 +0,0 @@ -from cg.services.deliver_files.file_fetcher.models import DeliveryFiles -from cg.services.deliver_files.file_filter.abstract import FilterDeliveryFilesService - - -class SampleFileFilter(FilterDeliveryFilesService): - - def filter_delivery_files(self, delivery_files: DeliveryFiles, sample_id: str) -> DeliveryFiles: - delivery_files.sample_files = [ - sample_file - for sample_file in delivery_files.sample_files - if sample_file.sample_id == sample_id - ] - return delivery_files diff --git a/cg/services/deliver_files/file_formatter/utils/__init__.py b/cg/services/deliver_files/file_formatter/destination/__init__.py similarity index 100% rename from cg/services/deliver_files/file_formatter/utils/__init__.py rename to cg/services/deliver_files/file_formatter/destination/__init__.py diff --git 
a/cg/services/deliver_files/file_formatter/abstract.py b/cg/services/deliver_files/file_formatter/destination/abstract.py similarity index 70% rename from cg/services/deliver_files/file_formatter/abstract.py rename to cg/services/deliver_files/file_formatter/destination/abstract.py index 31eb12f582..559f553e55 100644 --- a/cg/services/deliver_files/file_formatter/abstract.py +++ b/cg/services/deliver_files/file_formatter/destination/abstract.py @@ -1,10 +1,11 @@ from abc import abstractmethod, ABC +from pathlib import Path from cg.services.deliver_files.file_fetcher.models import DeliveryFiles -from cg.services.deliver_files.file_formatter.models import FormattedFiles +from cg.services.deliver_files.file_formatter.destination.models import FormattedFiles -class DeliveryFileFormattingService(ABC): +class DeliveryDestinationFormatter(ABC): """ Abstract class that encapsulates the logic required for formatting files to deliver. """ diff --git a/cg/services/deliver_files/file_formatter/service.py b/cg/services/deliver_files/file_formatter/destination/base_service.py similarity index 56% rename from cg/services/deliver_files/file_formatter/service.py rename to cg/services/deliver_files/file_formatter/destination/base_service.py index 2265db4f2e..5b5b3493e8 100644 --- a/cg/services/deliver_files/file_formatter/service.py +++ b/cg/services/deliver_files/file_formatter/destination/base_service.py @@ -1,31 +1,42 @@ import logging -import os from pathlib import Path from cg.services.deliver_files.file_fetcher.models import CaseFile, DeliveryFiles, SampleFile -from cg.services.deliver_files.file_formatter.abstract import DeliveryFileFormattingService -from cg.services.deliver_files.file_formatter.models import FormattedFile, FormattedFiles -from cg.services.deliver_files.file_formatter.utils.case_service import CaseFileFormatter -from cg.services.deliver_files.file_formatter.utils.sample_concatenation_service import ( +from 
cg.services.deliver_files.file_formatter.destination.abstract import ( + DeliveryDestinationFormatter, +) +from cg.services.deliver_files.file_formatter.destination.models import ( + FormattedFile, + FormattedFiles, +) +from cg.services.deliver_files.file_formatter.files.case_service import CaseFileFormatter +from cg.services.deliver_files.file_formatter.files.mutant_service import ( + MutantFileFormatter, +) +from cg.services.deliver_files.file_formatter.files.concatenation_service import ( SampleFileConcatenationFormatter, ) -from cg.services.deliver_files.file_formatter.utils.sample_service import SampleFileFormatter +from cg.services.deliver_files.file_formatter.files.sample_service import ( + SampleFileFormatter, +) LOG = logging.getLogger(__name__) -class DeliveryFileFormatter(DeliveryFileFormattingService): +class BaseDeliveryFormatter(DeliveryDestinationFormatter): """ Format the files to be delivered in the generic format. - Expected structure: - /inbox/// - /inbox/// + args: + case_file_formatter: The case file formatter + sample_file_formatter: The sample file formatter. This can be a SampleFileFormatter, SampleFileConcatenationFormatter or MutantFileFormatter. 
""" def __init__( self, case_file_formatter: CaseFileFormatter, - sample_file_formatter: SampleFileFormatter | SampleFileConcatenationFormatter, + sample_file_formatter: ( + SampleFileFormatter | SampleFileConcatenationFormatter | MutantFileFormatter + ), ): self.case_file_formatter = case_file_formatter self.sample_file_formatter = sample_file_formatter @@ -33,32 +44,26 @@ def __init__( def format_files(self, delivery_files: DeliveryFiles) -> FormattedFiles: """Format the files to be delivered and return the formatted files in the generic format.""" LOG.debug("[FORMAT SERVICE] Formatting files for delivery") - ticket_dir_path: Path = delivery_files.delivery_data.customer_ticket_inbox - self._create_ticket_dir(ticket_dir_path) formatted_files: list[FormattedFile] = self._format_sample_and_case_files( sample_files=delivery_files.sample_files, case_files=delivery_files.case_files, - ticket_dir_path=ticket_dir_path, + delivery_path=delivery_files.delivery_data.delivery_path, ) return FormattedFiles(files=formatted_files) def _format_sample_and_case_files( - self, sample_files: list[SampleFile], case_files: list[CaseFile], ticket_dir_path: Path + self, sample_files: list[SampleFile], case_files: list[CaseFile], delivery_path: Path ) -> list[FormattedFile]: """Helper method to format both sample and case files.""" + LOG.debug(f"[FORMAT SERVICE] delivery_path: {delivery_path}") formatted_files: list[FormattedFile] = self.sample_file_formatter.format_files( moved_files=sample_files, - ticket_dir_path=ticket_dir_path, + delivery_path=delivery_path, ) if case_files: formatted_case_files: list[FormattedFile] = self.case_file_formatter.format_files( moved_files=case_files, - ticket_dir_path=ticket_dir_path, + delivery_path=delivery_path, ) formatted_files.extend(formatted_case_files) return formatted_files - - @staticmethod - def _create_ticket_dir(ticket_dir_path: Path) -> None: - """Create the ticket directory if it does not exist.""" - os.makedirs(ticket_dir_path, 
exist_ok=True) diff --git a/cg/services/deliver_files/file_formatter/models.py b/cg/services/deliver_files/file_formatter/destination/models.py similarity index 100% rename from cg/services/deliver_files/file_formatter/models.py rename to cg/services/deliver_files/file_formatter/destination/models.py diff --git a/cg/services/deliver_files/file_formatter/files/__init__.py b/cg/services/deliver_files/file_formatter/files/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/cg/services/deliver_files/file_formatter/files/abstract.py b/cg/services/deliver_files/file_formatter/files/abstract.py new file mode 100644 index 0000000000..bb1f241b3d --- /dev/null +++ b/cg/services/deliver_files/file_formatter/files/abstract.py @@ -0,0 +1,15 @@ +from abc import abstractmethod, ABC +from pathlib import Path + +from cg.services.deliver_files.file_fetcher.models import SampleFile, CaseFile +from cg.services.deliver_files.file_formatter.destination.models import FormattedFile + + +class FileFormatter(ABC): + + @abstractmethod + def format_files( + self, moved_files: list[CaseFile | SampleFile], delivery_path: Path + ) -> list[FormattedFile]: + """Format the files to deliver.""" + pass diff --git a/cg/services/deliver_files/file_formatter/files/case_service.py b/cg/services/deliver_files/file_formatter/files/case_service.py new file mode 100644 index 0000000000..deb1a1e4b0 --- /dev/null +++ b/cg/services/deliver_files/file_formatter/files/case_service.py @@ -0,0 +1,83 @@ +import logging +from pathlib import Path + +from cg.services.deliver_files.file_fetcher.models import CaseFile +from cg.services.deliver_files.file_formatter.files.abstract import FileFormatter +from cg.services.deliver_files.file_formatter.destination.models import FormattedFile +from cg.services.deliver_files.file_formatter.path_name.abstract import PathNameFormatter +from cg.services.deliver_files.file_formatter.path_name.nested_structure import ( + NestedStructurePathFormatter, +) +from 
cg.services.deliver_files.utils import FileManager + +LOG = logging.getLogger(__name__) + + +class CaseFileFormatter(FileFormatter): + """ + Format the case files to deliver and return the formatted files. + args: + path_name_formatter: The path name formatter to format paths to either a flat or nested structure in the delivery destination + file_manager: The file manager + """ + + def __init__( + self, + path_name_formatter: PathNameFormatter, + file_manager: FileManager, + ): + self.path_name_formatter = path_name_formatter + self.file_manager = file_manager + + def format_files(self, moved_files: list[CaseFile], delivery_path: Path) -> list[FormattedFile]: + """Format the case files to deliver and return the formatted files. + args: + moved_files: The case files to format + delivery_path: The path to deliver the files to + """ + LOG.debug("[FORMAT SERVICE] Formatting case files") + self._create_case_name_folder( + delivery_path=delivery_path, case_name=moved_files[0].case_name + ) + return self._format_case_files(moved_files) + + def _format_case_files(self, case_files: list[CaseFile]) -> list[FormattedFile]: + """Format the case files to deliver and return the formatted files. + args: + case_files: The case files to format + """ + formatted_files: list[FormattedFile] = self._get_formatted_paths(case_files) + for formatted_file in formatted_files: + self.file_manager.rename_file( + src=formatted_file.original_path, dst=formatted_file.formatted_path + ) + return formatted_files + + def _create_case_name_folder(self, delivery_path: Path, case_name: str) -> None: + """ + Create a folder for the case in the delivery path. + The folder is only created if the provided PathStructureFormatter is a NestedStructurePathFormatter. 
+ args: + delivery_path: The path to deliver the files to + case_name: The name of the case + """ + LOG.debug(f"[FORMAT SERVICE] Creating folder for case: {case_name}") + if isinstance(self.path_name_formatter, NestedStructurePathFormatter): + self.file_manager.create_directories(base_path=delivery_path, directories={case_name}) + + def _get_formatted_paths(self, case_files: list[CaseFile]) -> list[FormattedFile]: + """Return a list of formatted case files. + args: + case_files: The case files to format + """ + formatted_files: list[FormattedFile] = [] + for case_file in case_files: + formatted_path = self.path_name_formatter.format_file_path( + file_path=case_file.file_path, + provided_id=case_file.case_id, + provided_name=case_file.case_name, + ) + formatted_files.append( + FormattedFile(original_path=case_file.file_path, formatted_path=formatted_path) + ) + return formatted_files diff --git a/cg/services/deliver_files/file_formatter/files/concatenation_service.py b/cg/services/deliver_files/file_formatter/files/concatenation_service.py new file mode 100644 index 0000000000..b2f20b6e57 --- /dev/null +++ b/cg/services/deliver_files/file_formatter/files/concatenation_service.py @@ -0,0 +1,307 @@ +import logging +from pathlib import Path +import re + +from cg.constants.constants import ReadDirection, FileFormat, FileExtensions +from cg.services.deliver_files.file_formatter.files.abstract import FileFormatter +from cg.services.deliver_files.file_formatter.files.models import FastqFile +from cg.services.deliver_files.file_formatter.path_name.abstract import PathNameFormatter + +from cg.services.fastq_concatenation_service.fastq_concatenation_service import ( + FastqConcatenationService, +) +from cg.services.fastq_concatenation_service.utils import generate_concatenated_fastq_delivery_path +from cg.services.deliver_files.file_fetcher.models import SampleFile +from cg.services.deliver_files.file_formatter.destination.models import FormattedFile +from 
cg.services.deliver_files.file_formatter.files.sample_service import ( + FileManager, +) +from cg.services.deliver_files.file_formatter.path_name.nested_structure import ( + NestedStructurePathFormatter, +) +from cg.utils.files import get_all_files_in_directory_tree + +LOG = logging.getLogger(__name__) + + +class SampleFileConcatenationFormatter(FileFormatter): + """ + Format the sample files to deliver, concatenate fastq files and return the formatted files. + Used for workflows: Microsalt. + args: + file_manager: The file manager + path_name_formatter: The path name formatter to format paths to either a flat or nested structure in the delivery destination + concatenation_service: The fastq concatenation service to concatenate fastq files. + """ + + def __init__( + self, + file_manager: FileManager, + path_name_formatter: PathNameFormatter, + concatenation_service: FastqConcatenationService, + ): + self.file_manager = file_manager + self.path_name_formatter = path_name_formatter + self.concatenation_service = concatenation_service + + def format_files( + self, moved_files: list[SampleFile], delivery_path: Path + ) -> list[FormattedFile]: + """ + Format the sample files to deliver, concatenate fastq files and return the formatted files. + args: + moved_files: list[SampleFile]: List of sample files to deliver. + These are files that have been moved from housekeeper to the delivery path. + delivery_path: Path: Path to the delivery directory. 
+ """ + LOG.debug("[FORMAT SERVICE] Formatting and concatenating sample files") + sample_names: set[str] = self._get_sample_names(sample_files=moved_files) + self._create_sample_directories(delivery_path=delivery_path, sample_names=sample_names) + formatted_files: list[FormattedFile] = self._format_sample_file_paths(moved_files) + LOG.debug( + f"[FORMAT SERVICE] number of formatted files: {len(formatted_files)}, number of moved files: {len(moved_files)}" + ) + self._rename_original_files(formatted_files) + LOG.debug(f"[FORMAT SERVICE] delivery_path: {delivery_path}") + concatenation_map: dict[Path, Path] = self._concatenate_fastq_files( + delivery_path=delivery_path, + sample_names=sample_names, + ) + self._replace_fastq_paths( + concatenation_maps=concatenation_map, + formatted_files=formatted_files, + ) + return formatted_files + + @staticmethod + def _get_sample_names(sample_files: list[SampleFile]) -> set[str]: + """Extract sample names from the sample files.""" + return {sample_file.sample_name for sample_file in sample_files} + + def _create_sample_directories(self, sample_names: set[str], delivery_path: Path) -> None: + """Create directories for each sample name only if the file name formatter is the NestedSampleFileFormatter. + args: + sample_names: set[str]: Set of sample names. + delivery_path: Path: Path to the delivery directory. + """ + if not isinstance(self.path_name_formatter, NestedStructurePathFormatter): + return + for sample_name in sample_names: + self.file_manager.create_directories(base_path=delivery_path, directories={sample_name}) + + def _format_sample_file_paths(self, sample_files: list[SampleFile]) -> list[FormattedFile]: + """ + Return a list of formatted sample files. 
+ args: + sample_files: The sample files to format + """ + return [ + FormattedFile( + original_path=sample_file.file_path, + formatted_path=self.path_name_formatter.format_file_path( + file_path=sample_file.file_path, + provided_id=sample_file.sample_id, + provided_name=sample_file.sample_name, + ), + ) + for sample_file in sample_files + ] + + def _rename_original_files(self, formatted_files: list[FormattedFile]) -> None: + """ + Rename the formatted files. + args: + formatted_files: list[FormattedFile]: List of formatted files. + """ + LOG.debug("[FORMAT SERVICE] Renaming original files") + for formatted_file in formatted_files: + self.file_manager.rename_file( + src=formatted_file.original_path, dst=formatted_file.formatted_path + ) + + def _concatenate_fastq_files( + self, delivery_path: Path, sample_names: set[str] + ) -> dict[Path, Path]: + """Concatenate fastq files for each sample and return the forward and reverse concatenated paths. + args: + delivery_path: Path: Path to the delivery directory. + sample_names: set[str]: Set of sample names. + returns: + dict[Path, Path]: Dictionary with the original fastq file path as key and the concatenated path as value. 
+ """ + LOG.debug(f"[FORMAT SERVICE] delivery_path: {delivery_path}") + fastq_files: list[FastqFile] = self._get_unique_sample_fastq_paths( + sample_names=sample_names, delivery_path=delivery_path + ) + grouped_fastq_files: dict[str, list[FastqFile]] = self._group_fastq_files_per_sample( + sample_names=sample_names, fastq_files=fastq_files + ) + concatenation_maps: dict[Path, Path] = {} + for sample in grouped_fastq_files.keys(): + fastq_directory: Path = grouped_fastq_files[sample][0].fastq_file_path.parent + forward_path: Path = generate_concatenated_fastq_delivery_path( + fastq_directory=fastq_directory, + sample_name=sample, + direction=ReadDirection.FORWARD, + ) + reverse_path: Path = generate_concatenated_fastq_delivery_path( + fastq_directory=fastq_directory, + sample_name=sample, + direction=ReadDirection.REVERSE, + ) + self.concatenation_service.concatenate( + sample_id=sample, + fastq_directory=fastq_directory, + forward_output_path=forward_path, + reverse_output_path=reverse_path, + remove_raw=True, + ) + concatenation_maps.update( + self._get_concatenation_map( + forward_path=forward_path, + reverse_path=reverse_path, + fastq_files=grouped_fastq_files[sample], + ) + ) + return concatenation_maps + + def _get_unique_sample_fastq_paths( + self, sample_names: set[str], delivery_path: Path + ) -> list[FastqFile]: + """ + Get a list of unique sample fastq file paths given a delivery path. + args: + sample_names: set[str]: Set of sample names. + delivery_path: Path: Path to the delivery directory + returns: + list[FastqFile]: List of FastqFile objects. 
+ """ + sample_paths: list[FastqFile] = [] + LOG.debug( + f"[CONCATENATION SERVICE] Getting unique sample fastq file paths in {delivery_path}" + ) + list_of_files: list[Path] = get_all_files_in_directory_tree(delivery_path) + for sample_name in sample_names: + for file in list_of_files: + if sample_name in file.as_posix() and self._is_lane_fastq_file(file): + LOG.debug( + f"[CONCATENATION SERVICE] Found fastq file: {file} for sample: {sample_name}" + ) + sample_paths.append( + FastqFile( + fastq_file_path=Path(delivery_path, file), + sample_name=sample_name, + read_direction=self._determine_read_direction(file), + ) + ) + if not sample_paths: + raise FileNotFoundError( + f"Could not find any fastq files to concatenate in {delivery_path}." + ) + return sample_paths + + @staticmethod + def _get_concatenation_map( + forward_path: Path, reverse_path: Path, fastq_files: list[FastqFile] + ) -> dict[Path, Path]: + """ + Get a list of ConcatenationMap objects for a sample. + NOTE: the fastq_files must be grouped by sample name. + args: + forward_path: Path: Path to the forward concatenated file. + reverse_path: Path: Path to the reverse concatenated file. + fastq_files: list[FastqFile]: List of fastq files for a single ample. + """ + concatenation_map: dict[Path, Path] = {} + for fastq_file in fastq_files: + concatenation_map[fastq_file.fastq_file_path] = ( + forward_path if fastq_file.read_direction == ReadDirection.FORWARD else reverse_path + ) + return concatenation_map + + @staticmethod + def _determine_read_direction(fastq_path: Path) -> ReadDirection: + """Determine the read direction of a fastq file. + Assumes that the fastq file path contains 'R1' or 'R2' to determine the read direction. + args: + fastq_path: Path: Path to the fastq file. 
+ """ + if f"R{ReadDirection.FORWARD}" in fastq_path.as_posix(): + return ReadDirection.FORWARD + return ReadDirection.REVERSE + + def _group_fastq_files_per_sample( + self, sample_names: set[str], fastq_files: list[FastqFile] + ) -> dict[str, list[FastqFile]]: + """Group fastq files per sample. + returns a dictionary with sample names as keys and a list of fastq files as values. + args: + sample_names: set[str]: Set of sample names. + fastq_files: list[FastqFile]: List of fastq files. + """ + + sample_fastq_files: dict[str, list[FastqFile]] = { + sample_name: [] for sample_name in sample_names + } + for fastq_file in fastq_files: + sample_fastq_files[fastq_file.sample_name].append(fastq_file) + self._validate_sample_fastq_file_share_same_directory(sample_fastq_files=sample_fastq_files) + return sample_fastq_files + + def _replace_fastq_paths( + self, + concatenation_maps: dict[Path, Path], + formatted_files: list[FormattedFile], + ) -> None: + """ + Replace the fastq file paths with the new concatenated fastq file paths. + Uses the concatenation map with the formatted file path as key and the concatenated path as value. + args: + concatenation_maps: list[ConcatenationMap]: List of ConcatenationMap objects. + formatted_files: list[FormattedFile]: List of formatted files. + """ + for formatted_file in formatted_files: + if self._is_lane_fastq_file(formatted_file.formatted_path): + formatted_file.formatted_path = concatenation_maps[formatted_file.formatted_path] + + @staticmethod + def _validate_sample_fastq_file_share_same_directory( + sample_fastq_files: dict[str, list[FastqFile]] + ) -> None: + """ + Assert that all fastq files for a sample share the same directory. + This is to ensure that the files are concatenated within the expected directory path. + raises: ValueError if the fastq files are not in the same directory. + args: + sample_fastq_files: dict[str, list[FastqFile]]: Dictionary of sample names and their fastq files. 
+ """ + for sample_name in sample_fastq_files.keys(): + fastq_files: list[FastqFile] = sample_fastq_files[sample_name] + parent_dir: Path = fastq_files[0].fastq_file_path.parent + for fastq_file in fastq_files: + if fastq_file.fastq_file_path.parent != parent_dir: + raise ValueError( + f"Sample {sample_name} fastq files are not in the same directory. " + f"Cannot concatenate. It will would result in sporadic file paths." + ) + + @staticmethod + def _is_lane_fastq_file(file_path: Path) -> bool: + """Check if a fastq file is a from a lane and read direction. + Note pattern: *_L[0-9]{3}_R[1-2]_[0-9]{3}.fastq.gz + *_ is a wildcard for the flow cell id followed by sample name. + L[0-9]{3} is the lane number, i.e. L001, L002 etc. + R[1-2] is the read direction, i.e. R1 or R2. + [0-9]{3} is the trailing three digits after read direction. + args: + file_path: Path: Path to the fastq file. + """ + + pattern = f".*_L[0-9]{{3}}_R[1-2]_[0-9]{{3}}{FileExtensions.FASTQ}{FileExtensions.GZIP}" + return ( + re.fullmatch( + pattern=pattern, + string=file_path.name, + ) + is not None + ) diff --git a/cg/services/deliver_files/file_formatter/files/models.py b/cg/services/deliver_files/file_formatter/files/models.py new file mode 100644 index 0000000000..52c6db156a --- /dev/null +++ b/cg/services/deliver_files/file_formatter/files/models.py @@ -0,0 +1,12 @@ +from pathlib import Path +from pydantic import BaseModel + +from cg.constants.constants import ReadDirection + + +class FastqFile(BaseModel): + """A fastq file with a sample name, file path and read direction.""" + + sample_name: str + fastq_file_path: Path + read_direction: ReadDirection diff --git a/cg/services/deliver_files/file_formatter/files/mutant_service.py b/cg/services/deliver_files/file_formatter/files/mutant_service.py new file mode 100644 index 0000000000..910a72bd70 --- /dev/null +++ b/cg/services/deliver_files/file_formatter/files/mutant_service.py @@ -0,0 +1,147 @@ +import logging +from pathlib import Path +import 
re +from cg.apps.lims import LimsAPI +from cg.services.deliver_files.file_fetcher.models import SampleFile +from cg.services.deliver_files.file_formatter.files.abstract import FileFormatter +from cg.services.deliver_files.file_formatter.destination.models import FormattedFile +from cg.services.deliver_files.file_formatter.files.concatenation_service import ( + SampleFileConcatenationFormatter, +) +from cg.services.deliver_files.file_formatter.files.sample_service import FileManager + +LOG = logging.getLogger(__name__) + + +class MutantFileFormatter(FileFormatter): + """ + Formatter for file to deliver or upload for the Mutant workflow. + Args: + lims_api: The LIMS API + file_formatter: The SampleFileConcatenationFormatter. This is used to format the files and concatenate the fastq files. + file_manager: The FileManager + + """ + + def __init__( + self, + lims_api: LimsAPI, + file_formatter: SampleFileConcatenationFormatter, + file_manager: FileManager, + ): + self.lims_api: LimsAPI = lims_api + self.file_formatter: SampleFileConcatenationFormatter = file_formatter + self.file_manager = file_manager + + def format_files( + self, moved_files: list[SampleFile], delivery_path: Path + ) -> list[FormattedFile]: + """ + Format the mutant files to deliver and return the formatted files. 
+ args: + moved_files: The sample files to format + delivery_path: The path to deliver the files to + + """ + LOG.debug("[FORMAT SERVICE] Formatting and concatenating mutant files") + formatted_files: list[FormattedFile] = self.file_formatter.format_files( + moved_files=moved_files, delivery_path=delivery_path + ) + appended_formatted_files: list[FormattedFile] = self._add_lims_metadata_to_file_name( + formatted_files=formatted_files, sample_files=moved_files + ) + unique_formatted_files: list[FormattedFile] = self._filter_unique_path_combinations( + appended_formatted_files + ) + for unique_files in unique_formatted_files: + self.file_manager.rename_file( + src=unique_files.original_path, dst=unique_files.formatted_path + ) + return unique_formatted_files + + @staticmethod + def _is_concatenated_file(file_path: Path) -> bool: + """Check if the file is a concatenated file. + Returns True if the file is a concatenated file, otherwise False. + regex pattern: *._[1,2].fastq.gz + *. is the sample id + _[1,2] is the read direction + .fastq.gz is the file extension + args: + file_path: The file path to check + """ + pattern = ".*_[1,2].fastq.gz" + return re.fullmatch(pattern, file_path.name) is not None + + def _add_lims_metadata_to_file_name( + self, formatted_files: list[FormattedFile], sample_files: list[SampleFile] + ) -> list[FormattedFile]: + """ + This function adds the region and lab code to the file name of the formatted files. + Note: The region and lab code is fetched from LIMS using the sample id. It is required for delivery of the files. + This should only be done for concatenated fastq files. 
+ + args: + formatted_files: The formatted files to add the metadata to + sample_files: The sample files to get the metadata from + """ + appended_formatted_files: list[FormattedFile] = [] + for formatted_file in formatted_files: + if self._is_concatenated_file(formatted_file.formatted_path): + sample_id: str = self._get_sample_id_by_original_path( + original_path=formatted_file.original_path, sample_files=sample_files + ) + lims_meta_data = self.lims_api.get_sample_region_and_lab_code(sample_id) + + new_original_path: Path = formatted_file.formatted_path + new_formatted_path = Path( + formatted_file.formatted_path.parent, + f"{lims_meta_data}{formatted_file.formatted_path.name}", + ) + appended_formatted_files.append( + FormattedFile( + original_path=new_original_path, formatted_path=new_formatted_path + ) + ) + else: + appended_formatted_files.append(formatted_file) + return appended_formatted_files + + @staticmethod + def _get_sample_id_by_original_path(original_path: Path, sample_files: list[SampleFile]) -> str: + """Get the sample id by the original path of the sample file. + args: + original_path: The original path of the sample file + sample_files: The list of sample files to search in + """ + for sample_file in sample_files: + if sample_file.file_path == original_path: + return sample_file.sample_id + raise ValueError(f"Could not find sample file with path {original_path}") + + @staticmethod + def _filter_unique_path_combinations( + formatted_files: list[FormattedFile], + ) -> list[FormattedFile]: + """ + Filter out duplicates from the formatted files list. + + note: + During fastq concatenation Sample_L1_R1 and Sample_L2_R1 files are concatenated + and moved to the same file Concat_Sample. This mean that there can be multiple entries + for the same concatenated file in the formatted_files list + coming from the SampleFileConcatenationService. 
+ This function filters out the duplicates to avoid moving the same file multiple times + which would result in an error the second time since the file is no longer in the original path. + + args: + formatted_files: The formatted files to filter + """ + unique_combinations = set() + unique_files: list[FormattedFile] = [] + for formatted_file in formatted_files: + combination = (formatted_file.original_path, formatted_file.formatted_path) + if combination not in unique_combinations: + unique_combinations.add(combination) + unique_files.append(formatted_file) + return unique_files diff --git a/cg/services/deliver_files/file_formatter/files/sample_service.py b/cg/services/deliver_files/file_formatter/files/sample_service.py new file mode 100644 index 0000000000..276a3b2649 --- /dev/null +++ b/cg/services/deliver_files/file_formatter/files/sample_service.py @@ -0,0 +1,71 @@ +import logging +from pathlib import Path + +from cg.services.deliver_files.file_fetcher.models import SampleFile +from cg.services.deliver_files.file_formatter.files.abstract import FileFormatter +from cg.services.deliver_files.file_formatter.destination.models import FormattedFile +from cg.services.deliver_files.file_formatter.path_name.abstract import PathNameFormatter +from cg.services.deliver_files.utils import FileManager + +LOG = logging.getLogger(__name__) + + +class SampleFileFormatter(FileFormatter): + """ + Format the sample files to deliver. + Used for all workflows except Microsalt and Mutant. 
+ args: + file_manager: The file manager + path_name_formatter: The path name formatter to format paths to either a flat or nested structure in the delivery destination + """ + + def __init__( + self, + file_manager: FileManager, + path_name_formatter: PathNameFormatter, + ): + self.file_manager = file_manager + self.path_name_formatter = path_name_formatter + + def format_files( + self, moved_files: list[SampleFile], delivery_path: Path + ) -> list[FormattedFile]: + """ + Format the sample files to deliver and return the formatted files. + args: + moved_sample_files: The sample files to format. These are files that have been moved from housekeeper to the delivery path. + delivery_path: The path to deliver the files to + """ + LOG.debug("[FORMAT SERVICE] Formatting sample files") + sample_names: set[str] = self._get_sample_names(sample_files=moved_files) + for sample_name in sample_names: + self.file_manager.create_directories(base_path=delivery_path, directories={sample_name}) + formatted_files: list[FormattedFile] = self._format_sample_file_paths(moved_files) + for formatted_file in formatted_files: + self.file_manager.rename_file( + src=formatted_file.original_path, dst=formatted_file.formatted_path + ) + return formatted_files + + @staticmethod + def _get_sample_names(sample_files: list[SampleFile]) -> set[str]: + """Extract sample names from the sample files.""" + return {sample_file.sample_name for sample_file in sample_files} + + def _format_sample_file_paths(self, sample_files: list[SampleFile]) -> list[FormattedFile]: + """ + Return a list of formatted sample files. 
+ args: + sample_files: The sample files to format + """ + return [ + FormattedFile( + original_path=sample_file.file_path, + formatted_path=self.path_name_formatter.format_file_path( + file_path=sample_file.file_path, + provided_id=sample_file.sample_id, + provided_name=sample_file.sample_name, + ), + ) + for sample_file in sample_files + ] diff --git a/cg/services/deliver_files/file_formatter/path_name/__init__.py b/cg/services/deliver_files/file_formatter/path_name/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/cg/services/deliver_files/file_formatter/path_name/abstract.py b/cg/services/deliver_files/file_formatter/path_name/abstract.py new file mode 100644 index 0000000000..5fc0409843 --- /dev/null +++ b/cg/services/deliver_files/file_formatter/path_name/abstract.py @@ -0,0 +1,13 @@ +from abc import abstractmethod, ABC +from pathlib import Path + + +class PathNameFormatter(ABC): + """ + Abstract class that encapsulates the logic required for formatting the path name. + """ + + @abstractmethod + def format_file_path(self, file_path: Path, provided_id: str, provided_name: str) -> Path: + """Format the file path.""" + pass diff --git a/cg/services/deliver_files/file_formatter/path_name/flat_structure.py b/cg/services/deliver_files/file_formatter/path_name/flat_structure.py new file mode 100644 index 0000000000..f851a6bf7b --- /dev/null +++ b/cg/services/deliver_files/file_formatter/path_name/flat_structure.py @@ -0,0 +1,24 @@ +from pathlib import Path + +from cg.services.deliver_files.file_formatter.files.sample_service import LOG +from cg.services.deliver_files.file_formatter.path_name.abstract import PathNameFormatter + + +class FlatStructurePathFormatter(PathNameFormatter): + """ + Class to format sample file names in place. + """ + + def format_file_path(self, file_path: Path, provided_id: str, provided_name: str) -> Path: + """ + Returns formatted files with original and formatted file names: + Replaces id by name. 
+ args: + file_path: The path to the file + provided_id: The id to replace + provided_name: The name to replace the id with + """ + LOG.debug("[FORMAT SERVICE] Formatting sample file names with flat structure.") + replaced_name = file_path.name.replace(provided_id, provided_name) + formatted_path = Path(file_path.parent, replaced_name) + return formatted_path diff --git a/cg/services/deliver_files/file_formatter/path_name/nested_structure.py b/cg/services/deliver_files/file_formatter/path_name/nested_structure.py new file mode 100644 index 0000000000..26ede0ea99 --- /dev/null +++ b/cg/services/deliver_files/file_formatter/path_name/nested_structure.py @@ -0,0 +1,26 @@ +from pathlib import Path + +from cg.services.deliver_files.file_formatter.files.sample_service import LOG +from cg.services.deliver_files.file_formatter.path_name.abstract import PathNameFormatter + + +class NestedStructurePathFormatter(PathNameFormatter): + """ + Class to format sample file names and paths in a nested format used to deliver files to a customer inbox. + """ + + def format_file_path(self, file_path: Path, provided_id: str, provided_name: str) -> Path: + """ + Returns formatted files with original and formatted file names: + 1. Adds a folder with provided name to the path of the files. + 2. Replaces id by name. 
+ + args: + file_path: The path to the file + provided_id: The id to replace + provided_name: The name to replace the id with + """ + LOG.debug("[FORMAT SERVICE] Formatting sample file names with nested structure.") + replaced_name = file_path.name.replace(provided_id, provided_name) + formatted_path = Path(file_path.parent, provided_name, replaced_name) + return formatted_path diff --git a/cg/services/deliver_files/file_formatter/utils/case_service.py b/cg/services/deliver_files/file_formatter/utils/case_service.py deleted file mode 100644 index ccc4f656e6..0000000000 --- a/cg/services/deliver_files/file_formatter/utils/case_service.py +++ /dev/null @@ -1,48 +0,0 @@ -import os -from pathlib import Path - -from cg.services.deliver_files.file_fetcher.models import CaseFile -from cg.services.deliver_files.file_formatter.models import FormattedFile - - -class CaseFileFormatter: - - def format_files( - self, moved_files: list[CaseFile], ticket_dir_path: Path - ) -> list[FormattedFile]: - """Format the case files to deliver and return the formatted files.""" - self._create_case_name_folder( - ticket_path=ticket_dir_path, case_name=moved_files[0].case_name - ) - return self._format_case_files(moved_files) - - def _format_case_files(self, case_files: list[CaseFile]) -> list[FormattedFile]: - formatted_files: list[FormattedFile] = self._get_formatted_files(case_files) - for formatted_file in formatted_files: - os.rename(src=formatted_file.original_path, dst=formatted_file.formatted_path) - return formatted_files - - @staticmethod - def _create_case_name_folder(ticket_path: Path, case_name: str) -> None: - case_dir_path = Path(ticket_path, case_name) - case_dir_path.mkdir(exist_ok=True) - - @staticmethod - def _get_formatted_files(case_files: list[CaseFile]) -> list[FormattedFile]: - """ - Returns formatted files: - 1. Adds a folder with case name to the path of the case files. - 2. Replaces case id by case name. 
- """ - formatted_files: list[FormattedFile] = [] - for case_file in case_files: - replaced_case_file_name: str = case_file.file_path.name.replace( - case_file.case_id, case_file.case_name - ) - formatted_file_path = Path( - case_file.file_path.parent, case_file.case_name, replaced_case_file_name - ) - formatted_files.append( - FormattedFile(original_path=case_file.file_path, formatted_path=formatted_file_path) - ) - return formatted_files diff --git a/cg/services/deliver_files/file_formatter/utils/sample_concatenation_service.py b/cg/services/deliver_files/file_formatter/utils/sample_concatenation_service.py deleted file mode 100644 index c7eaea6b63..0000000000 --- a/cg/services/deliver_files/file_formatter/utils/sample_concatenation_service.py +++ /dev/null @@ -1,128 +0,0 @@ -from pathlib import Path - -from cg.constants.constants import ReadDirection, FileFormat, FileExtensions - -from cg.services.fastq_concatenation_service.fastq_concatenation_service import ( - FastqConcatenationService, -) -from cg.services.fastq_concatenation_service.utils import generate_concatenated_fastq_delivery_path -from cg.services.deliver_files.file_fetcher.models import SampleFile -from cg.services.deliver_files.file_formatter.models import FormattedFile -from cg.services.deliver_files.file_formatter.utils.sample_service import ( - SampleFileNameFormatter, - FileManagingService, -) - - -class SampleFileConcatenationFormatter: - """ - Format the sample files to deliver, concatenate fastq files and return the formatted files. - Used for workflows: Microsalt. 
- """ - - def __init__( - self, - file_manager: FileManagingService, - file_formatter: SampleFileNameFormatter, - concatenation_service: FastqConcatenationService, - ): - self.file_manager = file_manager - self.file_name_formatter = file_formatter - self.concatenation_service = concatenation_service - - def format_files( - self, moved_files: list[SampleFile], ticket_dir_path: Path - ) -> list[FormattedFile]: - """Format the sample files to deliver, concatenate fastq files and return the formatted files.""" - sample_names: set[str] = self.file_name_formatter.get_sample_names(sample_files=moved_files) - for sample_name in sample_names: - self.file_manager.create_directories( - base_path=ticket_dir_path, directories={sample_name} - ) - formatted_files: list[FormattedFile] = self.file_name_formatter.format_sample_file_names( - sample_files=moved_files - ) - for formatted_file in formatted_files: - self.file_manager.rename_file( - src=formatted_file.original_path, dst=formatted_file.formatted_path - ) - forward_paths, reverse_path = self._concatenate_fastq_files(formatted_files=formatted_files) - self._replace_fastq_paths( - reverse_paths=reverse_path, - forward_paths=forward_paths, - formatted_files=formatted_files, - ) - return formatted_files - - def _concatenate_fastq_files( - self, formatted_files: list[FormattedFile] - ) -> tuple[list[Path], list[Path]]: - unique_sample_dir_paths: set[Path] = self._get_unique_sample_paths( - sample_files=formatted_files - ) - forward_paths: list[Path] = [] - reverse_paths: list[Path] = [] - for fastq_directory in unique_sample_dir_paths: - sample_name: str = fastq_directory.name - - forward_path: Path = generate_concatenated_fastq_delivery_path( - fastq_directory=fastq_directory, - sample_name=sample_name, - direction=ReadDirection.FORWARD, - ) - forward_paths.append(forward_path) - reverse_path: Path = generate_concatenated_fastq_delivery_path( - fastq_directory=fastq_directory, - sample_name=sample_name, - 
direction=ReadDirection.REVERSE, - ) - reverse_paths.append(reverse_path) - self.concatenation_service.concatenate( - fastq_directory=fastq_directory, - forward_output_path=forward_path, - reverse_output_path=reverse_path, - remove_raw=True, - ) - return forward_paths, reverse_paths - - @staticmethod - def _get_unique_sample_paths(sample_files: list[FormattedFile]) -> set[Path]: - sample_paths: list[Path] = [] - for sample_file in sample_files: - sample_paths.append(sample_file.formatted_path.parent) - return set(sample_paths) - - @staticmethod - def _replace_fastq_formatted_file_path( - formatted_files: list[FormattedFile], - direction: ReadDirection, - new_path: Path, - ) -> None: - """Replace the formatted file path with the new path.""" - for formatted_file in formatted_files: - if ( - formatted_file.formatted_path.parent == new_path.parent - and f"{FileFormat.FASTQ}{FileExtensions.GZIP}" in formatted_file.formatted_path.name - and f"R{direction}" in formatted_file.formatted_path.name - ): - formatted_file.formatted_path = new_path - - def _replace_fastq_paths( - self, - forward_paths: list[Path], - reverse_paths: list[Path], - formatted_files: list[FormattedFile], - ) -> None: - """Replace the fastq file paths with the new concatenated fastq file paths.""" - for forward_path in forward_paths: - self._replace_fastq_formatted_file_path( - formatted_files=formatted_files, - direction=ReadDirection.FORWARD, - new_path=forward_path, - ) - for reverse_path in reverse_paths: - self._replace_fastq_formatted_file_path( - formatted_files=formatted_files, - direction=ReadDirection.REVERSE, - new_path=reverse_path, - ) diff --git a/cg/services/deliver_files/file_formatter/utils/sample_service.py b/cg/services/deliver_files/file_formatter/utils/sample_service.py deleted file mode 100644 index 8efc383d1c..0000000000 --- a/cg/services/deliver_files/file_formatter/utils/sample_service.py +++ /dev/null @@ -1,84 +0,0 @@ -import os -from pathlib import Path -from 
cg.services.deliver_files.file_fetcher.models import SampleFile -from cg.services.deliver_files.file_formatter.models import FormattedFile - - -class FileManagingService: - """ - Service to manage files. - Handles operations that create or rename files and directories. - """ - - @staticmethod - def create_directories(base_path: Path, directories: set[str]) -> None: - """Create directories for given names under the base path.""" - for directory in directories: - Path(base_path, directory).mkdir(exist_ok=True) - - @staticmethod - def rename_file(src: Path, dst: Path) -> None: - """Rename a file from src to dst.""" - os.rename(src, dst) - - -class SampleFileNameFormatter: - """ - Class to format sample file names. - """ - - @staticmethod - def get_sample_names(sample_files: list[SampleFile]) -> set[str]: - """Extract sample names from the sample files.""" - return {sample_file.sample_name for sample_file in sample_files} - - @staticmethod - def format_sample_file_names(sample_files: list[SampleFile]) -> list[FormattedFile]: - """ - Returns formatted files with original and formatted file names: - 1. Adds a folder with sample name to the path of the sample files. - 2. Replaces sample id by sample name. - """ - formatted_files = [] - for sample_file in sample_files: - replaced_name = sample_file.file_path.name.replace( - sample_file.sample_id, sample_file.sample_name - ) - formatted_path = Path( - sample_file.file_path.parent, sample_file.sample_name, replaced_name - ) - formatted_files.append( - FormattedFile(original_path=sample_file.file_path, formatted_path=formatted_path) - ) - return formatted_files - - -class SampleFileFormatter: - """ - Format the sample files to deliver. - Used for all workflows except Microsalt and Mutant. 
- """ - - def __init__( - self, file_manager: FileManagingService, file_name_formatter: SampleFileNameFormatter - ): - self.file_manager = file_manager - self.file_name_formatter = file_name_formatter - - def format_files( - self, moved_files: list[SampleFile], ticket_dir_path: Path - ) -> list[FormattedFile]: - """Format the sample files to deliver and return the formatted files.""" - sample_names: set[str] = self.file_name_formatter.get_sample_names(sample_files=moved_files) - for sample_name in sample_names: - self.file_manager.create_directories( - base_path=ticket_dir_path, directories={sample_name} - ) - formatted_files: list[FormattedFile] = self.file_name_formatter.format_sample_file_names( - sample_files=moved_files - ) - for formatted_file in formatted_files: - self.file_manager.rename_file( - src=formatted_file.original_path, dst=formatted_file.formatted_path - ) - return formatted_files diff --git a/cg/services/deliver_files/file_mover/abstract.py b/cg/services/deliver_files/file_mover/abstract.py new file mode 100644 index 0000000000..54bf7638a6 --- /dev/null +++ b/cg/services/deliver_files/file_mover/abstract.py @@ -0,0 +1,11 @@ +from abc import ABC, abstractmethod +from pathlib import Path + +from cg.services.deliver_files.file_fetcher.models import DeliveryFiles + + +class DestinationFilesMover(ABC): + @abstractmethod + def move_files(self, delivery_files: DeliveryFiles, delivery_base_path: Path) -> DeliveryFiles: + """Move files to the delivery folder.""" + pass diff --git a/cg/services/deliver_files/file_mover/base_service.py b/cg/services/deliver_files/file_mover/base_service.py new file mode 100644 index 0000000000..67ad1827b1 --- /dev/null +++ b/cg/services/deliver_files/file_mover/base_service.py @@ -0,0 +1,30 @@ +from pathlib import Path + +from cg.services.deliver_files.file_fetcher.models import DeliveryFiles, SampleFile, CaseFile +from cg.services.deliver_files.file_mover.abstract import DestinationFilesMover +from 
cg.services.deliver_files.utils import FileMover + + +class BaseDestinationFilesMover(DestinationFilesMover): + """ + Class to move files directly to the delivery base path. + """ + + def __init__(self, file_mover: FileMover): + self.file_mover = file_mover + + def move_files(self, delivery_files: DeliveryFiles, delivery_base_path: Path) -> DeliveryFiles: + """ + Move the files directly to the delivery base path. + args: + delivery_files: DeliveryFiles: The files to move. + delivery_base_path: Path: The path to move the files to. + """ + delivery_files.delivery_data.delivery_path = delivery_base_path + delivery_files.case_files = self.file_mover.move_and_update_files( + file_models=delivery_files.case_files, target_dir=delivery_base_path + ) + delivery_files.sample_files = self.file_mover.move_and_update_files( + file_models=delivery_files.sample_files, target_dir=delivery_base_path + ) + return delivery_files diff --git a/cg/services/deliver_files/file_mover/customer_inbox_service.py b/cg/services/deliver_files/file_mover/customer_inbox_service.py new file mode 100644 index 0000000000..d613bca4bf --- /dev/null +++ b/cg/services/deliver_files/file_mover/customer_inbox_service.py @@ -0,0 +1,61 @@ +import logging +from pathlib import Path + +from cg.constants.delivery import INBOX_NAME +from cg.services.deliver_files.file_fetcher.models import ( + DeliveryFiles, + DeliveryMetaData, +) +from cg.services.deliver_files.file_mover.abstract import DestinationFilesMover +from cg.services.deliver_files.utils import FileMover + +LOG = logging.getLogger(__name__) + + +class CustomerInboxDestinationFilesMover(DestinationFilesMover): + """ + Class to move files to the customer folder. + """ + + def __init__(self, file_mover: FileMover): + self.file_mover = file_mover + + def move_files(self, delivery_files: DeliveryFiles, delivery_base_path: Path) -> DeliveryFiles: + """ + Move the files to the customer folder. + args: + delivery_files: DeliveryFiles: The files to move. 
+ delivery_base_path: Path: The path to move the files to. + """ + inbox_ticket_dir_path: Path = self._create_ticket_inbox_dir_path( + delivery_base_path=delivery_base_path, delivery_data=delivery_files.delivery_data + ) + delivery_files.delivery_data.delivery_path = inbox_ticket_dir_path + + self.file_mover.create_directories( + base_path=delivery_base_path, + directories={str(inbox_ticket_dir_path.relative_to(delivery_base_path))}, + ) + delivery_files.case_files = self.file_mover.move_and_update_files( + file_models=delivery_files.case_files, target_dir=inbox_ticket_dir_path + ) + delivery_files.sample_files = self.file_mover.move_and_update_files( + file_models=delivery_files.sample_files, target_dir=inbox_ticket_dir_path + ) + return delivery_files + + @staticmethod + def _create_ticket_inbox_dir_path( + delivery_base_path: Path, delivery_data: DeliveryMetaData + ) -> Path: + """Generate the path to the ticket inbox directory. + args: + delivery_base_path: The base path to the delivery folder. + delivery_data: The delivery data containing the customer internal id and ticket id. + """ + return Path( + delivery_base_path, + delivery_data.customer_internal_id, + INBOX_NAME, + delivery_data.ticket_id, + ) diff --git a/cg/services/deliver_files/file_mover/service.py b/cg/services/deliver_files/file_mover/service.py deleted file mode 100644 index d02d55d6be..0000000000 --- a/cg/services/deliver_files/file_mover/service.py +++ /dev/null @@ -1,111 +0,0 @@ -import logging -from pathlib import Path - -from cg.constants.delivery import INBOX_NAME -from cg.services.deliver_files.file_fetcher.models import ( - CaseFile, - DeliveryFiles, - DeliveryMetaData, - SampleFile, -) -from cg.utils.files import link_or_overwrite_file - -LOG = logging.getLogger(__name__) - - -class DeliveryFilesMover: - """ - Class that encapsulates the logic required for moving files to the customer folder. 
- """ - - def move_files(self, delivery_files: DeliveryFiles, delivery_base_path: Path) -> DeliveryFiles: - """Move the files to the customer folder.""" - inbox_ticket_dir_path: Path = self._create_ticket_inbox_dir_path( - delivery_base_path=delivery_base_path, delivery_data=delivery_files.delivery_data - ) - delivery_files.delivery_data.customer_ticket_inbox = inbox_ticket_dir_path - self._create_ticket_inbox_folder(inbox_ticket_dir_path) - self._create_hard_links_for_delivery_files( - delivery_files=delivery_files, inbox_dir_path=inbox_ticket_dir_path - ) - return self._replace_file_paths_with_inbox_dir_paths( - delivery_files=delivery_files, inbox_dir_path=inbox_ticket_dir_path - ) - - @staticmethod - def _create_ticket_inbox_folder( - inbox_ticket_dir_path: Path, - ) -> Path: - """Create a ticket inbox folder in the customer folder, overwrites if already present.""" - LOG.debug(f"[MOVE SERVICE] Creating ticket inbox folder: {inbox_ticket_dir_path}") - inbox_ticket_dir_path.mkdir(parents=True, exist_ok=True) - return inbox_ticket_dir_path - - @staticmethod - def _create_ticket_inbox_dir_path( - delivery_base_path: Path, delivery_data: DeliveryMetaData - ) -> Path: - """Create the path to the ticket inbox folder.""" - return Path( - delivery_base_path, - delivery_data.customer_internal_id, - INBOX_NAME, - delivery_data.ticket_id, - ) - - @staticmethod - def _create_inbox_file_path(file_path: Path, inbox_dir_path: Path) -> Path: - """Create the path to the inbox file.""" - return Path(inbox_dir_path, file_path.name) - - def _create_hard_link_file_paths( - self, file_models: list[SampleFile | CaseFile], inbox_dir_path: Path - ) -> None: - """Create hard links to the sample files in the customer folder.""" - for file_model in file_models: - inbox_file_path: Path = self._create_inbox_file_path( - file_path=file_model.file_path, inbox_dir_path=inbox_dir_path - ) - link_or_overwrite_file(src=file_model.file_path, dst=inbox_file_path) - - def 
_create_hard_links_for_delivery_files( - self, delivery_files: DeliveryFiles, inbox_dir_path: Path - ) -> None: - """Create hard links to the files in the customer folder.""" - LOG.debug(f"[MOVE SERVICE] Creating hard links for delivery files in: {inbox_dir_path}") - if delivery_files.case_files: - self._create_hard_link_file_paths( - file_models=delivery_files.case_files, inbox_dir_path=inbox_dir_path - ) - self._create_hard_link_file_paths( - file_models=delivery_files.sample_files, inbox_dir_path=inbox_dir_path - ) - - def _replace_file_path_with_inbox_dir_path( - self, file_models: list[SampleFile | CaseFile], inbox_dir_path: Path - ) -> list[SampleFile | CaseFile]: - """Replace the file path with the inbox path.""" - for file_model in file_models: - inbox_file_path: Path = self._create_inbox_file_path( - file_path=file_model.file_path, inbox_dir_path=inbox_dir_path - ) - file_model.file_path = inbox_file_path - return file_models - - def _replace_file_paths_with_inbox_dir_paths( - self, - delivery_files: DeliveryFiles, - inbox_dir_path: Path, - ) -> DeliveryFiles: - """ - Replace to original file paths in the delivery files with the customer inbox file paths. 
- """ - LOG.debug(f"[MOVE SERVICE] Replacing file paths with inbox dir path: {inbox_dir_path}") - if delivery_files.case_files: - delivery_files.case_files = self._replace_file_path_with_inbox_dir_path( - file_models=delivery_files.case_files, inbox_dir_path=inbox_dir_path - ) - delivery_files.sample_files = self._replace_file_path_with_inbox_dir_path( - file_models=delivery_files.sample_files, inbox_dir_path=inbox_dir_path - ) - return delivery_files diff --git a/cg/services/deliver_files/tag_fetcher/bam_service.py b/cg/services/deliver_files/tag_fetcher/bam_service.py index 571cf265df..6abf3a2830 100644 --- a/cg/services/deliver_files/tag_fetcher/bam_service.py +++ b/cg/services/deliver_files/tag_fetcher/bam_service.py @@ -14,7 +14,12 @@ class BamDeliveryTagsFetcher(FetchDeliveryFileTagsService): @handle_tag_errors def fetch_tags(self, workflow: Workflow) -> DeliveryFileTags: - """Fetch the tags for the bam files to deliver.""" + """ + Fetch the tags for the bam files to deliver. + Hardcoded to only return the BAM tag. 
+ args: + workflow: The workflow to fetch tags for + """ self._validate_workflow(workflow=workflow) return DeliveryFileTags( case_tags=None, diff --git a/cg/services/deliver_files/tag_fetcher/fohm_upload_service.py b/cg/services/deliver_files/tag_fetcher/fohm_upload_service.py new file mode 100644 index 0000000000..e0a42e393f --- /dev/null +++ b/cg/services/deliver_files/tag_fetcher/fohm_upload_service.py @@ -0,0 +1,47 @@ +from cg.constants import Workflow, SequencingFileTag +from cg.services.deliver_files.tag_fetcher.abstract import FetchDeliveryFileTagsService +from cg.services.deliver_files.tag_fetcher.error_handling import handle_tag_errors +from cg.services.deliver_files.tag_fetcher.models import DeliveryFileTags + + +class FOHMUploadTagsFetcher(FetchDeliveryFileTagsService): + """Class to fetch tags for FOHM upload files.""" + + @handle_tag_errors + def fetch_tags(self, workflow: Workflow) -> DeliveryFileTags: + """ + Fetch the tags for the bam files to deliver. + NOTE: workflow raw data here is required to fit the implementation of the raw data delivery file fetcher. + if workflow is MUTANT, return tags for consensus-sample and vcf-report to fetch sample files from the case bundle. + if workflow is RAW_DATA, return tags for fastq to fetch fastq files from the sample bundle. + Required since some of the sample specific files are stored on the case bundle, but also fastq files. + Not separating these would cause fetching of case bundle fastq files if present. + + Hardcoded to only return the tags for the files to deliver. + args: + workflow: Workflow: The workflow to fetch tags + """ + self._validate_workflow(workflow=workflow) + return ( + DeliveryFileTags( + case_tags=None, + sample_tags=[{"consensus-sample"}, {"vcf-report"}], + ) + if workflow == Workflow.MUTANT + else DeliveryFileTags( + case_tags=None, + sample_tags=[{SequencingFileTag.FASTQ}], + ) + ) + + @staticmethod + def _validate_workflow(workflow: Workflow): + """ + Validate the workflow. 
+ NOTE: workflow raw data here is required to fit the implementation of the raw data delivery file fetcher. + args: + workflow: Workflow: The workflow to validate. + """ + if workflow not in [Workflow.MUTANT, Workflow.RAW_DATA]: + raise ValueError(f"Workflow {workflow} is not supported for FOHM upload file delivery.") + return workflow diff --git a/cg/services/deliver_files/tag_fetcher/models.py b/cg/services/deliver_files/tag_fetcher/models.py index 580e95c663..791b7b767e 100644 --- a/cg/services/deliver_files/tag_fetcher/models.py +++ b/cg/services/deliver_files/tag_fetcher/models.py @@ -2,5 +2,11 @@ class DeliveryFileTags(BaseModel): + """ + Model to hold the tags for the files to deliver. + case_tags: The tags for the case files to deliver + sample_tags: The tags for the sample files to deliver + """ + case_tags: list[set[str]] | None sample_tags: list[set[str]] diff --git a/cg/services/deliver_files/tag_fetcher/sample_and_case_service.py b/cg/services/deliver_files/tag_fetcher/sample_and_case_service.py index 14bc032266..fe822b9b2b 100644 --- a/cg/services/deliver_files/tag_fetcher/sample_and_case_service.py +++ b/cg/services/deliver_files/tag_fetcher/sample_and_case_service.py @@ -13,7 +13,10 @@ class SampleAndCaseDeliveryTagsFetcher(FetchDeliveryFileTagsService): @handle_tag_errors def fetch_tags(self, workflow: Workflow) -> DeliveryFileTags: - """Get the case tags for the files that need to be delivered for a workflow.""" + """Get the case tags for the files that need to be delivered for a workflow. 
+ args: + workflow: The workflow to fetch tags for + """ self._validate_workflow(workflow) return DeliveryFileTags( case_tags=PIPELINE_ANALYSIS_TAG_MAP[workflow]["case_tags"], diff --git a/cg/services/deliver_files/utils.py b/cg/services/deliver_files/utils.py new file mode 100644 index 0000000000..69452ef988 --- /dev/null +++ b/cg/services/deliver_files/utils.py @@ -0,0 +1,123 @@ +import logging +import os +from pathlib import Path + +from cg.services.deliver_files.file_fetcher.models import SampleFile, CaseFile + +LOG = logging.getLogger(__name__) + + +class FileManager: + """ + Service to manage files. + Handles operations that create or rename files and directories. + """ + + @staticmethod + def create_directories(base_path: Path, directories: set[str]) -> None: + """Create directories for given names under the base path. + args: + base_path: The base path to create the directories under. + directories: The directories to create within the given base path. Can be a list of one. + """ + + for directory in directories: + LOG.debug(f"[FileManager] Creating directory or file: {base_path}/{directory}") + Path(base_path, directory).mkdir(parents=True, exist_ok=True) + + @staticmethod + def rename_file(src: Path, dst: Path) -> None: + """ + Rename a file from src to dst. + raise ValueError if src does not exist. + args: + src: The source file path. + dst: The destination file path. + """ + if not src or not dst: + raise ValueError("Source and destination paths cannot be None.") + LOG.debug(f"[FileManager] Renaming file: {src} -> {dst}") + if not src.exists(): + raise FileNotFoundError(f"Source file {src} does not exist.") + os.rename(src=src, dst=dst) + + @staticmethod + def create_hard_link(src: Path, dst: Path) -> None: + """ + Create a hard link from src to dst. + args: + src: The source file path. + dst: The destination file path. 
+ """ + LOG.debug(f"[FileManager] Creating hard link: {src} -> {dst}") + os.link(src=src, dst=dst) + + +class FileMover: + """ + Service class to move files. + Requires a file management service to perform file operations. + """ + + def __init__(self, file_manager): + """ + args: + file_manager: Service for file operations (e.g., create directories, move files). + """ + self.file_management_service = file_manager + + def create_directories(self, base_path: Path, directories: set[str]) -> None: + """Create required directories. + args: + base_path: The base path to create the directories under. + directories: The directories to create. + """ + self.file_management_service.create_directories(base_path, directories) + + def move_files_to_directory(self, file_models: list, target_dir: Path) -> None: + """Move files to the target directory. + args: + file_models: The file models that contain the files to move. + target_dir: The directory to move the files to. + """ + for file_model in file_models: + target_path = Path(target_dir, file_model.file_path.name) + self._move_or_link_file(src=file_model.file_path, dst=target_path) + + @staticmethod + def update_file_paths( + file_models: list[CaseFile | SampleFile], target_dir: Path + ) -> list[CaseFile | SampleFile]: + """Update file paths to point to the target directory. + args: + file_models: The file models to update. + target_dir: The target directory to point the file paths to. + """ + for file_model in file_models: + file_model.file_path = Path(target_dir, file_model.file_path.name) + return file_models + + def move_and_update_files( + self, file_models: list[CaseFile | SampleFile], target_dir: Path + ) -> list[CaseFile | SampleFile]: + """Move files to the target directory and update the file paths. + args: + file_models: The file models that contain the files to move. + target_dir: The directory to move the files to. 
+ """ + if file_models: + self.move_files_to_directory(file_models=file_models, target_dir=target_dir) + return self.update_file_paths(file_models=file_models, target_dir=target_dir) + return file_models + + def _move_or_link_file(self, src: Path, dst: Path) -> None: + """Move or create a hard link for a file. + args: + src: The source file path + dst: The destination file path + """ + LOG.debug(f"[FileMover] Moving file: {src} -> {dst}") + if dst.exists(): + LOG.debug(f"Overwriting existing file: {dst}") + dst.unlink() + self.file_management_service.create_hard_link(src=src, dst=dst) diff --git a/cg/services/fastq_concatenation_service/fastq_concatenation_service.py b/cg/services/fastq_concatenation_service/fastq_concatenation_service.py index c36673cee6..4aaec3cf02 100644 --- a/cg/services/fastq_concatenation_service/fastq_concatenation_service.py +++ b/cg/services/fastq_concatenation_service/fastq_concatenation_service.py @@ -1,28 +1,48 @@ import logging from pathlib import Path +from cg.constants.constants import ReadDirection from cg.services.fastq_concatenation_service.utils import ( - concatenate_forward_reads, - concatenate_reverse_reads, remove_raw_fastqs, + concatenate_fastq_reads_for_direction, ) LOG = logging.getLogger(__name__) class FastqConcatenationService: + """Fastq file concatenation service.""" + + @staticmethod def concatenate( - self, + sample_id: str, fastq_directory: Path, forward_output_path: Path, reverse_output_path: Path, remove_raw: bool = False, ): - temp_forward: Path | None = concatenate_forward_reads(fastq_directory) - temp_reverse: Path | None = concatenate_reverse_reads(fastq_directory) + """Concatenate fastq files for a given sample in a directory and write the concatenated files to the output path. + + Args: + sample_id: The identifier to identify the samples by it should be a unique identifier in the file name. + fastq_directory: The directory containing the fastq files. 
+ forward_output_path: The path where the concatenated forward reads will be written. + reverse_output_path: The path where the concatenated reverse reads will be written. + remove_raw: If True, remove the raw fastq files after concatenation. + """ + LOG.debug( + f"[Concatenation Service] Concatenating fastq files for {sample_id} in {fastq_directory}" + ) + temp_forward: Path | None = concatenate_fastq_reads_for_direction( + directory=fastq_directory, sample_id=sample_id, direction=ReadDirection.FORWARD + ) + temp_reverse: Path | None = concatenate_fastq_reads_for_direction( + directory=fastq_directory, sample_id=sample_id, direction=ReadDirection.REVERSE + ) if remove_raw: remove_raw_fastqs( + sample_id=sample_id, fastq_directory=fastq_directory, forward_file=temp_forward, reverse_file=temp_reverse, diff --git a/cg/services/fastq_concatenation_service/utils.py b/cg/services/fastq_concatenation_service/utils.py index b3196cd7cf..bfeb12c39e 100644 --- a/cg/services/fastq_concatenation_service/utils.py +++ b/cg/services/fastq_concatenation_service/utils.py @@ -8,8 +8,12 @@ from cg.constants import FileExtensions -def concatenate_forward_reads(directory: Path) -> Path | None: - fastqs: list[Path] = get_forward_read_fastqs(directory) +def concatenate_fastq_reads_for_direction( + directory: Path, sample_id: str, direction: ReadDirection +) -> Path | None: + fastqs: list[Path] = get_fastqs_by_direction( + fastq_directory=directory, direction=direction, sample_id=sample_id + ) if not fastqs: return output_file: Path = get_new_unique_file(directory) @@ -18,31 +22,19 @@ def concatenate_forward_reads(directory: Path) -> Path | None: return output_file -def concatenate_reverse_reads(directory: Path) -> Path | None: - fastqs: list[Path] = get_reverse_read_fastqs(directory) - if not fastqs: - return - file: Path = get_new_unique_file(directory) - concatenate(input_files=fastqs, output_file=file) - validate_concatenation(input_files=fastqs, output_file=file) - return file - - 
def get_new_unique_file(directory: Path) -> Path: unique_id = uuid.uuid4() return Path(directory, f"{unique_id}{FileExtensions.FASTQ}{FileExtensions.GZIP}") -def get_forward_read_fastqs(fastq_directory: Path) -> list[Path]: - return get_fastqs_by_direction(fastq_directory=fastq_directory, direction=ReadDirection.FORWARD) - - -def get_reverse_read_fastqs(fastq_directory: Path) -> list[Path]: - return get_fastqs_by_direction(fastq_directory=fastq_directory, direction=ReadDirection.REVERSE) - - -def get_fastqs_by_direction(fastq_directory: Path, direction: int) -> list[Path]: - pattern = f".+_R{direction}_[0-9]+{FileExtensions.FASTQ}{FileExtensions.GZIP}" +def get_fastqs_by_direction(fastq_directory: Path, direction: int, sample_id: str) -> list[Path]: + """Get fastq files by direction and sample id in a given directory. + args: + fastq_directory: Path: The directory containing the fastq files. + direction: int: The direction of the reads. + sample_id: str: The identifier to identify the samples by it should be a unique identifier in the file name. + """ + pattern = f".*{sample_id}.*_R{direction}_[0-9]+{FileExtensions.FASTQ}{FileExtensions.GZIP}" fastqs: list[Path] = [] for file in fastq_directory.iterdir(): if re.match(pattern, file.name): @@ -72,17 +64,30 @@ def sort_files_by_name(files: list[Path]) -> list[Path]: return sorted(files, key=lambda file: file.name) -def file_can_be_removed(file: Path, forward_file: Path, reverse_file: Path) -> bool: +def file_can_be_removed(file: Path, forward_file: Path, reverse_file: Path, sample_id: str) -> bool: + """ + Check if a file can be removed. + args: + file: Path: The file to check. + forward_file: Path: The forward file. + reverse_file: Path: The reverse file. + sample_id: str: The identifier to identify the samples by it should be a unique identifier in the file name. 
+ """ return ( f"{FileFormat.FASTQ}{FileExtensions.GZIP}" in file.name + and sample_id in file.name and file != forward_file and file != reverse_file ) -def remove_raw_fastqs(fastq_directory: Path, forward_file: Path, reverse_file: Path) -> None: +def remove_raw_fastqs( + fastq_directory: Path, forward_file: Path, reverse_file: Path, sample_id: str +) -> None: for file in fastq_directory.iterdir(): - if file_can_be_removed(file=file, forward_file=forward_file, reverse_file=reverse_file): + if file_can_be_removed( + file=file, forward_file=forward_file, reverse_file=reverse_file, sample_id=sample_id + ): file.unlink() @@ -92,23 +97,3 @@ def generate_concatenated_fastq_delivery_path( return Path( fastq_directory, f"{sample_name}_{direction}{FileExtensions.FASTQ}{FileExtensions.GZIP}" ) - - -def generate_forward_concatenated_fastq_delivery_path( - fastq_directory: Path, sample_name: str -) -> Path: - return generate_concatenated_fastq_delivery_path( - fastq_directory=fastq_directory, - sample_name=sample_name, - direction=ReadDirection.FORWARD, - ) - - -def generate_reverse_concatenated_fastq_delivery_path( - fastq_directory: Path, sample_name: str -) -> Path: - return generate_concatenated_fastq_delivery_path( - fastq_directory=fastq_directory, - sample_name=sample_name, - direction=ReadDirection.REVERSE, - ) diff --git a/tests/conftest.py b/tests/conftest.py index 68903ee995..79b389e3dd 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -4054,7 +4054,6 @@ def store_with_case_and_sample_with_reads( customer_id=case.customer_id, ticket_id=case.latest_ticket, order_date=case.ordered_at, - workflow=case.data_analysis, ) case.orders.append(order) for sample_internal_id in [downsample_sample_internal_id_1, downsample_sample_internal_id_2]: diff --git a/tests/fixture_plugins/delivery_fixtures/bundle_fixtures.py b/tests/fixture_plugins/delivery_fixtures/bundle_fixtures.py index 4196daa0ba..489a3f5a99 100644 --- 
a/tests/fixture_plugins/delivery_fixtures/bundle_fixtures.py +++ b/tests/fixture_plugins/delivery_fixtures/bundle_fixtures.py @@ -100,3 +100,59 @@ def hk_delivery_case_bundle( }, ] return case_hk_bundle + + +@pytest.fixture +def hk_delivery_case_bundle_fohm_upload( + case_hk_bundle_no_files: dict[str, Any], + case_id: str, + sample_id: str, + another_sample_id: str, + delivery_report_file: Path, + delivery_case_fastq_file: Path, + delivery_another_case_fastq_file: Path, + delivery_consensus_sample_file: Path, + delivery_another_consensus_sample_file: Path, + delivery_vcf_report_file: Path, + delivery_another_vcf_report_file: Path, +) -> dict: + case_hk_bundle: dict[str, Any] = deepcopy(case_hk_bundle_no_files) + case_hk_bundle["name"] = case_id + case_hk_bundle["files"] = [ + { + "archive": False, + "path": delivery_report_file.as_posix(), + "tags": [HK_DELIVERY_REPORT_TAG, case_id], + }, + { + "archive": False, + "path": delivery_case_fastq_file.as_posix(), + "tags": ["fastq", sample_id], + }, + { + "archive": False, + "path": delivery_another_case_fastq_file.as_posix(), + "tags": ["fastq", another_sample_id], + }, + { + "archive": False, + "path": delivery_consensus_sample_file.as_posix(), + "tags": ["consensus-sample", sample_id], + }, + { + "archive": False, + "path": delivery_another_consensus_sample_file.as_posix(), + "tags": ["consensus-sample", another_sample_id], + }, + { + "archive": False, + "path": delivery_vcf_report_file.as_posix(), + "tags": ["vcf-report", sample_id], + }, + { + "archive": False, + "path": delivery_another_vcf_report_file.as_posix(), + "tags": ["vcf-report", another_sample_id], + }, + ] + return case_hk_bundle diff --git a/tests/fixture_plugins/delivery_fixtures/context_fixtures.py b/tests/fixture_plugins/delivery_fixtures/context_fixtures.py index 3c217896c0..95a8e576be 100644 --- a/tests/fixture_plugins/delivery_fixtures/context_fixtures.py +++ b/tests/fixture_plugins/delivery_fixtures/context_fixtures.py @@ -7,7 +7,7 @@ from 
cg.apps.housekeeper.hk import HousekeeperAPI from cg.constants import DataDelivery, Workflow from cg.models.cg_config import CGConfig -from cg.store.models import Case, Sample +from cg.store.models import Case, Sample, Order from cg.store.store import Store from tests.store_helpers import StoreHelpers @@ -21,16 +21,33 @@ def delivery_housekeeper_api( hk_delivery_case_bundle: dict[str, Any], ) -> HousekeeperAPI: """Delivery API Housekeeper context.""" + hk_api: HousekeeperAPI = real_housekeeper_api + helpers.ensure_hk_bundle(store=hk_api, bundle_data=hk_delivery_sample_bundle, include=True) helpers.ensure_hk_bundle( - store=real_housekeeper_api, bundle_data=hk_delivery_sample_bundle, include=True + store=hk_api, bundle_data=hk_delivery_another_sample_bundle, include=True ) + helpers.ensure_hk_bundle(store=hk_api, bundle_data=hk_delivery_case_bundle, include=True) + return hk_api + + +@pytest.fixture +def delivery_fohm_upload_housekeeper_api( + real_housekeeper_api: HousekeeperAPI, + helpers: StoreHelpers, + hk_delivery_case_bundle_fohm_upload: dict[str, Any], + hk_delivery_sample_bundle: dict[str, Any], + hk_delivery_another_sample_bundle: dict[str, Any], +) -> HousekeeperAPI: + """Delivery API Housekeeper context.""" + hk_api: HousekeeperAPI = real_housekeeper_api + helpers.ensure_hk_bundle(store=hk_api, bundle_data=hk_delivery_sample_bundle, include=True) helpers.ensure_hk_bundle( - store=real_housekeeper_api, bundle_data=hk_delivery_another_sample_bundle, include=True + store=hk_api, bundle_data=hk_delivery_another_sample_bundle, include=True ) helpers.ensure_hk_bundle( - store=real_housekeeper_api, bundle_data=hk_delivery_case_bundle, include=True + store=hk_api, bundle_data=hk_delivery_case_bundle_fohm_upload, include=True ) - return real_housekeeper_api + return hk_api @pytest.fixture @@ -112,7 +129,8 @@ def delivery_store_microsalt( data_analysis=Workflow.MICROSALT, data_delivery=DataDelivery.FASTQ_QC, ) - + order: Order = helpers.add_order(store=status_db, 
customer_id=case.customer.id, ticket_id=1) + case.orders.append(order) # MicroSALT samples sample: Sample = helpers.add_sample( store=status_db, @@ -143,6 +161,68 @@ def delivery_store_microsalt( return status_db +@pytest.fixture +def delivery_store_mutant( + cg_context: CGConfig, + helpers: StoreHelpers, + case_id: str, + no_sample_case_id: str, + case_name: str, + sample_id: str, + another_sample_id: str, + sample_id_not_enough_reads: str, + total_sequenced_reads_pass: int, + total_sequenced_reads_not_pass: int, + sample_name: str, + another_sample_name: str, + microbial_application_tag: str, +) -> Store: + """Delivery API StatusDB context for Mutant.""" + status_db: Store = cg_context.status_db + + # Error case without samples + helpers.add_case(store=status_db, internal_id=no_sample_case_id, name=no_sample_case_id) + + # Mutant case with fastq-analysis as data delivery + case: Case = helpers.add_case( + store=status_db, + internal_id=case_id, + name=case_name, + data_analysis=Workflow.MUTANT, + data_delivery=DataDelivery.FASTQ_ANALYSIS, + ) + order: Order = helpers.add_order(store=status_db, customer_id=case.customer.id, ticket_id=1) + case.orders.append(order) + # Mutant samples + sample: Sample = helpers.add_sample( + store=status_db, + application_tag=microbial_application_tag, + internal_id=sample_id, + name=sample_name, + reads=total_sequenced_reads_pass, + ) + + another_sample: Sample = helpers.add_sample( + store=status_db, + application_tag=microbial_application_tag, + internal_id=another_sample_id, + name=another_sample_name, + reads=total_sequenced_reads_pass, + ) + + sample_not_enough_reads: Sample = helpers.add_sample( + store=status_db, + application_tag=microbial_application_tag, + internal_id=sample_id_not_enough_reads, + reads=total_sequenced_reads_not_pass, + ) + + for sample_mutant in [sample, another_sample, sample_not_enough_reads]: + helpers.add_relationship(store=status_db, case=case, sample=sample_mutant) + + return status_db + + 
@pytest.fixture def delivery_context_balsamic( cg_context: CGConfig, diff --git a/tests/fixture_plugins/delivery_fixtures/delivery_files_models_fixtures.py b/tests/fixture_plugins/delivery_fixtures/delivery_files_models_fixtures.py index a252c4791c..ea2b2e8337 100644 --- a/tests/fixture_plugins/delivery_fixtures/delivery_files_models_fixtures.py +++ b/tests/fixture_plugins/delivery_fixtures/delivery_files_models_fixtures.py @@ -1,3 +1,4 @@ +import os from pathlib import Path import pytest @@ -15,6 +16,7 @@ DeliveryMetaData, SampleFile, ) +from cg.services.deliver_files.file_formatter.destination.models import FormattedFile from cg.store.models import Case from cg.store.store import Store @@ -89,6 +91,78 @@ def expected_bam_delivery_files( return DeliveryFiles(delivery_data=delivery_meta_data, case_files=[], sample_files=sample_files) +@pytest.fixture() +def expected_bam_delivery_files_single_sample( + expected_bam_delivery_files: DeliveryFiles, sample_id: str +) -> DeliveryFiles: + expected_bam_delivery_files.sample_files = [ + sample_file + for sample_file in expected_bam_delivery_files.sample_files + if sample_file.sample_id == sample_id + ] + return expected_bam_delivery_files + + +@pytest.fixture +def expected_fohm_delivery_files( + delivery_fohm_upload_housekeeper_api: HousekeeperAPI, + case_id: str, + case_name: str, + sample_id: str, + sample_name: str, + another_sample_id: str, + another_sample_name: str, + delivery_store_mutant: Store, +) -> DeliveryFiles: + """Return the expected fastq delivery files.""" + sample_info: list[tuple[str, str]] = [ + (sample_id, sample_name), + (another_sample_id, another_sample_name), + ] + sample_files: list[SampleFile] = [ + SampleFile( + case_id=case_id, + sample_id=sample[0], + sample_name=sample[1], + file_path=delivery_fohm_upload_housekeeper_api.get_files_from_latest_version( + bundle_name=sample[0], tags=[SequencingFileTag.FASTQ] + )[0].full_path, + ) + for sample in sample_info + ] + case_sample_info: 
list[tuple[str, str, str]] = [ + (sample_id, sample_name, "consensus-sample"), + (sample_id, sample_name, "vcf-report"), + (another_sample_id, another_sample_name, "consensus-sample"), + (another_sample_id, another_sample_name, "vcf-report"), + ] + case_sample_files: list[SampleFile] = [ + SampleFile( + case_id=case_id, + sample_id=sample[0], + sample_name=sample[1], + file_path=delivery_fohm_upload_housekeeper_api.get_files_from_latest_version_containing_tags( + bundle_name=case_id, tags=[{sample[2], sample[0]}] + )[ + 0 + ].full_path, + ) + for sample in case_sample_info + ] + + case: Case = delivery_store_mutant.get_case_by_internal_id(case_id) + delivery_meta_data = DeliveryMetaData( + case_id=case.internal_id, + customer_internal_id=case.customer.internal_id, + ticket_id=case.latest_ticket, + ) + return DeliveryFiles( + delivery_data=delivery_meta_data, + case_files=[], + sample_files=case_sample_files + sample_files, + ) + + @pytest.fixture def expected_analysis_delivery_files( delivery_housekeeper_api: HousekeeperAPI, @@ -152,7 +226,7 @@ def expected_moved_fastq_delivery_files( INBOX_NAME, delivery_files.delivery_data.ticket_id, ) - delivery_files.delivery_data.customer_ticket_inbox = inbox_dir_path + delivery_files.delivery_data.delivery_path = inbox_dir_path new_sample_files: list[SampleFile] = swap_file_paths_with_inbox_paths( file_models=delivery_files.sample_files, inbox_dir_path=inbox_dir_path ) @@ -175,7 +249,7 @@ def expected_moved_analysis_delivery_files( INBOX_NAME, delivery_files.delivery_data.ticket_id, ) - delivery_files.delivery_data.customer_ticket_inbox = inbox_dir_path + delivery_files.delivery_data.delivery_path = inbox_dir_path new_case_files: list[CaseFile] = swap_file_paths_with_inbox_paths( file_models=delivery_files.case_files, inbox_dir_path=inbox_dir_path ) @@ -214,23 +288,65 @@ def expected_moved_analysis_case_delivery_files( @pytest.fixture -def fastq_concatenation_sample_files(tmp_path: Path) -> list[SampleFile]: - some_ticket: 
str = "some_ticket" - fastq_paths: list[Path] = [ - Path(tmp_path, some_ticket, "S1_1_R1_1.fastq.gz"), - Path(tmp_path, some_ticket, "S1_2_R1_1.fastq.gz"), - Path(tmp_path, some_ticket, "S1_1_R2_1.fastq.gz"), - Path(tmp_path, some_ticket, "S1_2_R2_1.fastq.gz"), - ] - return [ - SampleFile( - sample_id="S1", - case_id="Case1", - sample_name="Sample1", - file_path=fastq_path, +def fastq_concatenation_sample_files( + tmp_path: Path, expected_fastq_delivery_files: DeliveryFiles +) -> list[SampleFile]: + """ + Return a list of sample files that are to be concatenated. + """ + inbox = Path( + expected_fastq_delivery_files.delivery_data.customer_internal_id, + INBOX_NAME, + expected_fastq_delivery_files.delivery_data.ticket_id, + ) + sample_data = [("Sample_ID1", "Sample_Name1"), ("Sample_ID2", "Sample_Name2")] + sample_files = [] + for sample_id, sample_name in sample_data: + fastq_paths: list[Path] = [ + Path(tmp_path, inbox, f"{sample_id}_L001_R1_001.fastq.gz"), + Path(tmp_path, inbox, f"{sample_id}_L002_R1_001.fastq.gz"), + Path(tmp_path, inbox, f"{sample_id}_L001_R2_001.fastq.gz"), + Path(tmp_path, inbox, f"{sample_id}_L002_R2_001.fastq.gz"), + ] + + sample_files.extend( + [ + SampleFile( + sample_id=sample_id, + case_id="Case1", + sample_name=sample_name, + file_path=fastq_path, + ) + for fastq_path in fastq_paths + ] ) - for fastq_path in fastq_paths - ] + return sample_files + + +@pytest.fixture +def fastq_concatenation_sample_files_flat(tmp_path: Path) -> list[SampleFile]: + sample_data = [("Sample_ID2", "Sample_Name2"), ("Sample_ID1", "Sample_Name1")] + sample_files = [] + for sample_id, sample_name in sample_data: + fastq_paths: list[Path] = [ + Path(tmp_path, f"{sample_id}_L001_R1_001.fastq.gz"), + Path(tmp_path, f"{sample_id}_L002_R1_001.fastq.gz"), + Path(tmp_path, f"{sample_id}_L001_R2_001.fastq.gz"), + Path(tmp_path, f"{sample_id}_L002_R2_001.fastq.gz"), + ] + + sample_files.extend( + [ + SampleFile( + sample_id=sample_id, + case_id="Case1", + 
sample_name=sample_name, + file_path=fastq_path, + ) + for fastq_path in fastq_paths + ] + ) + return sample_files def swap_file_paths_with_inbox_paths( @@ -243,3 +359,57 @@ def swap_file_paths_with_inbox_paths( new_file_model.file_path = Path(inbox_dir_path, file_model.file_path.name) new_file_models.append(new_file_model) return new_file_models + + +@pytest.fixture +def lims_naming_metadata() -> str: + return "01_SE100_" + + +@pytest.fixture +def expected_mutant_formatted_files( + expected_concatenated_fastq_formatted_files, lims_naming_metadata +) -> list[FormattedFile]: + unique_combinations = [] + for formatted_file in expected_concatenated_fastq_formatted_files: + formatted_file.original_path = formatted_file.formatted_path + formatted_file.formatted_path = Path( + formatted_file.formatted_path.parent, + f"{lims_naming_metadata}{formatted_file.formatted_path.name}", + ) + if formatted_file not in unique_combinations: + unique_combinations.append(formatted_file) + return unique_combinations + + +@pytest.fixture +def mutant_moved_files(fastq_concatenation_sample_files) -> list[SampleFile]: + return fastq_concatenation_sample_files + + +@pytest.fixture +def expected_upload_files(expected_analysis_delivery_files: DeliveryFiles): + return expected_analysis_delivery_files + + +@pytest.fixture +def expected_moved_upload_files(expected_analysis_delivery_files: DeliveryFiles, tmp_path: Path): + delivery_files = DeliveryFiles(**expected_analysis_delivery_files.model_dump()) + delivery_files.delivery_data.delivery_path = tmp_path + new_case_files: list[CaseFile] = swap_file_paths_with_inbox_paths( + file_models=delivery_files.case_files, inbox_dir_path=tmp_path + ) + new_sample_files: list[SampleFile] = swap_file_paths_with_inbox_paths( + file_models=delivery_files.sample_files, inbox_dir_path=tmp_path + ) + + return DeliveryFiles( + delivery_data=delivery_files.delivery_data, + case_files=new_case_files, + sample_files=new_sample_files, + ) + + +@pytest.fixture +def 
empty_sample() -> None: + return None diff --git a/tests/fixture_plugins/delivery_fixtures/delivery_formatted_files_fixtures.py b/tests/fixture_plugins/delivery_fixtures/delivery_formatted_files_fixtures.py index 89b614b584..2e90df0f80 100644 --- a/tests/fixture_plugins/delivery_fixtures/delivery_formatted_files_fixtures.py +++ b/tests/fixture_plugins/delivery_fixtures/delivery_formatted_files_fixtures.py @@ -2,8 +2,8 @@ import pytest -from cg.services.deliver_files.file_fetcher.models import DeliveryFiles, SampleFile -from cg.services.deliver_files.file_formatter.models import FormattedFile +from cg.services.deliver_files.file_fetcher.models import DeliveryFiles +from cg.services.deliver_files.file_formatter.destination.models import FormattedFile @pytest.fixture @@ -42,6 +42,22 @@ def expected_formatted_analysis_sample_files( return formatted_files +@pytest.fixture +def expected_flat_formatted_analysis_sample_files( + expected_moved_analysis_delivery_files: DeliveryFiles, +) -> list[FormattedFile]: + formatted_files: list[FormattedFile] = [] + for sample_file in expected_moved_analysis_delivery_files.sample_files: + replaced_sample_file_name: str = sample_file.file_path.name.replace( + sample_file.sample_id, sample_file.sample_name + ) + formatted_file_path = Path(sample_file.file_path.parent, replaced_sample_file_name) + formatted_files.append( + FormattedFile(original_path=sample_file.file_path, formatted_path=formatted_file_path) + ) + return formatted_files + + @pytest.fixture def expected_formatted_fastq_sample_files( expected_moved_fastq_delivery_files: DeliveryFiles, @@ -69,10 +85,10 @@ def expected_concatenated_fastq_formatted_files( replaced_sample_file_name: str = sample_file.file_path.name.replace( sample_file.sample_id, sample_file.sample_name ) - replaced_sample_file_name = replaced_sample_file_name.replace("1_R1_1", "1") - replaced_sample_file_name = replaced_sample_file_name.replace("2_R1_1", "1") - replaced_sample_file_name = 
replaced_sample_file_name.replace("1_R2_1", "2") - replaced_sample_file_name = replaced_sample_file_name.replace("2_R2_1", "2") + replaced_sample_file_name = replaced_sample_file_name.replace("L001_R1_001", "1") + replaced_sample_file_name = replaced_sample_file_name.replace("L002_R1_001", "1") + replaced_sample_file_name = replaced_sample_file_name.replace("L001_R2_001", "2") + replaced_sample_file_name = replaced_sample_file_name.replace("L002_R2_001", "2") formatted_file_path = Path( sample_file.file_path.parent, sample_file.sample_name, replaced_sample_file_name ) @@ -82,6 +98,26 @@ def expected_concatenated_fastq_formatted_files( return formatted_files +@pytest.fixture +def expected_concatenated_fastq_flat_formatted_files( + fastq_concatenation_sample_files_flat, +) -> list[FormattedFile]: + formatted_files: list[FormattedFile] = [] + for sample_file in fastq_concatenation_sample_files_flat: + replaced_sample_file_name: str = sample_file.file_path.name.replace( + sample_file.sample_id, sample_file.sample_name + ) + replaced_sample_file_name = replaced_sample_file_name.replace("L001_R1_001", "1") + replaced_sample_file_name = replaced_sample_file_name.replace("L002_R1_001", "1") + replaced_sample_file_name = replaced_sample_file_name.replace("L001_R2_001", "2") + replaced_sample_file_name = replaced_sample_file_name.replace("L002_R2_001", "2") + formatted_file_path = Path(sample_file.file_path.parent, replaced_sample_file_name) + formatted_files.append( + FormattedFile(original_path=sample_file.file_path, formatted_path=formatted_file_path) + ) + return formatted_files + + @pytest.fixture def empty_case_files() -> list: return [] diff --git a/tests/fixture_plugins/delivery_fixtures/delivery_services_fixtures.py b/tests/fixture_plugins/delivery_fixtures/delivery_services_fixtures.py index 5d81346d36..60d898ed81 100644 --- a/tests/fixture_plugins/delivery_fixtures/delivery_services_fixtures.py +++ 
b/tests/fixture_plugins/delivery_fixtures/delivery_services_fixtures.py @@ -1,12 +1,14 @@ import pytest from cg.apps.housekeeper.hk import HousekeeperAPI +from cg.services.deliver_files.file_fetcher.analysis_raw_data_service import ( + RawDataAndAnalysisDeliveryFileFetcher, +) +from cg.services.deliver_files.file_formatter.destination.base_service import BaseDeliveryFormatter from cg.services.deliver_files.tag_fetcher.bam_service import ( BamDeliveryTagsFetcher, ) -from cg.services.fastq_concatenation_service.fastq_concatenation_service import ( - FastqConcatenationService, -) +from cg.services.deliver_files.tag_fetcher.fohm_upload_service import FOHMUploadTagsFetcher from cg.services.deliver_files.tag_fetcher.sample_and_case_service import ( SampleAndCaseDeliveryTagsFetcher, ) @@ -16,19 +18,15 @@ from cg.services.deliver_files.file_fetcher.raw_data_service import ( RawDataDeliveryFileFetcher, ) -from cg.services.deliver_files.file_formatter.service import ( - DeliveryFileFormatter, -) -from cg.services.deliver_files.file_formatter.utils.case_service import ( +from cg.services.deliver_files.file_formatter.files.case_service import ( CaseFileFormatter, ) -from cg.services.deliver_files.file_formatter.utils.sample_concatenation_service import ( - SampleFileConcatenationFormatter, -) -from cg.services.deliver_files.file_formatter.utils.sample_service import ( +from cg.services.deliver_files.file_formatter.files.sample_service import ( SampleFileFormatter, - FileManagingService, - SampleFileNameFormatter, + FileManager, +) +from cg.services.deliver_files.file_formatter.path_name.nested_structure import ( + NestedStructurePathFormatter, ) from cg.store.store import Store @@ -89,6 +87,20 @@ def bam_data_delivery_service_no_housekeeper_bundle( ) +@pytest.fixture +def fohm_data_delivery_service( + delivery_fohm_upload_housekeeper_api: HousekeeperAPI, + delivery_store_mutant: Store, +) -> RawDataAndAnalysisDeliveryFileFetcher: + """Fixture to get an instance of 
FetchFastqDeliveryFilesService.""" + tag_service = FOHMUploadTagsFetcher() + return RawDataAndAnalysisDeliveryFileFetcher( + hk_api=delivery_fohm_upload_housekeeper_api, + status_db=delivery_store_mutant, + tags_fetcher=tag_service, + ) + + @pytest.fixture def analysis_delivery_service( delivery_housekeeper_api: HousekeeperAPI, @@ -118,11 +130,14 @@ def analysis_delivery_service_no_housekeeper_bundle( @pytest.fixture -def generic_delivery_file_formatter() -> DeliveryFileFormatter: +def generic_delivery_file_formatter() -> BaseDeliveryFormatter: """Fixture to get an instance of GenericDeliveryFileFormatter.""" - return DeliveryFileFormatter( + return BaseDeliveryFormatter( sample_file_formatter=SampleFileFormatter( - file_manager=FileManagingService(), file_name_formatter=SampleFileNameFormatter() + file_manager=FileManager(), path_name_formatter=NestedStructurePathFormatter() + ), + case_file_formatter=CaseFileFormatter( + file_manager=FileManager(), + path_name_formatter=NestedStructurePathFormatter(), ), - case_file_formatter=CaseFileFormatter(), ) diff --git a/tests/fixture_plugins/delivery_fixtures/path_fixtures.py b/tests/fixture_plugins/delivery_fixtures/path_fixtures.py index 06b77d6959..22d682b014 100644 --- a/tests/fixture_plugins/delivery_fixtures/path_fixtures.py +++ b/tests/fixture_plugins/delivery_fixtures/path_fixtures.py @@ -9,7 +9,18 @@ @pytest.fixture def delivery_fastq_file(tmp_path: Path, sample_id: str) -> Path: - file = Path(tmp_path, f"{sample_id}_R1_001{FileExtensions.FASTQ_GZ}") + file = Path(tmp_path, f"{sample_id}_L001_R1_001{FileExtensions.FASTQ_GZ}") + file.touch() + return file + + +@pytest.fixture +def delivery_case_fastq_file(tmp_path: Path, sample_id: str) -> Path: + """ + This represents a fastq file stored on a case bundle. Mutant stored file like this in the past. + This fixture servers the purpose to make sure these files are not fetched during delivery. 
+ """ + file = Path(tmp_path, f"{sample_id}_concat_{FileExtensions.FASTQ_GZ}") file.touch() return file @@ -23,7 +34,18 @@ def delivery_bam_file(tmp_path: Path, sample_id: str) -> Path: @pytest.fixture def delivery_another_fastq_file(tmp_path: Path, another_sample_id: str) -> Path: - file = Path(tmp_path, f"{another_sample_id}_R1_001{FileExtensions.FASTQ_GZ}") + file = Path(tmp_path, f"{another_sample_id}L001_R1_001{FileExtensions.FASTQ_GZ}") + file.touch() + return file + + +@pytest.fixture +def delivery_another_case_fastq_file(tmp_path: Path, another_sample_id: str) -> Path: + """ + This represents a fastq file stored on a case bundle. Mutant stored file like this in the past. + This fixture servers the purpose to make sure these files are not fetched during delivery. + """ + file = Path(tmp_path, f"{another_sample_id}_concat_{FileExtensions.FASTQ_GZ}") file.touch() return file @@ -73,3 +95,31 @@ def delivery_another_cram_file(tmp_path: Path, another_sample_id: str) -> Path: @pytest.fixture def delivery_ticket_dir_path(tmp_path: Path, ticket_id: str) -> Path: return Path(tmp_path, ticket_id) + + +@pytest.fixture +def delivery_consensus_sample_file(tmp_path: Path, sample_id: str) -> Path: + file = Path(tmp_path, f"{sample_id}_consensus_sample{FileExtensions.VCF}") + file.touch() + return file + + +@pytest.fixture +def delivery_another_consensus_sample_file(tmp_path: Path, another_sample_id: str) -> Path: + file = Path(tmp_path, f"{another_sample_id}_consensus_sample{FileExtensions.VCF}") + file.touch() + return file + + +@pytest.fixture +def delivery_vcf_report_file(tmp_path: Path, sample_id: str) -> Path: + file = Path(tmp_path, f"{sample_id}_vcf_report{FileExtensions.VCF}") + file.touch() + return file + + +@pytest.fixture +def delivery_another_vcf_report_file(tmp_path: Path, another_sample_id: str) -> Path: + file = Path(tmp_path, f"{another_sample_id}_vcf_report{FileExtensions.VCF}") + file.touch() + return file diff --git 
a/tests/fixture_plugins/fohm/fohm_fixtures.py b/tests/fixture_plugins/fohm/fohm_fixtures.py index 17531570e3..eef17e9f4c 100644 --- a/tests/fixture_plugins/fohm/fohm_fixtures.py +++ b/tests/fixture_plugins/fohm/fohm_fixtures.py @@ -109,7 +109,7 @@ def fohm_upload_api( cg_context: CGConfig, mocker: MockFixture, helpers: StoreHelpers ) -> FOHMUploadAPI: """FOHM upload API fixture.""" - fohm_upload_api = FOHMUploadAPI(cg_context) + fohm_upload_api = FOHMUploadAPI(config=cg_context) # Mock getting Sample object from StatusDB mocker.patch.object( diff --git a/tests/fixtures/orderforms/2184.10.sarscov2.xlsx b/tests/fixtures/orderforms/2184.10.sarscov2.xlsx new file mode 100644 index 0000000000..80b49b58a3 Binary files /dev/null and b/tests/fixtures/orderforms/2184.10.sarscov2.xlsx differ diff --git a/tests/fixtures/orderforms/2184.9.sarscov2.xlsx b/tests/fixtures/orderforms/2184.9.sarscov2.xlsx deleted file mode 100644 index 6f4de53e5d..0000000000 Binary files a/tests/fixtures/orderforms/2184.9.sarscov2.xlsx and /dev/null differ diff --git a/tests/services/__init__.py b/tests/services/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/services/fastq_file_service/conftest.py b/tests/services/fastq_file_service/conftest.py index 4f5b20a92f..06860373a7 100644 --- a/tests/services/fastq_file_service/conftest.py +++ b/tests/services/fastq_file_service/conftest.py @@ -11,29 +11,67 @@ def fastq_file_service(): return FastqConcatenationService() -def create_fastqs_directory(number_forward_reads, number_reverse_reads, tmp_path): +def create_fastqs_directory(tmp_path: Path): fastq_dir = Path(tmp_path, "fastqs") fastq_dir.mkdir() + return fastq_dir + + +def create_fastq_files( + fastq_dir: Path, number_forward_reads: int, number_reverse_reads: int, sample_id: str +): for i in range(number_forward_reads): - file = Path(fastq_dir, f"sample_R1_{i}.fastq.gz") - file.write_text(f"forward read {i}") + file = Path(fastq_dir, f"{sample_id}_R1_{i}.fastq.gz") + 
file.write_text(f"{sample_id} forward read {i}") for i in range(number_reverse_reads): - file = Path(fastq_dir, f"sample_R2_{i}.fastq.gz") - file.write_text(f"reverse read {i}") + file = Path(fastq_dir, f"{sample_id}_R2_{i}.fastq.gz") + file.write_text(f"{sample_id} reverse read {i}") + + +@pytest.fixture +def fastqs_dir(tmp_path: Path, sample_id: str) -> Path: + fastq_dir: Path = create_fastqs_directory(tmp_path=tmp_path) + create_fastq_files( + fastq_dir=fastq_dir, number_forward_reads=3, number_reverse_reads=3, sample_id=sample_id + ) return fastq_dir @pytest.fixture -def fastqs_dir(tmp_path) -> Path: - return create_fastqs_directory( - number_forward_reads=3, number_reverse_reads=3, tmp_path=tmp_path +def fastq_dir_existing_concatenated_files(tmp_path: Path, sample_id: str) -> Path: + fastq_dir: Path = create_fastqs_directory(tmp_path=tmp_path) + create_fastq_files( + fastq_dir=fastq_dir, number_forward_reads=3, number_reverse_reads=3, sample_id=sample_id ) + forward_output_path = Path(fastq_dir, "forward.fastq.gz") + reverse_output_path = Path(fastq_dir, "reverse.fastq.gz") + forward_output_path.write_text("Existing concatenated forward reads") + reverse_output_path.write_text("Existing concatenated reverse reads") + return fastq_dir @pytest.fixture -def fastqs_forward(tmp_path) -> Path: +def fastqs_forward(tmp_path: Path, sample_id: str) -> Path: """Return a directory with only forward reads.""" - return create_fastqs_directory( - number_forward_reads=3, number_reverse_reads=0, tmp_path=tmp_path + fastq_dir: Path = create_fastqs_directory(tmp_path=tmp_path) + create_fastq_files( + fastq_dir=fastq_dir, number_forward_reads=3, number_reverse_reads=0, sample_id=sample_id ) + return fastq_dir + + +@pytest.fixture +def fastqs_multiple_samples(tmp_path: Path, sample_id: str, another_sample_id: str) -> Path: + """Return a directory with fastq files for multiple samples.""" + fastq_dir: Path = create_fastqs_directory(tmp_path=tmp_path) + create_fastq_files( + 
fastq_dir=fastq_dir, number_forward_reads=3, number_reverse_reads=3, sample_id=sample_id + ) + create_fastq_files( + fastq_dir=fastq_dir, + number_forward_reads=3, + number_reverse_reads=3, + sample_id=another_sample_id, + ) + return fastq_dir diff --git a/tests/services/fastq_file_service/test_fastq_file_service.py b/tests/services/fastq_file_service/test_fastq_file_service.py index a4dc9e25d1..546438c6d9 100644 --- a/tests/services/fastq_file_service/test_fastq_file_service.py +++ b/tests/services/fastq_file_service/test_fastq_file_service.py @@ -9,7 +9,9 @@ from cg.services.fastq_concatenation_service.utils import generate_concatenated_fastq_delivery_path -def test_empty_directory(fastq_file_service: FastqConcatenationService, tmp_path): +def test_empty_directory( + fastq_file_service: FastqConcatenationService, tmp_path: Path, sample_id: str +): # GIVEN an empty directory # GIVEN output files @@ -18,6 +20,7 @@ def test_empty_directory(fastq_file_service: FastqConcatenationService, tmp_path # WHEN concatenating the reads fastq_file_service.concatenate( + sample_id=sample_id, fastq_directory=tmp_path, forward_output_path=forward_output_path, reverse_output_path=reverse_output_path, @@ -28,7 +31,9 @@ def test_empty_directory(fastq_file_service: FastqConcatenationService, tmp_path assert not reverse_output_path.exists() -def test_concatenate(fastq_file_service: FastqConcatenationService, fastqs_dir: Path): +def test_concatenate( + fastq_file_service: FastqConcatenationService, fastqs_dir: Path, sample_id: str +): # GIVEN a directory with forward and reverse reads # GIVEN output files for the concatenated reads @@ -37,6 +42,7 @@ def test_concatenate(fastq_file_service: FastqConcatenationService, fastqs_dir: # WHEN concatenating the reads fastq_file_service.concatenate( + sample_id=sample_id, fastq_directory=fastqs_dir, forward_output_path=forward_output_path, reverse_output_path=reverse_output_path, @@ -57,19 +63,25 @@ def test_concatenate(fastq_file_service: 
FastqConcatenationService, fastqs_dir: def test_concatenate_when_output_exists( - fastq_file_service: FastqConcatenationService, fastqs_dir: Path + fastq_file_service: FastqConcatenationService, + fastq_dir_existing_concatenated_files: Path, + sample_id: str, ): + """Test that existing concatenated files are overwritten when already existing.""" # GIVEN a directory with forward and reverse reads - existing_fastq_files = list(fastqs_dir.iterdir()) - existing_forward: Path = existing_fastq_files[0] + forward_output_path = Path(fastq_dir_existing_concatenated_files, "forward.fastq.gz") + reverse_output_path = Path(fastq_dir_existing_concatenated_files, "reverse.fastq.gz") # GIVEN that the forward output file already exists - forward_output_path = existing_forward - reverse_output_path = Path(fastqs_dir, "reverse.fastq.gz") + assert forward_output_path.exists() + assert reverse_output_path.exists() + assert "Existing" in forward_output_path.read_text() + assert "Existing" in reverse_output_path.read_text() # WHEN concatenating the reads fastq_file_service.concatenate( - fastq_directory=fastqs_dir, + sample_id=sample_id, + fastq_directory=fastq_dir_existing_concatenated_files, forward_output_path=forward_output_path, reverse_output_path=reverse_output_path, remove_raw=True, @@ -82,14 +94,16 @@ def test_concatenate_when_output_exists( # THEN the concatenated forward reads only contain forward reads assert "forward" in forward_output_path.read_text() assert "reverse" not in forward_output_path.read_text() + assert "Existing" not in forward_output_path.read_text() # THEN the concatenated reverse reads only contain reverse reads assert "reverse" in reverse_output_path.read_text() assert "forward" not in reverse_output_path.read_text() + assert "Existing" not in reverse_output_path.read_text() def test_concatenate_missing_reverse( - fastq_file_service: FastqConcatenationService, fastqs_forward: Path, tmp_path + fastq_file_service: FastqConcatenationService, fastqs_forward: 
Path, tmp_path, sample_id: str ): # GIVEN a directory with forward reads only @@ -99,6 +113,7 @@ def test_concatenate_missing_reverse( # WHEN concatenating the reads fastq_file_service.concatenate( + sample_id=sample_id, fastq_directory=fastqs_forward, forward_output_path=forward_output_path, reverse_output_path=reverse_output_path, @@ -111,6 +126,48 @@ def test_concatenate_missing_reverse( assert not reverse_output_path.exists() +def test_concatenate_fastqs_multiple_samples_in_dir( + fastqs_multiple_samples: Path, + fastq_file_service: FastqConcatenationService, + sample_id: str, + another_sample_id: str, + tmp_path: Path, +): + # GIVEN a fastq directory with fastq files for multiple samples that should be concatenated + samples: list[str] = [sample_id, another_sample_id] + + # GIVEN output files for the concatenated reads + for fastq_sample in samples: + forward_output_path = Path(tmp_path, f"{fastq_sample}_forward.fastq.gz") + reverse_output_path = Path(tmp_path, f"{fastq_sample}_reverse.fastq.gz") + + # WHEN concatenating the reads + fastq_file_service.concatenate( + sample_id=fastq_sample, + fastq_directory=fastqs_multiple_samples, + forward_output_path=forward_output_path, + reverse_output_path=reverse_output_path, + remove_raw=True, + ) + + not_current_sample: str = another_sample_id if fastq_sample == sample_id else sample_id + # THEN the output files should exist + assert forward_output_path.exists() + assert reverse_output_path.exists() + + # THEN the concatenated forward reads only contain forward reads + assert "forward" in forward_output_path.read_text() + assert "reverse" not in forward_output_path.read_text() + assert fastq_sample in forward_output_path.read_text() + assert not_current_sample not in forward_output_path.read_text() + + # THEN the concatenated reverse reads only contain reverse reads + assert "reverse" in reverse_output_path.read_text() + assert "forward" not in reverse_output_path.read_text() + assert fastq_sample in 
reverse_output_path.read_text() + assert not_current_sample not in reverse_output_path.read_text() + + @pytest.mark.parametrize( "fastq_directory, sample_name, direction, expected_output_path", [ diff --git a/tests/services/file_delivery/delivery_file_service/test_service.py b/tests/services/file_delivery/delivery_file_service/test_service.py index 256869e81e..7315fa4514 100644 --- a/tests/services/file_delivery/delivery_file_service/test_service.py +++ b/tests/services/file_delivery/delivery_file_service/test_service.py @@ -1,6 +1,5 @@ from unittest import mock from unittest.mock import Mock - from cg.services.deliver_files.deliver_files_service.deliver_files_service import ( DeliverFilesService, ) @@ -13,7 +12,6 @@ def test_file_delivery_service_no_files(empty_delivery_files: DeliveryFiles): file_delivery_service = DeliverFilesService( delivery_file_manager_service=Mock(), move_file_service=Mock(), - file_filter=Mock(), file_formatter_service=Mock(), rsync_service=Mock(), tb_service=Mock(), diff --git a/tests/services/file_delivery/delivery_file_service/test_service_builder.py b/tests/services/file_delivery/delivery_file_service/test_service_builder.py index 1a16496a0a..29fe6d17af 100644 --- a/tests/services/file_delivery/delivery_file_service/test_service_builder.py +++ b/tests/services/file_delivery/delivery_file_service/test_service_builder.py @@ -5,10 +5,11 @@ from pydantic import BaseModel from cg.constants import DataDelivery, Workflow +from cg.services.deliver_files.constants import DeliveryDestination, DeliveryStructure from cg.services.deliver_files.deliver_files_service.deliver_files_service import ( DeliverFilesService, ) -from cg.services.deliver_files.deliver_files_service.deliver_files_service_factory import ( +from cg.services.deliver_files.factory import ( DeliveryServiceFactory, ) from cg.services.deliver_files.file_fetcher.abstract import FetchDeliveryFilesService @@ -17,12 +18,29 @@ ) from cg.services.deliver_files.file_fetcher.analysis_service 
import AnalysisDeliveryFileFetcher from cg.services.deliver_files.file_fetcher.raw_data_service import RawDataDeliveryFileFetcher -from cg.services.deliver_files.file_formatter.utils.sample_concatenation_service import ( +from cg.services.deliver_files.file_formatter.files.mutant_service import ( + MutantFileFormatter, +) +from cg.services.deliver_files.file_formatter.files.concatenation_service import ( SampleFileConcatenationFormatter, ) -from cg.services.deliver_files.file_formatter.utils.sample_service import SampleFileFormatter -from cg.services.deliver_files.file_mover.service import DeliveryFilesMover +from cg.services.deliver_files.file_formatter.files.sample_service import ( + SampleFileFormatter, +) +from cg.services.deliver_files.file_formatter.path_name.abstract import PathNameFormatter +from cg.services.deliver_files.file_formatter.path_name.flat_structure import ( + FlatStructurePathFormatter, +) +from cg.services.deliver_files.file_formatter.path_name.nested_structure import ( + NestedStructurePathFormatter, +) +from cg.services.deliver_files.file_mover.abstract import DestinationFilesMover +from cg.services.deliver_files.file_mover.base_service import BaseDestinationFilesMover +from cg.services.deliver_files.file_mover.customer_inbox_service import ( + CustomerInboxDestinationFilesMover, +) from cg.services.deliver_files.tag_fetcher.abstract import FetchDeliveryFileTagsService +from cg.services.deliver_files.tag_fetcher.fohm_upload_service import FOHMUploadTagsFetcher from cg.services.deliver_files.tag_fetcher.sample_and_case_service import ( SampleAndCaseDeliveryTagsFetcher, ) @@ -36,9 +54,14 @@ class DeliveryServiceScenario(BaseModel): delivery_type: DataDelivery expected_tag_fetcher: type[FetchDeliveryFileTagsService] expected_file_fetcher: type[FetchDeliveryFilesService] - expected_file_mover: type[DeliveryFilesMover] - expected_sample_file_formatter: type[SampleFileFormatter | SampleFileConcatenationFormatter] + expected_file_mover: 
type[DestinationFilesMover] + expected_sample_file_formatter: type[ + SampleFileFormatter | SampleFileConcatenationFormatter | MutantFileFormatter + ] + expected_path_name_formatter: type[PathNameFormatter] store_name: str + delivery_destination: DeliveryDestination + delivery_structure: DeliveryStructure @pytest.mark.parametrize( @@ -50,9 +73,12 @@ class DeliveryServiceScenario(BaseModel): delivery_type=DataDelivery.FASTQ, expected_tag_fetcher=SampleAndCaseDeliveryTagsFetcher, expected_file_fetcher=RawDataDeliveryFileFetcher, - expected_file_mover=DeliveryFilesMover, + expected_file_mover=CustomerInboxDestinationFilesMover, expected_sample_file_formatter=SampleFileConcatenationFormatter, + expected_path_name_formatter=NestedStructurePathFormatter, store_name="microbial_store", + delivery_destination=DeliveryDestination.CUSTOMER, + delivery_structure=DeliveryStructure.NESTED, ), DeliveryServiceScenario( app_tag="VWGDPTR001", @@ -60,9 +86,12 @@ class DeliveryServiceScenario(BaseModel): delivery_type=DataDelivery.ANALYSIS_FILES, expected_tag_fetcher=SampleAndCaseDeliveryTagsFetcher, expected_file_fetcher=AnalysisDeliveryFileFetcher, - expected_file_mover=DeliveryFilesMover, - expected_sample_file_formatter=SampleFileFormatter, + expected_file_mover=CustomerInboxDestinationFilesMover, + expected_sample_file_formatter=MutantFileFormatter, + expected_path_name_formatter=NestedStructurePathFormatter, store_name="mutant_store", + delivery_destination=DeliveryDestination.CUSTOMER, + delivery_structure=DeliveryStructure.NESTED, ), DeliveryServiceScenario( app_tag="PANKTTR020", @@ -70,16 +99,46 @@ class DeliveryServiceScenario(BaseModel): delivery_type=DataDelivery.FASTQ_ANALYSIS, expected_tag_fetcher=SampleAndCaseDeliveryTagsFetcher, expected_file_fetcher=RawDataAndAnalysisDeliveryFileFetcher, - expected_file_mover=DeliveryFilesMover, + expected_file_mover=CustomerInboxDestinationFilesMover, expected_sample_file_formatter=SampleFileFormatter, + 
expected_path_name_formatter=NestedStructurePathFormatter, store_name="applications_store", + delivery_destination=DeliveryDestination.CUSTOMER, + delivery_structure=DeliveryStructure.NESTED, + ), + DeliveryServiceScenario( + app_tag="VWGDPTR001", + data_analysis=Workflow.MUTANT, + delivery_type=DataDelivery.ANALYSIS_FILES, + expected_tag_fetcher=FOHMUploadTagsFetcher, + expected_file_fetcher=AnalysisDeliveryFileFetcher, + expected_file_mover=BaseDestinationFilesMover, + expected_sample_file_formatter=MutantFileFormatter, + expected_path_name_formatter=FlatStructurePathFormatter, + store_name="mutant_store", + delivery_destination=DeliveryDestination.FOHM, + delivery_structure=DeliveryStructure.FLAT, + ), + DeliveryServiceScenario( + app_tag="VWGDPTR001", + data_analysis=Workflow.MUTANT, + delivery_type=DataDelivery.ANALYSIS_FILES, + expected_tag_fetcher=SampleAndCaseDeliveryTagsFetcher, + expected_file_fetcher=AnalysisDeliveryFileFetcher, + expected_file_mover=BaseDestinationFilesMover, + expected_sample_file_formatter=MutantFileFormatter, + expected_path_name_formatter=FlatStructurePathFormatter, + store_name="mutant_store", + delivery_destination=DeliveryDestination.BASE, + delivery_structure=DeliveryStructure.FLAT, ), ], - ids=["microbial-fastq", "SARS-COV2", "Targeted"], + ids=["microbial-fastq", "SARS-COV2", "Targeted", "FOHM Upload", "base"], ) def test_build_delivery_service(scenario: DeliveryServiceScenario, request: FixtureRequest): # GIVEN a delivery service builder with mocked store and hk_api builder = DeliveryServiceFactory( + lims_api=MagicMock(), store=request.getfixturevalue(scenario.store_name), hk_api=MagicMock(), rsync_service=MagicMock(), @@ -96,7 +155,11 @@ def test_build_delivery_service(scenario: DeliveryServiceScenario, request: Fixt ] # WHEN building a delivery service - delivery_service: DeliverFilesService = builder.build_delivery_service(case=case_mock) + delivery_service: DeliverFilesService = builder.build_delivery_service( + 
case=case_mock, + delivery_destination=scenario.delivery_destination, + delivery_structure=scenario.delivery_structure, + ) # THEN the correct file formatter and file fetcher services are used assert isinstance(delivery_service.file_manager.tags_fetcher, scenario.expected_tag_fetcher) @@ -106,3 +169,12 @@ def test_build_delivery_service(scenario: DeliveryServiceScenario, request: Fixt delivery_service.file_formatter.sample_file_formatter, scenario.expected_sample_file_formatter, ) + if not isinstance(delivery_service.file_formatter.sample_file_formatter, MutantFileFormatter): + assert isinstance( + delivery_service.file_formatter.sample_file_formatter.path_name_formatter, + scenario.expected_path_name_formatter, + ) + assert isinstance( + delivery_service.file_formatter.case_file_formatter.path_name_formatter, + scenario.expected_path_name_formatter, + ) diff --git a/tests/services/file_delivery/file_fetcher/test_file_fetching_service.py b/tests/services/file_delivery/file_fetcher/test_file_fetching_service.py index 50b770bcfc..2974d6aa66 100644 --- a/tests/services/file_delivery/file_fetcher/test_file_fetching_service.py +++ b/tests/services/file_delivery/file_fetcher/test_file_fetching_service.py @@ -8,16 +8,19 @@ @pytest.mark.parametrize( - "expected_delivery_files,delivery_file_service", + "expected_delivery_files,delivery_file_service,sample_id_to_fetch", [ - ("expected_fastq_delivery_files", "raw_data_delivery_service"), - ("expected_analysis_delivery_files", "analysis_delivery_service"), - ("expected_bam_delivery_files", "bam_data_delivery_service"), + ("expected_fohm_delivery_files", "fohm_data_delivery_service", "empty_sample"), + ("expected_fastq_delivery_files", "raw_data_delivery_service", "empty_sample"), + ("expected_analysis_delivery_files", "analysis_delivery_service", "empty_sample"), + ("expected_bam_delivery_files", "bam_data_delivery_service", "empty_sample"), + ("expected_bam_delivery_files_single_sample", "bam_data_delivery_service", 
"sample_id"), ], ) def test_get_files_to_deliver( expected_delivery_files: DeliveryFiles, delivery_file_service: FetchDeliveryFilesService, + sample_id_to_fetch: str | None, case_id: str, request, ): @@ -25,9 +28,12 @@ def test_get_files_to_deliver( # GIVEN a case id, samples that are present in Housekeeper and a delivery service delivery_file_service = request.getfixturevalue(delivery_file_service) expected_delivery_files = request.getfixturevalue(expected_delivery_files) + sample_id: str | None = request.getfixturevalue(sample_id_to_fetch) # WHEN getting the files to deliver - delivery_files: DeliveryFiles = delivery_file_service.get_files_to_deliver(case_id) + delivery_files: DeliveryFiles = delivery_file_service.get_files_to_deliver( + case_id=case_id, sample_id=sample_id + ) # THEN assert that the files to deliver are fetched assert delivery_files == expected_delivery_files diff --git a/tests/services/file_delivery/file_filter/test_sample_filter_service.py b/tests/services/file_delivery/file_filter/test_sample_filter_service.py deleted file mode 100644 index 200f43c0ad..0000000000 --- a/tests/services/file_delivery/file_filter/test_sample_filter_service.py +++ /dev/null @@ -1,22 +0,0 @@ -from cg.services.deliver_files.file_fetcher.models import DeliveryFiles -from cg.services.deliver_files.file_filter.sample_service import SampleFileFilter - - -def test_filter_delivery_files(expected_fastq_delivery_files: DeliveryFiles, sample_id: str): - """Test to filter delivery files.""" - - # GIVEN a delivery files object with multiple sample ids and a filter delivery files service - filter_service = SampleFileFilter() - samples_ids: list[str] = [ - sample.sample_id for sample in expected_fastq_delivery_files.sample_files - ] - assert len(set(samples_ids)) > 1 - - # WHEN filtering the delivery files - filtered_delivery_files = filter_service.filter_delivery_files( - expected_fastq_delivery_files, sample_id - ) - - # THEN assert that the delivery files only contains the 
sample with the given sample id - for sample_file in filtered_delivery_files.sample_files: - assert sample_file.sample_id == sample_id diff --git a/tests/services/file_delivery/file_formatter/__init__.py b/tests/services/file_delivery/file_formatter/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/services/file_delivery/file_formatter/destination/__init__.py b/tests/services/file_delivery/file_formatter/destination/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/services/file_delivery/file_formatter/test_formatting_service.py b/tests/services/file_delivery/file_formatter/destination/test_formatting_service.py similarity index 87% rename from tests/services/file_delivery/file_formatter/test_formatting_service.py rename to tests/services/file_delivery/file_formatter/destination/test_formatting_service.py index d7a012842a..218ad69c58 100644 --- a/tests/services/file_delivery/file_formatter/test_formatting_service.py +++ b/tests/services/file_delivery/file_formatter/destination/test_formatting_service.py @@ -8,12 +8,12 @@ CaseFile, DeliveryMetaData, ) -from cg.services.deliver_files.file_formatter.abstract import ( - DeliveryFileFormattingService, +from cg.services.deliver_files.file_formatter.destination.abstract import ( + DeliveryDestinationFormatter, ) import pytest -from cg.services.deliver_files.file_formatter.models import ( +from cg.services.deliver_files.file_formatter.destination.models import ( FormattedFiles, FormattedFile, ) @@ -39,7 +39,7 @@ ], ) def test_reformat_files( - formatter_service: DeliveryFileFormattingService, + formatter_service: DeliveryDestinationFormatter, formatted_case_files: list[FormattedFile], formatted_sample_files: list[FormattedFile], case_files: list[CaseFile], @@ -70,10 +70,10 @@ def test_reformat_files( expected_formatted_files = FormattedFiles(files=files) with mock.patch( - 
"cg.services.deliver_files.file_formatter.utils.sample_service.SampleFileFormatter.format_files", + "cg.services.deliver_files.file_formatter.files.sample_service.SampleFileFormatter.format_files", return_value=formatted_sample_files, ), mock.patch( - "cg.services.deliver_files.file_formatter.utils.case_service.CaseFileFormatter.format_files", + "cg.services.deliver_files.file_formatter.files.case_service.CaseFileFormatter.format_files", return_value=formatted_case_files, ): # WHEN reformatting the delivery files diff --git a/tests/services/file_delivery/file_formatter/files/__init__.py b/tests/services/file_delivery/file_formatter/files/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/services/file_delivery/file_formatter/files/test_formatter_utils.py b/tests/services/file_delivery/file_formatter/files/test_formatter_utils.py new file mode 100644 index 0000000000..ce440e10d7 --- /dev/null +++ b/tests/services/file_delivery/file_formatter/files/test_formatter_utils.py @@ -0,0 +1,139 @@ +import os +from unittest.mock import Mock +import pytest +from pathlib import Path + +from cg.services.deliver_files.file_formatter.files.abstract import FileFormatter +from cg.services.deliver_files.file_formatter.files.mutant_service import ( + MutantFileFormatter, +) +from cg.services.fastq_concatenation_service.fastq_concatenation_service import ( + FastqConcatenationService, +) +from cg.services.deliver_files.file_fetcher.models import ( + CaseFile, + SampleFile, +) +from cg.services.deliver_files.file_formatter.destination.models import FormattedFile +from cg.services.deliver_files.file_formatter.files.case_service import ( + CaseFileFormatter, +) +from cg.services.deliver_files.file_formatter.files.concatenation_service import ( + SampleFileConcatenationFormatter, +) +from cg.services.deliver_files.file_formatter.files.sample_service import ( + SampleFileFormatter, + FileManager, +) +from 
cg.services.deliver_files.file_formatter.path_name.flat_structure import ( + FlatStructurePathFormatter, +) +from cg.services.deliver_files.file_formatter.path_name.nested_structure import ( + NestedStructurePathFormatter, +) + + +@pytest.mark.parametrize( + "moved_files,expected_formatted_files,file_formatter", + [ + ( + "expected_moved_analysis_case_delivery_files", + "expected_formatted_analysis_case_files", + CaseFileFormatter( + file_manager=FileManager(), path_name_formatter=NestedStructurePathFormatter() + ), + ), + ( + "expected_moved_analysis_sample_delivery_files", + "expected_formatted_analysis_sample_files", + SampleFileFormatter( + file_manager=FileManager(), path_name_formatter=NestedStructurePathFormatter() + ), + ), + ( + "fastq_concatenation_sample_files", + "expected_concatenated_fastq_formatted_files", + SampleFileConcatenationFormatter( + file_manager=FileManager(), + path_name_formatter=NestedStructurePathFormatter(), + concatenation_service=FastqConcatenationService(), + ), + ), + ( + "fastq_concatenation_sample_files_flat", + "expected_concatenated_fastq_flat_formatted_files", + SampleFileConcatenationFormatter( + file_manager=FileManager(), + path_name_formatter=FlatStructurePathFormatter(), + concatenation_service=FastqConcatenationService(), + ), + ), + ], +) +def test_file_formatters( + moved_files: list[CaseFile | SampleFile], + expected_formatted_files: list[FormattedFile], + file_formatter: FileFormatter, + request, +): + # GIVEN existing case files, a case file formatter and a ticket directory path and a customer inbox + moved_files: list[CaseFile | SampleFile] = request.getfixturevalue(moved_files) + expected_formatted_files: list[FormattedFile] = request.getfixturevalue( + expected_formatted_files + ) + delivery_path: Path = moved_files[0].file_path.parent + + os.makedirs(delivery_path, exist_ok=True) + + for moved_file in moved_files: + moved_file.file_path.touch() + + # WHEN formatting the case files + formatted_files: 
list[FormattedFile] = file_formatter.format_files( + moved_files=moved_files, + delivery_path=delivery_path, + ) + + # THEN the case files should be formatted + assert formatted_files == expected_formatted_files + for file in formatted_files: + assert file.formatted_path.exists() + assert not file.original_path.exists() + + +def test_mutant_file_formatter( + mutant_moved_files: list[SampleFile], + expected_mutant_formatted_files: list[FormattedFile], + lims_naming_metadata: str, +): + # GIVEN existing ticket directory path and a customer inbox + ticket_dir_path: Path = mutant_moved_files[0].file_path.parent + + os.makedirs(ticket_dir_path, exist_ok=True) + + for moved_file in mutant_moved_files: + moved_file.file_path.touch() + + lims_mock = Mock() + lims_mock.get_sample_region_and_lab_code.return_value = lims_naming_metadata + file_formatter = MutantFileFormatter( + file_manager=FileManager(), + file_formatter=SampleFileConcatenationFormatter( + file_manager=FileManager(), + path_name_formatter=NestedStructurePathFormatter(), + concatenation_service=FastqConcatenationService(), + ), + lims_api=lims_mock, + ) + + # WHEN formatting the files + formatted_files: list[FormattedFile] = file_formatter.format_files( + moved_files=mutant_moved_files, + delivery_path=ticket_dir_path, + ) + + # THEN the files should be formatted + assert formatted_files == expected_mutant_formatted_files + for file in formatted_files: + assert file.formatted_path.exists() + assert not file.original_path.exists() diff --git a/tests/services/file_delivery/file_formatter/path_name_formatters/__init__.py b/tests/services/file_delivery/file_formatter/path_name_formatters/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/services/file_delivery/file_formatter/path_name_formatters/test_path_name_formatters.py b/tests/services/file_delivery/file_formatter/path_name_formatters/test_path_name_formatters.py new file mode 100644 index 0000000000..c43e3aa9f7 --- /dev/null 
+++ b/tests/services/file_delivery/file_formatter/path_name_formatters/test_path_name_formatters.py @@ -0,0 +1,54 @@ +import pytest + +from cg.services.deliver_files.file_fetcher.models import SampleFile +from cg.services.deliver_files.file_formatter.destination.models import FormattedFile +from cg.services.deliver_files.file_formatter.path_name.flat_structure import ( + FlatStructurePathFormatter, +) +from cg.services.deliver_files.file_formatter.path_name.nested_structure import ( + NestedStructurePathFormatter, +) + + +@pytest.mark.parametrize( + "sample_files,expected_formatted_files,path_name_formatter", + [ + ( + "expected_moved_analysis_sample_delivery_files", + "expected_formatted_analysis_sample_files", + NestedStructurePathFormatter(), + ), + ( + "expected_moved_analysis_sample_delivery_files", + "expected_flat_formatted_analysis_sample_files", + FlatStructurePathFormatter(), + ), + ], +) +def test_path_name_formatters( + sample_files: list[SampleFile], + expected_formatted_files: list[FormattedFile], + path_name_formatter, + request, +): + # GIVEN existing sample files and a sample file formatter + sample_files: list[SampleFile] = request.getfixturevalue(sample_files) + expected_formatted_files: list[FormattedFile] = request.getfixturevalue( + expected_formatted_files + ) + + # WHEN formatting the sample files + formatted_files: list[FormattedFile] = [ + FormattedFile( + formatted_path=path_name_formatter.format_file_path( + file_path=sample_file.file_path, + provided_name=sample_file.sample_name, + provided_id=sample_file.sample_id, + ), + original_path=sample_file.file_path, + ) + for sample_file in sample_files + ] + + # THEN the sample files should be formatted + assert formatted_files == expected_formatted_files diff --git a/tests/services/file_delivery/file_formatter/utils/test_formatter_utils.py b/tests/services/file_delivery/file_formatter/utils/test_formatter_utils.py deleted file mode 100644 index 2245fb7f78..0000000000 --- 
a/tests/services/file_delivery/file_formatter/utils/test_formatter_utils.py +++ /dev/null @@ -1,80 +0,0 @@ -import os -import pytest -from pathlib import Path - -from cg.services.fastq_concatenation_service.fastq_concatenation_service import ( - FastqConcatenationService, -) -from cg.services.deliver_files.file_fetcher.models import ( - CaseFile, - SampleFile, -) -from cg.services.deliver_files.file_formatter.models import FormattedFile -from cg.services.deliver_files.file_formatter.utils.case_service import ( - CaseFileFormatter, -) -from cg.services.deliver_files.file_formatter.utils.sample_concatenation_service import ( - SampleFileConcatenationFormatter, -) -from cg.services.deliver_files.file_formatter.utils.sample_service import ( - SampleFileFormatter, - FileManagingService, - SampleFileNameFormatter, -) - - -@pytest.mark.parametrize( - "moved_files,expected_formatted_files,file_formatter", - [ - ( - "expected_moved_analysis_case_delivery_files", - "expected_formatted_analysis_case_files", - CaseFileFormatter(), - ), - ( - "expected_moved_analysis_sample_delivery_files", - "expected_formatted_analysis_sample_files", - SampleFileFormatter( - file_manager=FileManagingService(), file_name_formatter=SampleFileNameFormatter() - ), - ), - ( - "fastq_concatenation_sample_files", - "expected_concatenated_fastq_formatted_files", - SampleFileConcatenationFormatter( - file_manager=FileManagingService(), - file_formatter=SampleFileNameFormatter(), - concatenation_service=FastqConcatenationService(), - ), - ), - ], -) -def test_file_formatter_utils( - moved_files: list[CaseFile | SampleFile], - expected_formatted_files: list[FormattedFile], - file_formatter: CaseFileFormatter | SampleFileFormatter | SampleFileConcatenationFormatter, - request, -): - # GIVEN existing case files, a case file formatter and a ticket directory path and a customer inbox - moved_files: list[CaseFile | SampleFile] = request.getfixturevalue(moved_files) - expected_formatted_files: 
list[FormattedFile] = request.getfixturevalue( - expected_formatted_files - ) - ticket_dir_path: Path = moved_files[0].file_path.parent - - os.makedirs(ticket_dir_path, exist_ok=True) - - for moved_file in moved_files: - moved_file.file_path.touch() - - # WHEN formatting the case files - formatted_files: list[FormattedFile] = file_formatter.format_files( - moved_files=moved_files, - ticket_dir_path=ticket_dir_path, - ) - - # THEN the case files should be formatted - assert formatted_files == expected_formatted_files - for file in formatted_files: - assert file.formatted_path.exists() - assert not file.original_path.exists() diff --git a/tests/services/file_delivery/file_mover/test_file_mover_service.py b/tests/services/file_delivery/file_mover/test_file_mover_service.py index 068a771835..4b8ab40c93 100644 --- a/tests/services/file_delivery/file_mover/test_file_mover_service.py +++ b/tests/services/file_delivery/file_mover/test_file_mover_service.py @@ -3,21 +3,37 @@ import pytest from cg.services.deliver_files.file_fetcher.models import DeliveryFiles -from cg.services.deliver_files.file_mover.service import ( - DeliveryFilesMover, +from cg.services.deliver_files.file_mover.customer_inbox_service import ( + CustomerInboxDestinationFilesMover, ) +from cg.services.deliver_files.file_mover.base_service import BaseDestinationFilesMover +from cg.services.deliver_files.utils import FileMover, FileManager @pytest.mark.parametrize( - "expected_moved_delivery_files,delivery_files", + "expected_moved_delivery_files,delivery_files,move_files_service", [ - ("expected_moved_fastq_delivery_files", "expected_fastq_delivery_files"), - ("expected_moved_analysis_delivery_files", "expected_analysis_delivery_files"), + ( + "expected_moved_fastq_delivery_files", + "expected_fastq_delivery_files", + CustomerInboxDestinationFilesMover(FileMover(FileManager())), + ), + ( + "expected_moved_analysis_delivery_files", + "expected_analysis_delivery_files", + 
CustomerInboxDestinationFilesMover(FileMover(FileManager())), + ), + ( + "expected_moved_upload_files", + "expected_upload_files", + BaseDestinationFilesMover(FileMover(FileManager())), + ), ], ) def test_move_files( expected_moved_delivery_files: DeliveryFiles, delivery_files: DeliveryFiles, + move_files_service: CustomerInboxDestinationFilesMover, tmp_path, request, ): @@ -28,7 +44,6 @@ def test_move_files( delivery_files: DeliveryFiles = request.getfixturevalue(delivery_files) # WHEN moving the delivery files - move_files_service = DeliveryFilesMover() moved_delivery_files: DeliveryFiles = move_files_service.move_files( delivery_files=delivery_files, delivery_base_path=tmp_path ) diff --git a/tests/services/file_delivery/tag_fetcher/test_tag_service.py b/tests/services/file_delivery/tag_fetcher/test_tag_service.py index 6e54fdc73f..e1b541b15f 100644 --- a/tests/services/file_delivery/tag_fetcher/test_tag_service.py +++ b/tests/services/file_delivery/tag_fetcher/test_tag_service.py @@ -10,6 +10,7 @@ from cg.services.deliver_files.tag_fetcher.exc import ( FetchDeliveryFileTagsError, ) +from cg.services.deliver_files.tag_fetcher.fohm_upload_service import FOHMUploadTagsFetcher from cg.services.deliver_files.tag_fetcher.models import DeliveryFileTags from cg.services.deliver_files.tag_fetcher.sample_and_case_service import ( SampleAndCaseDeliveryTagsFetcher, @@ -64,3 +65,15 @@ def test_bam_delivery_tags_fetcher(): # THEN assert that the tags are fetched assert tags.case_tags is None assert tags.sample_tags == [{"bam"}] + + +def test_fohm_upload_tags_fetcher(): + # GIVEN a tag fetcher + test_fetcher = FOHMUploadTagsFetcher() + + # WHEN fetching the tags for the files to deliver + tags: DeliveryFileTags = test_fetcher.fetch_tags(Workflow.MUTANT) + + # THEN assert that the tags are fetched + assert tags.case_tags is None + assert tags.sample_tags == [{"consensus-sample"}, {"vcf-report"}] diff --git a/tests/store/crud/conftest.py b/tests/store/crud/conftest.py index 
7cc7c61389..d884fc947d 100644 --- a/tests/store/crud/conftest.py +++ b/tests/store/crud/conftest.py @@ -501,7 +501,6 @@ def order_balsamic(helpers: StoreHelpers, store: Store) -> Order: customer_id=2, ticket_id=3, order_date=datetime.now(), - workflow=Workflow.BALSAMIC, ) order.cases.append(case) return order diff --git a/tests/store_helpers.py b/tests/store_helpers.py index 16fc12df91..19ac29ddff 100644 --- a/tests/store_helpers.py +++ b/tests/store_helpers.py @@ -515,7 +515,6 @@ def add_order( customer_id: int, ticket_id: int, order_date: datetime = datetime(year=2023, month=12, day=24), - workflow: Workflow = Workflow.MIP_DNA, ) -> Order: order = Order( customer_id=customer_id,