diff --git a/cg/apps/demultiplex/sample_sheet/api.py b/cg/apps/demultiplex/sample_sheet/api.py
index be04e69815..9790ea0a4b 100644
--- a/cg/apps/demultiplex/sample_sheet/api.py
+++ b/cg/apps/demultiplex/sample_sheet/api.py
@@ -17,6 +17,7 @@
 from cg.io.controller import WriteFile, WriteStream
 from cg.meta.demultiplex.housekeeper_storage_functions import (
     add_and_include_sample_sheet_path_to_housekeeper,
+    delete_file_from_housekeeper,
 )
 from cg.models.flow_cell.flow_cell import FlowCellDirectoryData
 
@@ -76,13 +77,25 @@ def get_valid_sample_sheet_path(self, sample_sheet_path: Path) -> Path | None:
             LOG.warning(f"Sample sheet with path {sample_sheet_path} does not exist")
 
     def get_valid_sample_sheet_path_from_hk(self, flow_cell_id: str) -> Path | None:
-        """Return the sample sheet path from Housekeeper if is valid and exists."""
+        """
+        Return the sample sheet path from Housekeeper if it is valid and exists.
+        If it is invalid, delete it from the Housekeeper database and bundle and return None.
+        """
+        LOG.info("Getting sample sheet from Housekeeper")
         try:
             sample_sheet_path: Path | None = self.hk_api.get_sample_sheet_path(flow_cell_id)
         except HousekeeperFileMissingError:
             LOG.warning(f"Sample sheet for flow cell {flow_cell_id} not found in Housekeeper")
             return
-        return self.get_valid_sample_sheet_path(sample_sheet_path)
+        if correct_sheet_path := self.get_valid_sample_sheet_path(sample_sheet_path):
+            return correct_sheet_path
+        LOG.warning(
+            f"Sample sheet for flow cell {flow_cell_id} in Housekeeper is not valid. "
+            "Deleting invalid sample sheet from Housekeeper"
+        )
+        # The path is annotated as Path | None; skip the deletion when no path was returned.
+        if sample_sheet_path is not None:
+            delete_file_from_housekeeper(file_path=sample_sheet_path, hk_api=self.hk_api)
 
     def get_sample_sheet_content(self, flow_cell: FlowCellDirectoryData) -> list[list[str]]:
         """Return the sample sheet content for a flow cell."""
diff --git a/cg/meta/demultiplex/housekeeper_storage_functions.py b/cg/meta/demultiplex/housekeeper_storage_functions.py
index a9da2eebe2..df1796a376 100644
--- a/cg/meta/demultiplex/housekeeper_storage_functions.py
+++ b/cg/meta/demultiplex/housekeeper_storage_functions.py
@@ -48,9 +48,9 @@ def store_undetermined_fastq_files(
     flow_cell: FlowCellDirectoryData, hk_api: HousekeeperAPI, store: Store
 ) -> None:
     """Store undetermined fastq files for non-pooled samples in Housekeeper."""
-    non_pooled_lanes_and_samples: list[tuple[int, str]] = (
-        flow_cell.sample_sheet.get_non_pooled_lanes_and_samples()
-    )
+    non_pooled_lanes_and_samples: list[
+        tuple[int, str]
+    ] = flow_cell.sample_sheet.get_non_pooled_lanes_and_samples()
 
     for lane, sample_id in non_pooled_lanes_and_samples:
         undetermined_fastqs: list[Path] = get_undetermined_fastqs(
@@ -176,6 +176,18 @@ def add_and_include_sample_sheet_path_to_housekeeper(
         )
 
 
+def delete_file_from_housekeeper(file_path: Path, hk_api: HousekeeperAPI) -> None:
+    """
+    Delete a file from Housekeeper database and disk given its path.
+    Raises:
+        FileNotFoundError: if Housekeeper has no file registered with the given path.
+    """
+    file: File | None = hk_api.files(path=file_path.as_posix()).first()
+    if file is None:
+        raise FileNotFoundError(f"No file with path {file_path} found in Housekeeper")
+    hk_api.delete_file(file_id=file.id)
+
+
 def delete_sequencing_data_from_housekeeper(flow_cell_id: str, hk_api: HousekeeperAPI) -> None:
     """Delete FASTQ, SPRING and metadata files associated with a flow cell from Housekeeper."""
     tag_combinations: list[set[str]] = [