diff --git a/cg/cli/demultiplex/base.py b/cg/cli/demultiplex/base.py index 0f2149865e..fac5baeec0 100644 --- a/cg/cli/demultiplex/base.py +++ b/cg/cli/demultiplex/base.py @@ -7,7 +7,6 @@ confirm_flow_cell_sync, copy_novaseqx_flow_cells, create_manifest_files, - delete_flow_cell, demultiplex_all, demultiplex_flow_cell, ) @@ -27,7 +26,6 @@ def demultiplex_cmd_group(): for sub_cmd in [ create_manifest_files, confirm_flow_cell_sync, - delete_flow_cell, demultiplex_flow_cell, demultiplex_all, finish_group, diff --git a/cg/cli/demultiplex/demux.py b/cg/cli/demultiplex/demux.py index 7414077463..3ecd5a3968 100644 --- a/cg/cli/demultiplex/demux.py +++ b/cg/cli/demultiplex/demux.py @@ -13,7 +13,6 @@ ) from cg.constants.demultiplexing import OPTION_BCL_CONVERTER, DemultiplexingDirsAndFiles from cg.exc import FlowCellError -from cg.meta.demultiplex.delete_demultiplex_api import DeleteDemuxAPI from cg.meta.demultiplex.utils import ( create_manifest_file, is_flow_cell_sync_confirmed, @@ -121,66 +120,6 @@ def demultiplex_flow_cell( ) -@click.command(name="delete-flow-cell") -@click.option( - "-f", - "--flow-cell-name", - required=True, - help="The name of the flow cell you want to delete, e.g. HVKJCDRXX", -) -@click.option( - "--demultiplexing-dir", is_flag=True, help="Delete flow cell demultiplexed dir on file system" -) -@click.option("--housekeeper", is_flag=True, help="Delete flow cell in housekeeper") -@click.option("--init-files", is_flag=True, help="Delete flow cell init-files") -@click.option("--run-dir", is_flag=True, help="Delete flow cell run on file system") -@click.option( - "--sample-lane-sequencing-metrics", - is_flag=True, - help="Delete flow cell in sample lane sequencing metrics", -) -@click.option( - "--status-db", - is_flag=True, - help="Delete flow cell in status-db, if passed all other entries are also deleted", -) -@click.option("--yes", is_flag=True, help="Pass yes to click confirm") -@DRY_RUN -@click.pass_obj -def delete_flow_cell( - context: CGConfig, - dry_run: bool, - demultiplexing_dir: bool, - housekeeper: bool, - init_files: bool, - run_dir: bool, - sample_lane_sequencing_metrics: bool, - status_db: bool, - yes: bool, - flow_cell_name: str, -): - """Delete a flow cell. If --status-db is passed then all other options will be treated as True.""" - - delete_demux_api: DeleteDemuxAPI = DeleteDemuxAPI( - config=context, dry_run=dry_run, flow_cell_name=flow_cell_name - ) - - if yes or click.confirm( - f"Are you sure you want to delete the flow cell from the following databases:\n" - f"Housekeeper={True if status_db else housekeeper}\nInit_files={True if status_db else init_files}\n" - f"Run-dir={True if status_db else run_dir}\nStatusdb={status_db}\n" - f"\nSample-lane-sequencing-metrics={True if sample_lane_sequencing_metrics else sample_lane_sequencing_metrics}" - ): - delete_demux_api.delete_flow_cell( - demultiplexing_dir=demultiplexing_dir, - housekeeper=housekeeper, - init_files=init_files, - sample_lane_sequencing_metrics=sample_lane_sequencing_metrics, - run_dir=run_dir, - status_db=status_db, - ) - - @click.command(name="copy-completed-flow-cell") @click.pass_obj def copy_novaseqx_flow_cells(context: CGConfig): diff --git a/cg/meta/demultiplex/delete_demultiplex_api.py b/cg/meta/demultiplex/delete_demultiplex_api.py deleted file mode 100644 index 24df1a953a..0000000000 --- a/cg/meta/demultiplex/delete_demultiplex_api.py +++ /dev/null @@ -1,279 +0,0 @@ -import itertools -import logging -import shutil -from glob import glob -from pathlib import Path -from typing import Iterable - -from housekeeper.store.models import File - -from cg.apps.demultiplex.demultiplex_api import DemultiplexingAPI -from cg.apps.housekeeper.hk import HousekeeperAPI -from cg.constants.housekeeper_tags import SequencingFileTag -from cg.exc import DeleteDemuxError -from cg.models.cg_config import CGConfig -from cg.store import Store -from cg.store.models import Flowcell, Sample - -LOG = logging.getLogger(__name__) - - -class DeleteDemuxAPI: - """Class to handle wiping out a flow cell before restart/start""" - - def __init__(self, config: CGConfig, dry_run: bool, flow_cell_name: str): - self.dry_run: bool = self.set_dry_run(dry_run=dry_run) - self.flow_cell_name = flow_cell_name - self.housekeeper_api: HousekeeperAPI = config.housekeeper_api - self.status_db: Store = config.status_db - self.demux_api: DemultiplexingAPI = config.demultiplex_api - self.samples_on_flow_cell: list[Sample] = [] - self.demultiplexing_out_path: Path = self.get_path_for_flow_cell( - base_path=self.demux_api.demultiplexed_runs_dir - ) - self.run_path: Path = self.get_path_for_flow_cell(base_path=self.demux_api.flow_cells_dir) - LOG.debug("DeleteDemuxAPI: API initiated") - - def get_path_for_flow_cell( - self, - base_path: Path, - ) -> Path: - """ - Return the path to the run dir or demultiplex dir for the given flow cell name - depending on the base_path input. - """ - flow_cell_path = next(base_path.rglob(pattern=f"*{self.flow_cell_name}"), None) - if flow_cell_path and flow_cell_path.exists(): - return flow_cell_path - else: - LOG.warning( - f"DeleteDemuxAPI: Path {flow_cell_path} not found for {self.flow_cell_name}" - ) - - @property - def status_db_presence(self) -> bool: - """Update about the presence of given flow cell in status_db""" - return bool(self.status_db.get_flow_cell_by_name(flow_cell_name=self.flow_cell_name)) - - @staticmethod - def set_dry_run(dry_run: bool) -> bool: - """Set dry run flag for API""" - LOG.debug(f"DeleteDemuxAPI: Setting dry run mode to {dry_run}") - return dry_run - - def _set_samples_on_flow_cell(self) -> None: - """Set a list of samples related to a flow cell in status-db.""" - flow_cell = self.status_db.get_flow_cell_by_name(flow_cell_name=self.flow_cell_name) - self.samples_on_flow_cell: list[Sample] = flow_cell.samples - - def active_samples_on_flow_cell(self) -> list[str] | None: - """Check if there are any active cases related to samples of a flow cell.""" - active_samples_on_flow_cell: list[str] = [ - sample.internal_id - for sample in self.samples_on_flow_cell - if self.status_db.has_active_cases_for_sample(internal_id=sample.internal_id) - ] - if active_samples_on_flow_cell: - return active_samples_on_flow_cell - - def _delete_sample_sheet_housekeeper(self) -> None: - """Delete the presence of all sample sheets related to a flow cell in Housekeeper.""" - sample_sheet_files: list[File] = self.housekeeper_api.get_sample_sheets_from_latest_version( - self.flow_cell_name - ) - if sample_sheet_files: - for file in sample_sheet_files: - self.housekeeper_api.delete_file(file_id=file.id) - LOG.info(f"DeleteDemuxAPI-Housekeeper: Deleted {file.path} from housekeeper") - else: - LOG.info( - f"DeleteDemuxAPI-Housekeeper: No sample sheets found with tag: {self.flow_cell_name}" - ) - - def _delete_files_if_related_in_housekeeper_by_tag(self, sample: Sample, tags: list[str]): - """Delete any existing fastq related to sample""" - - housekeeper_files: Iterable[File] = self.housekeeper_api.files( - bundle=sample.internal_id, tags=tags - ) - if not housekeeper_files: - LOG.info(f"Could not find {tags} for {sample.internal_id}") - else: - for housekeeper_file in housekeeper_files: - self.housekeeper_api.delete_file(file_id=housekeeper_file.id) - - def _delete_fastq_and_spring_housekeeper(self) -> None: - """Delete the presence of any spring/fastq files in Housekeeper related to samples on the flow cell.""" - - tag_combinations: list[list[str]] = [ - [SequencingFileTag.FASTQ.value, self.flow_cell_name], - [SequencingFileTag.SPRING.value, self.flow_cell_name], - [SequencingFileTag.SPRING_METADATA.value, self.flow_cell_name], - ] - for tags, sample in itertools.product(tag_combinations, self.samples_on_flow_cell): - self._delete_files_if_related_in_housekeeper_by_tag(sample=sample, tags=tags) - - def delete_flow_cell_housekeeper(self) -> None: - """Delete any presence of a flow cell in housekeeper. Including Sample sheets AND fastq-files""" - - if self.dry_run: - LOG.info( - f"DeleteDemuxAPI-Housekeeper: Would delete sample sheet files with tag {self.flow_cell_name}" - ) - LOG.info( - f"DeleteDemuxAPI-Housekeeper: Would delete fastq and spring files related to flow cell {self.flow_cell_name}" - ) - else: - LOG.info( - f"DeleteDemuxAPI-Housekeeper: Deleting sample sheet files with tag {self.flow_cell_name}" - ) - self._delete_sample_sheet_housekeeper() - LOG.info( - f"DeleteDemuxAPI-Housekeeper: Deleting fastq and spring files related to flowcell {self.flow_cell_name}" - ) - self._delete_fastq_and_spring_housekeeper() - - def delete_flow_cell_in_status_db(self) -> None: - """Delete a flow cell in Status db.""" - if self.dry_run: - LOG.info(f"DeleteDemuxAPI-StatusDB: Would remove {self.flow_cell_name}") - else: - self.status_db.delete_flow_cell(flow_cell_id=self.flow_cell_name) - LOG.info(f"DeleteDemuxAPI-StatusDB: Deleted flowcell {self.flow_cell_name}") - - def delete_flow_cell_sample_lane_sequencing_metrics(self) -> None: - if self.dry_run: - LOG.info( - f"Would delete entries for Flow Cell: {self.flow_cell_name} in the Sample Lane Sequencing Metrics table" - ) - else: - LOG.info( - f"Delete entries for Flow Cell: {self.flow_cell_name} in the Sample Lane Sequencing Metrics table" - ) - self.status_db.delete_flow_cell_entries_in_sample_lane_sequencing_metrics( - flow_cell_name=self.flow_cell_name - ) - - def _delete_demultiplexing_dir_hasta(self, demultiplexing_dir: bool) -> None: - """delete demultiplexing directory on server.""" - if demultiplexing_dir and self.demultiplexing_out_path.exists(): - LOG.info( - f"DeleteDemuxAPI-Hasta: Removing flow cell demultiplexing directory: {self.demultiplexing_out_path}" - ) - shutil.rmtree(self.demultiplexing_out_path, ignore_errors=False) - else: - LOG.info( - f"DeleteDemuxAPI-Hasta: No demultiplexing directory found for {self.flow_cell_name}" - ) - - def _delete_run_dir_hasta(self, run_dir: bool) -> None: - """delete flow cell run directory on server.""" - if run_dir and self.run_path.exists(): - LOG.info(f"DeleteDemuxAPI-Hasta: Removing flow cell run directory: {self.run_path}") - shutil.rmtree(path=self.run_path, ignore_errors=False) - else: - LOG.info(f"DeleteDemuxAPI-Hasta: No run directory found for {self.flow_cell_name}") - - def delete_flow_cell_hasta( - self, - demultiplexing_dir: bool, - run_dir: bool, - ) -> None: - """Delete a flow cells presence on the server, if flow cell is removed from demultiplexing dir and run - dir set status to removed.""" - if self.dry_run: - LOG.info( - f"DeleteDemuxAPI-Hasta: Would have removed the following directory: {self.demultiplexing_out_path}\n" - f"DeleteDemuxAPI-Hasta: Would have removed the following directory: {self.run_path}" - ) - return - self.set_flow_cell_status_statusdb_to_removed( - demultiplexing_dir=demultiplexing_dir, - run_dir=run_dir, - status_db=self.status_db_presence, - ) - self._delete_demultiplexing_dir_hasta(demultiplexing_dir=demultiplexing_dir) - self._delete_run_dir_hasta(run_dir=run_dir) - - def set_flow_cell_status_statusdb_to_removed( - self, demultiplexing_dir: bool, run_dir: bool, status_db: bool - ) -> None: - if demultiplexing_dir and run_dir and status_db: - flow_cell_obj: Flowcell = self.status_db.get_flow_cell_by_name( - flow_cell_name=self.flow_cell_name - ) - flow_cell_obj.status = "removed" - self.status_db.session.commit() - - def delete_demux_init_files(self): - """Delete previous traces of slurm job ids.""" - slurm_job_id_file_path: Path = Path(self.run_path, "slurm_job_ids.yaml") - demux_script_file_path: Path = Path(self.run_path, "demux-novaseq.sh") - try: - error_log_path, log_path = glob( - f"{self.run_path}/{self.flow_cell_name}_demultiplex.std*" - ) - demux_init_files: list[Path] = [ - slurm_job_id_file_path, - demux_script_file_path, - Path(error_log_path), - Path(log_path), - ] - except ValueError: - LOG.info(f"DeleteDemuxAPI-Init-files: No demultiplexing logs found in: {self.run_path}") - demux_init_files: list[Path] = [ - slurm_job_id_file_path, - demux_script_file_path, - ] - if self.dry_run: - LOG.info(f"DeleteDemuxAPI-Init-files: Would have removed {demux_init_files}") - for init_file in demux_init_files: - if init_file.is_file(): - LOG.info(f"DeleteDemuxAPI: Removing {init_file}") - init_file.unlink() - else: - LOG.info(f"DeleteDemuxAPI: Did not find {init_file}") - - def check_active_samples(self) -> None: - """Check, and raise, if there are active samples on a flow cell.""" - if self.status_db_presence: - self._set_samples_on_flow_cell() - active_samples_on_flow_cell: list[str] = self.active_samples_on_flow_cell() - if active_samples_on_flow_cell: - LOG.warning( - f"There are active analyses using data from this flowcell.\n" - f"Before restarting the demultiplexing process - please cancel the ongoing analyses of the " - f"following sample(s): \n{active_samples_on_flow_cell}" - ) - raise DeleteDemuxError - - def delete_flow_cell( - self, - demultiplexing_dir: bool, - run_dir: bool, - housekeeper: bool, - sample_lane_sequencing_metrics: bool, - init_files: bool, - status_db: bool, - ) -> None: - """Master command to delete the presence of a flow cell in all services.""" - self.check_active_samples() - if status_db: - self.delete_flow_cell_hasta( - demultiplexing_dir=True, - run_dir=True, - ) - self.delete_flow_cell_housekeeper() - self.delete_flow_cell_in_status_db() - - if demultiplexing_dir or run_dir: - self.delete_flow_cell_hasta( - demultiplexing_dir=demultiplexing_dir, - run_dir=run_dir, - ) - if init_files and not run_dir: - self.delete_demux_init_files() - if housekeeper: - self.delete_flow_cell_housekeeper() - if sample_lane_sequencing_metrics: - self.delete_flow_cell_sample_lane_sequencing_metrics() diff --git a/cg/meta/demultiplex/demux_post_processing.py b/cg/meta/demultiplex/demux_post_processing.py index 6d3baa14e5..d94ce117a2 100644 --- a/cg/meta/demultiplex/demux_post_processing.py +++ b/cg/meta/demultiplex/demux_post_processing.py @@ -5,9 +5,11 @@ from cg.apps.housekeeper.hk import HousekeeperAPI from cg.exc import FlowCellError, MissingFilesError from cg.meta.demultiplex.housekeeper_storage_functions import ( + delete_sequencing_data_from_housekeeper, store_flow_cell_data_in_housekeeper, ) from cg.meta.demultiplex.status_db_storage_functions import ( + delete_sequencing_metrics_from_statusdb, store_flow_cell_data_in_status_db, store_sample_data_in_status_db, store_sequencing_metrics_in_status_db, @@ -25,6 +27,7 @@ class DemuxPostProcessingAPI: """Post demultiplexing API class.""" def __init__(self, config: CGConfig) -> None: + self.config: CGConfig = config self.flow_cells_dir: Path = Path(config.flow_cells_dir) self.demultiplexed_runs_dir: Path = Path(config.demultiplexed_flow_cells_dir) self.status_db: Store = config.status_db @@ -82,6 +85,8 @@ def finish_flow_cell( LOG.warning(f"Flow cell {flow_cell_directory_name} will be skipped: {e}") return + self.delete_flow_cell_data(flow_cell.id) + try: self.store_flow_cell_data(flow_cell) except Exception as e: @@ -125,3 +130,8 @@ def get_all_demultiplexed_flow_cell_dirs(self) -> list[Path]: LOG.debug(f"Found directory {flow_cell_dir}") demultiplex_flow_cells.append(flow_cell_dir) return demultiplex_flow_cells + + def delete_flow_cell_data(self, flow_cell_id: str) -> None: + """Delete flow cell data from status db and housekeeper.""" + delete_sequencing_metrics_from_statusdb(flow_cell_id=flow_cell_id, store=self.status_db) + delete_sequencing_data_from_housekeeper(flow_cell_id=flow_cell_id, hk_api=self.hk_api) diff --git a/cg/meta/demultiplex/housekeeper_storage_functions.py b/cg/meta/demultiplex/housekeeper_storage_functions.py index 5e32efd256..7ea3580c1a 100644 --- a/cg/meta/demultiplex/housekeeper_storage_functions.py +++ b/cg/meta/demultiplex/housekeeper_storage_functions.py @@ -1,6 +1,9 @@ """Functions interacting with housekeeper in the DemuxPostProcessingAPI.""" import logging from pathlib import Path +from typing import Iterable + +from housekeeper.store.models import File from cg.apps.housekeeper.hk import HousekeeperAPI from cg.constants.housekeeper_tags import SequencingFileTag @@ -170,3 +173,16 @@ def add_sample_sheet_path_to_housekeeper( LOG.error( f"Sample sheet for flow cell {flow_cell_name} in {flow_cell_directory} was not found, error: {e}" ) + + +def delete_sequencing_data_from_housekeeper(flow_cell_id: str, hk_api: HousekeeperAPI) -> None: + """Delete FASTQ, SPRING and metadata files associated with a flow cell from Housekeeper.""" + tag_combinations: list[set[str]] = [ + {SequencingFileTag.FASTQ, flow_cell_id}, + {SequencingFileTag.SPRING, flow_cell_id}, + {SequencingFileTag.SPRING_METADATA, flow_cell_id}, + ] + for tags in tag_combinations: + housekeeper_files: Iterable[File] = hk_api.files(tags=tags) + for housekeeper_file in housekeeper_files: + hk_api.delete_file(file_id=housekeeper_file.id) diff --git a/cg/meta/demultiplex/status_db_storage_functions.py b/cg/meta/demultiplex/status_db_storage_functions.py index 85581d9362..8d483974a6 100644 --- a/cg/meta/demultiplex/status_db_storage_functions.py +++ b/cg/meta/demultiplex/status_db_storage_functions.py @@ -158,3 +158,12 @@ def update_sample_sequencing_date(sample: Sample, sequenced_at: datetime) -> Non if not sample.last_sequenced_at or sample.last_sequenced_at < sequenced_at: LOG.debug(f"Updating sample {sample.internal_id} with new sequencing date .") sample.last_sequenced_at = sequenced_at + + +def delete_sequencing_metrics_from_statusdb(flow_cell_id: str, store: Store) -> None: + sequencing_metrics: list[ + SampleLaneSequencingMetrics + ] = store.get_sample_lane_sequencing_metrics_by_flow_cell_name(flow_cell_id) + for metric in sequencing_metrics: + store.session.delete(metric) + store.session.commit() diff --git a/tests/cli/demultiplex/test_demultiplex_flowcell.py b/tests/cli/demultiplex/test_demultiplex_flowcell.py index 000e1e0f28..afac9aee7d 100644 --- a/tests/cli/demultiplex/test_demultiplex_flowcell.py +++ b/tests/cli/demultiplex/test_demultiplex_flowcell.py @@ -5,11 +5,7 @@ from click import testing from cg.apps.demultiplex.demultiplex_api import DemultiplexingAPI -from cg.cli.demultiplex.demux import ( - delete_flow_cell, - demultiplex_all, - demultiplex_flow_cell, -) +from cg.cli.demultiplex.demux import demultiplex_all, demultiplex_flow_cell from cg.constants.demultiplexing import DemultiplexingDirsAndFiles from cg.meta.demultiplex.housekeeper_storage_functions import ( add_sample_sheet_path_to_housekeeper, @@ -216,61 +212,3 @@ def test_is_demultiplexing_complete(tmp_flow_cell_directory_bcl2fastq: Path): # THEN the property should return true assert flow_cell.is_demultiplexing_complete - - -def test_delete_flow_cell_dry_run_status_db( - cli_runner: testing.CliRunner, - tmp_flow_cell_directory_bcl2fastq: Path, - demultiplex_context: CGConfig, - tmp_flow_cell_demux_base_path: Path, - tmp_flow_cell_run_base_path: Path, - bcl2fastq_flow_cell_id: str, - caplog, -): - """Test if logic work - call all true if status_db passed.""" - caplog.set_level(logging.DEBUG) - - demultiplex_context.demultiplex_api.flow_cells_dir = tmp_flow_cell_run_base_path - demultiplex_context.demultiplex_api.demultiplexed_runs_dir = tmp_flow_cell_demux_base_path - Path(tmp_flow_cell_run_base_path, f"some_prefix_1100_{bcl2fastq_flow_cell_id}").mkdir( - parents=True, exist_ok=True - ) - Path(tmp_flow_cell_demux_base_path, f"some_prefix_1100_{bcl2fastq_flow_cell_id}").mkdir( - parents=True, exist_ok=True - ) - # GIVEN a flow cell to be deleted - assert bcl2fastq_flow_cell_id in tmp_flow_cell_directory_bcl2fastq.name - - # WHEN deleting a flowcell from status db in dry run mode - result: testing.Result = cli_runner.invoke( - delete_flow_cell, - [ - "-f", - bcl2fastq_flow_cell_id, - "--status-db", - "--dry-run", - "--yes", - ], - obj=demultiplex_context, - ) - - # THEN the code should be executed successfully - assert result.exit_code == 0 - - # THEN it should be notified that it was going to remove all but init-files - assert ( - f"DeleteDemuxAPI-Housekeeper: Would delete sample sheet files with tag {bcl2fastq_flow_cell_id}" - in caplog.text - ) - assert ( - f"DeleteDemuxAPI-Housekeeper: Would delete fastq and spring files related to flow cell {bcl2fastq_flow_cell_id}" - in caplog.text - ) - assert f"DeleteDemuxAPI-StatusDB: Would remove {bcl2fastq_flow_cell_id}" in caplog.text - - assert ( - "DeleteDemuxAPI-Hasta: Would have removed the following directory: " - f"{demultiplex_context.demultiplex_api.demultiplexed_runs_dir / Path(f'some_prefix_1100_{bcl2fastq_flow_cell_id}')}\n" - f"DeleteDemuxAPI-Hasta: Would have removed the following directory: {demultiplex_context.demultiplex_api.flow_cells_dir / Path(f'some_prefix_1100_{bcl2fastq_flow_cell_id}')}" - ) in caplog.text - assert "DeleteDemuxAPI-Init-files: Would have removed" not in caplog.text diff --git a/tests/meta/demultiplex/conftest.py b/tests/meta/demultiplex/conftest.py index 83187c1fa6..03babd4f0d 100644 --- a/tests/meta/demultiplex/conftest.py +++ b/tests/meta/demultiplex/conftest.py @@ -8,7 +8,6 @@ from cg.apps.housekeeper.hk import HousekeeperAPI from cg.constants.demultiplexing import DemultiplexingDirsAndFiles -from cg.meta.demultiplex.delete_demultiplex_api import DeleteDemuxAPI from cg.meta.demultiplex.demux_post_processing import DemuxPostProcessingAPI from cg.meta.demultiplex.housekeeper_storage_functions import ( add_sample_sheet_path_to_housekeeper, @@ -206,119 +205,6 @@ def flow_cell_name_housekeeper_api( return flow_cell_housekeeper_api -@pytest.fixture(name="populated_delete_demux_context") -def populated_delete_demux_context( - cg_context: CGConfig, - flow_cell_name_housekeeper_api: HousekeeperAPI, - populated_flow_cell_store: Store, -) -> CGConfig: - """Return a populated context to remove flow cells from using the DeleteDemuxAPI.""" - populated_delete_demux_context = cg_context - populated_delete_demux_context.status_db_ = populated_flow_cell_store - populated_delete_demux_context.housekeeper_api_ = flow_cell_name_housekeeper_api - return populated_delete_demux_context - - -@pytest.fixture(name="populated_sample_lane_seq_demux_context") -def populated_sample_lane_seq_demux_context( - cg_context: CGConfig, - flow_cell_name_housekeeper_api: HousekeeperAPI, - store_with_sequencing_metrics: Store, -) -> CGConfig: - """Return a populated context to remove flow cells from using the DeleteDemuxAPI.""" - populated_wipe_demux_context = cg_context - populated_wipe_demux_context.status_db_ = store_with_sequencing_metrics - populated_wipe_demux_context.housekeeper_api_ = flow_cell_name_housekeeper_api - return populated_wipe_demux_context - - -@pytest.fixture(name="active_delete_demux_context") -def active_delete_demux_context( - cg_context: CGConfig, active_flow_cell_store: Store, tmp_flow_cell_run_base_path: Path -) -> CGConfig: - """Return a populated context to remove flow cells from using the DeleteDemuxAPI.""" - active_delete_demux_context = cg_context - active_delete_demux_context.status_db_ = active_flow_cell_store - active_delete_demux_context.demultiplex_api.flow_cells_dir = tmp_flow_cell_run_base_path - active_delete_demux_context.demultiplex_api.demultiplexed_runs_dir = tmp_flow_cell_run_base_path - return active_delete_demux_context - - -@pytest.fixture(name="populated_delete_demultiplex_api") -def populated_delete_demultiplex_api( - populated_delete_demux_context: CGConfig, - bcl2fastq_flow_cell_id: str, - tmp_flow_cell_run_base_path: Path, - tmp_flow_cell_demux_base_path: Path, -) -> DeleteDemuxAPI: - """Return an initialized populated DeleteDemuxAPI.""" - populated_delete_demux_context.demultiplex_api.flow_cells_dir = tmp_flow_cell_run_base_path - populated_delete_demux_context.demultiplex_api.demultiplexed_runs_dir = ( - tmp_flow_cell_demux_base_path - ) - Path(tmp_flow_cell_run_base_path, f"some_prefix_1100_{bcl2fastq_flow_cell_id}").mkdir( - parents=True, exist_ok=True - ) - Path(tmp_flow_cell_demux_base_path, f"some_prefix_1100_{bcl2fastq_flow_cell_id}").mkdir( - parents=True, exist_ok=True - ) - return DeleteDemuxAPI( - config=populated_delete_demux_context, - flow_cell_name=bcl2fastq_flow_cell_id, - dry_run=False, - ) - - -@pytest.fixture(name="populated_sample_lane_sequencing_metrics_demultiplex_api") -def populated_sample_lane_sequencing_metrics_demultiplex_api( - populated_sample_lane_seq_demux_context: CGConfig, bcl2fastq_flow_cell_id -) -> DeleteDemuxAPI: - """Return an initialized populated DeleteDemuxAPI.""" - return DeleteDemuxAPI( - config=populated_sample_lane_seq_demux_context, - dry_run=False, - flow_cell_name=bcl2fastq_flow_cell_id, - ) - - -@pytest.fixture(name="active_delete_demultiplex_api") -def active_delete_demultiplex_api( - active_delete_demux_context: CGConfig, - bcl2fastq_flow_cell_id: str, - tmp_flow_cell_run_base_path: Path, -) -> DeleteDemuxAPI: - """Return an instantiated DeleteDemuxAPI with active samples on a flow cell.""" - active_delete_demux_context.demultiplex_api.flow_cells_dir = tmp_flow_cell_run_base_path - active_delete_demux_context.demultiplex_api.demultiplexed_runs_dir = tmp_flow_cell_run_base_path - Path(tmp_flow_cell_run_base_path, f"some_prefix_1100_{bcl2fastq_flow_cell_id}").mkdir( - parents=True, exist_ok=True - ) - return DeleteDemuxAPI( - config=active_delete_demux_context, - flow_cell_name=bcl2fastq_flow_cell_id, - dry_run=False, - ) - - -@pytest.fixture(name="delete_demultiplex_api") -def delete_demultiplex_api( - cg_context: CGConfig, - bcl2fastq_flow_cell_id: str, - tmp_flow_cell_run_base_path: Path, -) -> DeleteDemuxAPI: - """Return an initialized DeleteDemuxAPI.""" - cg_context.demultiplex_api.flow_cells_dir = tmp_flow_cell_run_base_path - cg_context.demultiplex_api.demultiplexed_runs_dir = tmp_flow_cell_run_base_path - Path(tmp_flow_cell_run_base_path, f"some_prefix_1100_{bcl2fastq_flow_cell_id}").mkdir( - parents=True, exist_ok=True - ) - return DeleteDemuxAPI( - config=cg_context, - dry_run=False, - flow_cell_name=bcl2fastq_flow_cell_id, - ) - - @pytest.fixture(scope="session") def flow_cell_name_demultiplexed_with_bcl_convert() -> str: return "HY7FFDRX2" @@ -353,29 +239,6 @@ def flow_cell_name_demultiplexed_with_bcl_convert_on_sequencer() -> str: return "22522YLT3" -@pytest.fixture(name="demultiplexing_init_files") -def tmp_demultiplexing_init_files( - bcl2fastq_flow_cell_id: str, populated_delete_demultiplex_api: DeleteDemuxAPI -) -> list[Path]: - """Return a list of demultiplexing init files present in the run directory.""" - run_path: Path = populated_delete_demultiplex_api.run_path - slurm_job_id_file_path: Path = Path(run_path, "slurm_job_ids.yaml") - demux_script_file_path: Path = Path(run_path, "demux-novaseq.sh") - error_log_path: Path = Path(run_path, f"{bcl2fastq_flow_cell_id}_demultiplex.stderr") - log_path: Path = Path(run_path, f"{bcl2fastq_flow_cell_id}_demultiplex.stdout") - - demultiplexing_init_files: list[Path] = [ - slurm_job_id_file_path, - demux_script_file_path, - error_log_path, - log_path, - ] - - for file in demultiplexing_init_files: - file.touch() - return demultiplexing_init_files - - @pytest.fixture(scope="session") def bcl2fastq_folder_structure(tmp_path_factory, cg_dir: Path) -> Path: """Return a folder structure that resembles a bcl2fastq run folder.""" diff --git a/tests/meta/demultiplex/test_delete_demultiplex_api.py b/tests/meta/demultiplex/test_delete_demultiplex_api.py deleted file mode 100644 index 32da80caba..0000000000 --- a/tests/meta/demultiplex/test_delete_demultiplex_api.py +++ /dev/null @@ -1,383 +0,0 @@ -import logging -from pathlib import Path - -import pytest - -from cg.apps.housekeeper.hk import HousekeeperAPI -from cg.exc import DeleteDemuxError -from cg.meta.demultiplex.delete_demultiplex_api import DeleteDemuxAPI -from cg.models.cg_config import CGConfig -from cg.store.api import Store -from cg.store.models import Flowcell, Sample -from tests.store_helpers import StoreHelpers - - -def test_initiate_delete_demux_api( - caplog, - cg_context: CGConfig, - flow_cell_name: str, - tmp_flow_cell_run_base_path: Path, - bcl2fastq_flow_cell_id: str, -): - """Test to initialize the DeleteDemuxAPI""" - - caplog.set_level(logging.DEBUG) - - # GIVEN a correct config - config = cg_context - config.flow_cells_dir = tmp_flow_cell_run_base_path - config.demultiplexed_flow_cells_dir = tmp_flow_cell_run_base_path - Path(tmp_flow_cell_run_base_path, f"some_prefix_1100_{bcl2fastq_flow_cell_id}").mkdir( - parents=True, exist_ok=True - ) - # WHEN initializing the DeleteDemuxAPI - DeleteDemuxAPI( - config=config, - flow_cell_name=bcl2fastq_flow_cell_id, - dry_run=True, - ) - - # THEN the API should be correctly initialized - assert "DeleteDemuxAPI: API initiated" in caplog.text - - -def test_get_presence_status_status_db( - caplog, - helpers: StoreHelpers, - delete_demultiplex_api: DeleteDemuxAPI, - bcl2fastq_flow_cell_id: str, -): - """Test to see if the presence of a flow cell is detected in status-db.""" - caplog.set_level(logging.INFO) - # GIVEN DeleteDemuxAPI objects, one with amd one without a flowcell in status-db - delete_demux_api: DeleteDemuxAPI = delete_demultiplex_api - - # WHEN the flowcell name is parsed and fetching the presence of a flowcell in either context - empty_presence: bool = delete_demux_api.status_db_presence - - # THEN there should be an appropriate presence in both cases - assert not empty_presence - - # WHEN adding a flowcell into the statusdb and checking its updated presence - helpers.add_flow_cell( - store=delete_demux_api.status_db, - flow_cell_name=bcl2fastq_flow_cell_id, - sequencer_type="novaseq", - ) - populated_presence: bool = delete_demux_api.status_db_presence - - # THEN the presence should be updated - assert populated_presence - - -def test_set_dry_run_delete_demux_api( - caplog, - cg_context: CGConfig, - bcl2fastq_flow_cell_id: str, - tmp_flow_cell_run_base_path: Path, -): - """Test to test function to set the API to run in dry run mode""" - - caplog.set_level(logging.DEBUG) - cg_context.demultiplex_api.flow_cells_dir = tmp_flow_cell_run_base_path - cg_context.demultiplex_api.demultiplexed_runs_dir = tmp_flow_cell_run_base_path - Path(tmp_flow_cell_run_base_path, f"some_prefix_1100_{bcl2fastq_flow_cell_id}").mkdir( - parents=True, exist_ok=True - ) - # WHEN setting the dry_run mode on a DeleteDemuxAPI - delete_demultiplex_api: DeleteDemuxAPI = DeleteDemuxAPI( - config=cg_context, - dry_run=True, - flow_cell_name=bcl2fastq_flow_cell_id, - ) - - # THEN the dry run parameter should be set to True and it should be logged - assert delete_demultiplex_api.dry_run - assert "DeleteDemuxAPI: Setting dry run mode to True" in caplog.text - - -def test_no_active_samples_on_flow_cell( - populated_delete_demultiplex_api: DeleteDemuxAPI, bcl2fastq_flow_cell_id: str -): - """Test if the function to find no active samples works correctly.""" - - # GIVEN a flow cell with no active samples related to it - store: Store = populated_delete_demultiplex_api.status_db - flow_cell = store.get_flow_cell_by_name(flow_cell_name=bcl2fastq_flow_cell_id) - samples_on_flow_cell: list[Sample] = flow_cell.samples - - assert samples_on_flow_cell - for sample in samples_on_flow_cell: - active: bool = store.has_active_cases_for_sample(internal_id=sample.internal_id) - assert not active - - # WHEN checking for active samples on flowcell - populated_delete_demultiplex_api._set_samples_on_flow_cell() - active_samples_on_flow_cell: list[ - str - ] | None = populated_delete_demultiplex_api.active_samples_on_flow_cell() - - # THEN the no samples on the flowcell should be found active - assert not active_samples_on_flow_cell - - -def test_active_samples_on_flow_cell( - active_flow_cell_store: Store, - bcl2fastq_flow_cell_id: str, - active_delete_demultiplex_api: DeleteDemuxAPI, -): - """Test if the function to find active samples works correctly.""" - # GIVEN a flow cell with active samples related to it - store: Store = active_flow_cell_store - flow_cell = store.get_flow_cell_by_name(flow_cell_name=bcl2fastq_flow_cell_id) - samples_on_flow_cell: list[Sample] = flow_cell.samples - - assert samples_on_flow_cell - for sample in samples_on_flow_cell: - active: bool = store.has_active_cases_for_sample(internal_id=sample.internal_id) - assert active - - # WHEN checking for active samples on flowcell - active_delete_demultiplex_api._set_samples_on_flow_cell() - active_samples_on_flow_cell: list[ - str - ] | None = active_delete_demultiplex_api.active_samples_on_flow_cell() - - # THEN there should be active samples found - assert any(sample.internal_id in active_samples_on_flow_cell for sample in samples_on_flow_cell) - - -def test_check_active_sample(active_delete_demultiplex_api: DeleteDemuxAPI): - """Test that proper exception is raised when active samples are identified""" - - # GIVEN a DeleteDemuxAPI and a store with active samples related to it - - delete_demux_api: DeleteDemuxAPI = active_delete_demultiplex_api - delete_demux_api.set_dry_run(dry_run=False) - - # WHEN checking if there are active samples on flowcell to be deleted - - with pytest.raises(DeleteDemuxError): - # THEN the proper error should be raised - delete_demux_api.check_active_samples() - - -def test_delete_flow_cell_housekeeper_only_sample_level( - caplog, - cg_context: CGConfig, - tmp_flow_cell_run_base_path: Path, - bcl2fastq_flow_cell_id: str, - populated_flow_cell_store: Store, - sample_level_housekeeper_api: HousekeeperAPI, -): - """Test removing fastqs from Housekeeper when there are only files on sample level - (not on flow cell name). - """ - - caplog.set_level(logging.INFO) - - cg_context.demultiplex_api.flow_cells_dir = tmp_flow_cell_run_base_path - cg_context.demultiplex_api.demultiplexed_runs_dir = tmp_flow_cell_run_base_path - Path(tmp_flow_cell_run_base_path, f"some_prefix_1100_{bcl2fastq_flow_cell_id}").mkdir( - parents=True, exist_ok=True - ) - # GIVEN a DeleteDemuxAPI with a HousekeeperAPI with no files with flow cell name as a tag - - delete_demultiplex_api: DeleteDemuxAPI = DeleteDemuxAPI( - config=cg_context, - dry_run=False, - flow_cell_name=bcl2fastq_flow_cell_id, - ) - delete_demultiplex_api.housekeeper_api = sample_level_housekeeper_api - delete_demultiplex_api._set_samples_on_flow_cell() - - # WHEN wiping files in Housekeeper - - delete_demultiplex_api.delete_flow_cell_housekeeper() - - # THEN you should be notified that there are no files on flow cell names - - assert ( - f"Housekeeper: No sample sheets found with tag: {delete_demultiplex_api.flow_cell_name}" - in caplog.text - ) - - # THEN you should be notified that there were fastq files removed on sample level - assert "Deleting file" in caplog.text - - -def test_delete_flow_cell_housekeeper_flowcell_name( - caplog, - cg_context: CGConfig, - tmp_flow_cell_run_base_path: Path, - tmp_flow_cell_demux_base_path: Path, - flow_cell_name_housekeeper_api: HousekeeperAPI, - bcl2fastq_flow_cell_id: str, - populated_flow_cell_store: Store, - tmp_sample_sheet_path: Path, -): - """Test removing files from Housekeeper using flow cell name as a tag.""" - - caplog.set_level(logging.INFO) - cg_context.housekeeper_api_ = flow_cell_name_housekeeper_api - cg_context.status_db_ = populated_flow_cell_store - cg_context.demultiplex_api.flow_cells_dir = tmp_flow_cell_run_base_path - cg_context.demultiplex_api.demultiplexed_runs_dir = tmp_flow_cell_demux_base_path - Path(tmp_flow_cell_run_base_path, f"some_prefix_1100_{bcl2fastq_flow_cell_id}").mkdir( - parents=True, exist_ok=True - ) - Path(tmp_flow_cell_demux_base_path, f"some_prefix_1100_{bcl2fastq_flow_cell_id}").mkdir( - parents=True, exist_ok=True - ) - # GIVEN - - sample_sheet_file: Path = tmp_sample_sheet_path - - delete_demultiplex_api: DeleteDemuxAPI = DeleteDemuxAPI( - config=cg_context, - dry_run=False, - flow_cell_name=bcl2fastq_flow_cell_id, - ) - delete_demultiplex_api.housekeeper_api = flow_cell_name_housekeeper_api - delete_demultiplex_api._set_samples_on_flow_cell() - - # WHEN - - delete_demultiplex_api.delete_flow_cell_housekeeper() - - # THEN - - assert ( - f"Housekeeper: No files found with tag: {delete_demultiplex_api.flow_cell_name}" - not in caplog.text - ) - assert f"Deleted {sample_sheet_file.as_posix()} from housekeeper" in caplog.text - - # THEN you should be notified that there were fastq files removed on sample level - assert "Deleting file" in caplog.text - - -def test_delete_flow_cell_statusdb( - caplog, - bcl2fastq_flow_cell_id: str, - populated_delete_demultiplex_api: DeleteDemuxAPI, - populated_delete_demux_context: CGConfig, -): - """Test if function to remove flow cell objects from status db is working.""" - - caplog.set_level(logging.INFO) - - # GIVEN a context, with a status db filled with a flow cell object - delete_demux_api: DeleteDemuxAPI = populated_delete_demultiplex_api - delete_demux_api.set_dry_run(dry_run=False) - store = populated_delete_demux_context.status_db - flow_cell: Flowcell = store.get_flow_cell_by_name(flow_cell_name=bcl2fastq_flow_cell_id) - assert flow_cell - - # WHEN removing the flow cell from the database - delete_demux_api.delete_flow_cell_in_status_db() - - # THEN the user should be informed that the flow cell was removed - assert f"StatusDB: Deleted flowcell {delete_demux_api.flow_cell_name}" in caplog.text - - # AND the flow cell should no longer exist in status db - flow_cell: Flowcell = store.get_flow_cell_by_name(flow_cell_name=bcl2fastq_flow_cell_id) - assert not flow_cell - - -def test_delete_flow_cell_hasta( - caplog, - populated_delete_demultiplex_api: DeleteDemuxAPI, -): - """Test if function to remove files from the file system is working""" - - caplog.set_level(logging.INFO) - delete_demux_api: DeleteDemuxAPI = populated_delete_demultiplex_api - - flow_cell_obj: Flowcell = delete_demux_api.status_db.get_flow_cell_by_name( - delete_demux_api.flow_cell_name - ) - delete_demux_api.set_dry_run(dry_run=False) - - # GIVEN an existing demultiplexing and run directory of a flow cell, with a status "ondisk" - - assert delete_demux_api.demultiplexing_out_path.exists() - assert delete_demux_api.run_path.exists() - assert flow_cell_obj.status == "ondisk" - - # WHEN removing said files with the DeleteDemuxAPI - - delete_demux_api.delete_flow_cell_hasta( - demultiplexing_dir=True, - run_dir=True, - ) - - # THEN the demultiplexing directory should be removed - assert ( - f"DeleteDemuxAPI-Hasta: Removing flow cell demultiplexing directory: {delete_demux_api.demultiplexing_out_path}" - in caplog.text - ) - assert delete_demux_api.demultiplexing_out_path.exists() is False - - # THEN the run directory should be removed - assert ( - f"DeleteDemuxAPI-Hasta: Removing flow cell run directory: {delete_demux_api.run_path}" - in caplog.text - ) - assert delete_demux_api.run_path.exists() is False - - # THEN the status of the flow cell in statusdb should be set to removed - assert flow_cell_obj.status == "removed" - - -def test_delete_demultiplexing_init_files( - caplog, demultiplexing_init_files: list[Path], populated_delete_demultiplex_api: DeleteDemuxAPI -): - """Test function to remove demultiplexing init files from the filesystem""" - - # GIVEN a list of paths to existing demultiplexing init filesystem and a initiated DeleteDemuxAPI - delete_demux_api = populated_delete_demultiplex_api - delete_demux_api.set_dry_run(dry_run=False) - - assert all(init_file.exists() for init_file in demultiplexing_init_files) - - # WHEN wiping the existance of mentioned files - - delete_demux_api.delete_demux_init_files() - - # THEN the files should no longer exist - - assert not any(init_file.exists() for init_file in demultiplexing_init_files) - - -def test_delete_flow_cell_sample_lane_sequencing_metrics( - caplog, - populated_sample_lane_sequencing_metrics_demultiplex_api: DeleteDemuxAPI, - populated_sample_lane_seq_demux_context: CGConfig, - flow_cell_name: str, -): - """Test removing objects from sample lane sequencing metrics.""" - - caplog.set_level(logging.INFO) - - # GIVEN a delete demultiplex API with a sequencing metric object - - wipe_demux_api: DeleteDemuxAPI = populated_sample_lane_sequencing_metrics_demultiplex_api - wipe_demux_api.set_dry_run(dry_run=False) - assert wipe_demux_api.status_db.get_sample_lane_sequencing_metrics_by_flow_cell_name( - flow_cell_name=flow_cell_name - ) - - # WHEN wiping the existence of said object - - wipe_demux_api.delete_flow_cell_sample_lane_sequencing_metrics() - - # THEN the object should not exist anymore and the user should be notified - - assert not wipe_demux_api.status_db.get_sample_lane_sequencing_metrics_by_flow_cell_name( - flow_cell_name=flow_cell_name - ) - assert ( - f"Delete entries for Flow Cell: {flow_cell_name} in the Sample Lane Sequencing Metrics table" - in caplog.text - ) diff --git a/tests/meta/demultiplex/test_housekeeper_storage_functions.py b/tests/meta/demultiplex/test_housekeeper_storage_functions.py index be2f23206b..7d730a53fc 100644 --- a/tests/meta/demultiplex/test_housekeeper_storage_functions.py +++ b/tests/meta/demultiplex/test_housekeeper_storage_functions.py @@ -2,6 +2,7 @@ from pathlib import Path +import pytest from housekeeper.store.models import File from mock import MagicMock, call @@ -12,9 +13,11 @@ add_demux_logs_to_housekeeper, add_sample_fastq_files_to_housekeeper, add_sample_sheet_path_to_housekeeper, + delete_sequencing_data_from_housekeeper, ) from cg.models.cg_config import CGConfig from cg.models.flow_cell.flow_cell import FlowCellDirectoryData +from tests.store_helpers import StoreHelpers def test_add_bundle_and_version_if_non_existent(demultiplex_context: CGConfig): @@ -227,3 +230,74 @@ def test_store_fastq_path_in_housekeeper_correct_tags( file: File = populated_housekeeper_api.get_files(bundle=sample_id).first() expected_tags: set[str] = {SequencingFileTag.FASTQ.value, novaseq6000_flow_cell.id, sample_id} assert {tag.name for tag in file.tags} == expected_tags + + +@pytest.mark.parametrize( + "file_tag", + [SequencingFileTag.FASTQ, SequencingFileTag.SPRING, SequencingFileTag.SPRING_METADATA], +) +def test_delete_sequencing_data_from_housekeeper( + file_tag: str, + populated_housekeeper_api, + flow_cell_name: str, + tmp_path: Path, + helpers: StoreHelpers, +): + """Tests that each type of sequencing file is removed when deleting them from Housekeeper.""" + # GIVEN a sample with sequencing files from a flow cell + sample_id: str = "new_sample" + helpers.quick_hk_bundle( + store=populated_housekeeper_api, + bundle_name=sample_id, + files=[tmp_path], + tags=[[file_tag, flow_cell_name, sample_id]], + ) + assert populated_housekeeper_api.files(bundle=sample_id, tags={file_tag}).all() + + # WHEN deleting the sequencing data from housekeeper + delete_sequencing_data_from_housekeeper( + flow_cell_id=flow_cell_name, hk_api=populated_housekeeper_api + ) + + # THEN the sequencing data is deleted from housekeeper + assert not populated_housekeeper_api.files(bundle=sample_id, tags={file_tag}).all() + + +def test_delete_sequencing_data_from_housekeeper_two_flow_cells( + real_housekeeper_api: HousekeeperAPI, + flow_cell_name: str, + tmp_path: Path, + helpers: StoreHelpers, +): + """Tests that the delete_sequencing_data_from_housekeeper function deletes the correct files + from housekeeper when there are files from two flow cells.""" + # GIVEN a sample with sequencing files from two flow cells + sample_id: str = "new_sample" + second_file: Path = tmp_path.with_suffix(".2.fastq.gz") + second_file.touch() + second_flow_cell_name: str = "second-flow-cell" + helpers.quick_hk_bundle( + store=real_housekeeper_api, + bundle_name=sample_id, + files=[tmp_path, second_file], + tags=[ + [SequencingFileTag.FASTQ, flow_cell_name, sample_id], + [SequencingFileTag.FASTQ, second_flow_cell_name, sample_id], + ], + ) + assert ( + len(real_housekeeper_api.files(bundle=sample_id, tags={SequencingFileTag.FASTQ}).all()) == 2 + ) + + # WHEN deleting the sequencing data of one flow cell from housekeeper + delete_sequencing_data_from_housekeeper( + flow_cell_id=flow_cell_name, hk_api=real_housekeeper_api + ) + + # THEN the sequencing data the first flow cell is deleted from housekeeper + assert not real_housekeeper_api.files(bundle=sample_id, tags={flow_cell_name}).all() + + # THEN the sequencing data from the second flow cell should remain + remaining_file: File = real_housekeeper_api.files(bundle=sample_id).one() + assert remaining_file + assert remaining_file.path.endswith(second_file.name) diff --git a/tests/meta/demultiplex/test_status_db_storage_functions.py b/tests/meta/demultiplex/test_status_db_storage_functions.py index 926880ed3a..44598e5fa9 100644 --- a/tests/meta/demultiplex/test_status_db_storage_functions.py +++ b/tests/meta/demultiplex/test_status_db_storage_functions.py @@ -6,12 +6,14 @@ from cg.meta.demultiplex.status_db_storage_functions import ( add_samples_to_flow_cell_in_status_db, add_sequencing_metrics_to_statusdb, + delete_sequencing_metrics_from_statusdb, metric_has_sample_in_statusdb, update_sample_read_count, ) from cg.models.cg_config import CGConfig from cg.store import Store -from cg.store.models import Sample +from cg.store.models import Flowcell, Sample, SampleLaneSequencingMetrics +from tests.store_helpers import StoreHelpers def test_add_single_sequencing_metrics_entry_to_statusdb( @@ -94,3 +96,49 @@ def test_add_samples_to_flow_cell_in_status_db( flow_cell = store_with_sequencing_metrics.get_flow_cell_by_name(flow_cell_name=flow_cell_name) assert flow_cell.samples assert flow_cell.samples[0].internal_id == sample_id + + +def test_delete_sequencing_metrics_from_statusdb_existing_metrics( + store_with_sequencing_metrics: Store, flow_cell_name: str +): + # GIVEN a store with sequencing metrics + store = store_with_sequencing_metrics + + # GIVEN that the flow cell has sequencing metrics + metrics: list[ + SampleLaneSequencingMetrics + ] = store.get_sample_lane_sequencing_metrics_by_flow_cell_name(flow_cell_name) + assert metrics + + # WHEN deleting sequencing metrics from statusdb + delete_sequencing_metrics_from_statusdb(flow_cell_id=flow_cell_name, store=store) + + # THEN the sequencing metrics should be deleted from statusdb + assert not store.get_sample_lane_sequencing_metrics_by_flow_cell_name( + flow_cell_name=flow_cell_name + ) + + +def test_delete_sequencing_metrics_from_statusdb_no_metrics( + store_with_sequencing_metrics: Store, + helpers: StoreHelpers, +): + # GIVEN a store with sequencing metrics + store = store_with_sequencing_metrics + + # GIVEN a new flow cell with no sequencing metrics + flow_cell: Flowcell = helpers.add_flow_cell(store=store) + metrics: list[ + SampleLaneSequencingMetrics + ] = store.get_sample_lane_sequencing_metrics_by_flow_cell_name(flow_cell.name) + assert not metrics + + # WHEN deleting sequencing metrics from statusdb + delete_sequencing_metrics_from_statusdb(flow_cell_id=flow_cell.name, store=store) + + # THEN no errors should be raised + + # THEN the sequencing metrics are still not in statusdb + assert not store.get_sample_lane_sequencing_metrics_by_flow_cell_name( + flow_cell_name=flow_cell.name + ) diff --git a/tests/store_helpers.py b/tests/store_helpers.py index 0eb846cdfc..4aab6e23a6 100644 --- a/tests/store_helpers.py +++ b/tests/store_helpers.py @@ -1,6 +1,7 @@ """Utility functions to simply add test data in a cg store.""" import logging from datetime import datetime +from pathlib import Path from housekeeper.store.models import Bundle, Version @@ -60,6 +61,43 @@ def ensure_hk_bundle(store: HousekeeperAPI, bundle_data: dict, include: bool = F return _bundle + @staticmethod + def format_hk_bundle_dict( + bundle_name: str, files: list[Path], all_tags: list[list[str]] + ) -> dict: + """Creates the dict representation for a housekeeper bundle with necessary values set.""" + return { + "name": bundle_name, + "created_at": datetime.now(), + "expires_at": datetime.now(), + "files": [ + { + "path": file.as_posix(), + "tags": tags, + "archive": False, + } + for file, tags in zip(files, all_tags) + ], + } + + @staticmethod + def quick_hk_bundle( + bundle_name: str, files: list[Path], store: HousekeeperAPI, tags: list[list[str]] + ): + """Adds a bundle to housekeeper with the given files and tags. Returns the new bundle. + + Arguments: + bundle_name = The name of the bundle to be created. + files = A list of files to be added to the bundle. + store = The database instance where the bundle should be added. + tags = A list where each entry is the set of tags for the corresponding file. + The length of this list should be the same as the length of the files list. + """ + bundle_data: dict = StoreHelpers.format_hk_bundle_dict( + bundle_name=bundle_name, files=files, all_tags=tags + ) + return StoreHelpers.ensure_hk_bundle(store=store, bundle_data=bundle_data) + @staticmethod def ensure_hk_version(store: HousekeeperAPI, bundle_data: dict) -> Version: """Utility function to return existing or create an version for tests."""