From 0fc71b09c941445d163be0552c0a82733b5e5566 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Isak=20Ohlsson=20=C3=85ngnell?= <40887124+islean@users.noreply.github.com> Date: Wed, 10 Jan 2024 13:51:57 +0100 Subject: [PATCH] (Archiving) Do not compress archived files (#2814) (minor) ### Added - Method for checking if the Spring file in a CompressionData object is archived. ### Changed - Compression of Fastq -> Spring is not performed when Spring is archived. --- cg/apps/crunchy/crunchy.py | 17 +++-- cg/meta/compress/compress.py | 31 +++++---- tests/apps/crunchy/test_crunchy.py | 65 ++++++++----------- tests/conftest.py | 10 ++- .../meta/compress/test_compress_meta_fastq.py | 64 +++++++++++++----- 5 files changed, 110 insertions(+), 77 deletions(-) diff --git a/cg/apps/crunchy/crunchy.py b/cg/apps/crunchy/crunchy.py index 406515e0be..d0c2c10e04 100644 --- a/cg/apps/crunchy/crunchy.py +++ b/cg/apps/crunchy/crunchy.py @@ -64,25 +64,24 @@ def is_compression_pending(compression_obj: CompressionData) -> bool: def is_fastq_compression_possible(compression_obj: CompressionData) -> bool: """Check if FASTQ compression is possible. - There are three possible answers to this question: - - - Compression is running -> Compression NOT possible - - SPRING archive exists -> Compression NOT possible - - Data is external -> Compression NOT possible - - Not compressed and not running -> Compression IS possible + - Compression is running -> Compression NOT possible + - SPRING file exists on Hasta -> Compression NOT possible + - Data is external -> Compression NOT possible + - Not compressed and + not running -> Compression IS possible """ if CrunchyAPI.is_compression_pending(compression_obj): return False if compression_obj.spring_exists(): - LOG.info("SPRING file found") + LOG.debug("SPRING file found") return False if "external-data" in str(compression_obj.fastq_first): - LOG.info("File is external data and should not be compressed") + LOG.debug("File is external data and should not be compressed") return False - LOG.info("FASTQ compression is possible") + LOG.debug("FASTQ compression is possible") return True diff --git a/cg/meta/compress/compress.py b/cg/meta/compress/compress.py index c2674b9037..f941479e2c 100644 --- a/cg/meta/compress/compress.py +++ b/cg/meta/compress/compress.py @@ -14,7 +14,7 @@ from cg.constants import SequencingFileTag from cg.meta.backup.backup import SpringBackupAPI from cg.meta.compress import files -from cg.models import CompressionData, FileData +from cg.models import CompressionData from cg.store.models import Sample LOG = logging.getLogger(__name__) @@ -73,16 +73,11 @@ def compress_fastq(self, sample_id: str) -> bool: for run_name in sample_fastq: LOG.info(f"Check if compression possible for run {run_name}") compression: CompressionData = sample_fastq[run_name]["compression_data"] - if FileData.is_empty(compression.fastq_first): - LOG.warning(f"Fastq files are empty for {sample_id}: {compression.fastq_first}") - self.delete_fastq_housekeeper( - hk_fastq_first=sample_fastq[run_name]["hk_first"], - hk_fastq_second=sample_fastq[run_name]["hk_second"], - ) - all_ok = False - continue - - if not self.crunchy_api.is_fastq_compression_possible(compression_obj=compression): + is_compression_possible: bool = self._is_fastq_compression_possible( + compression=compression, + sample_id=sample_id, + ) + if not is_compression_possible: LOG.warning(f"FASTQ to SPRING not possible for {sample_id}, run {run_name}") all_ok = False continue @@ -93,6 +88,20 @@ def compress_fastq(self, sample_id: str) -> bool: self.crunchy_api.fastq_to_spring(compression_obj=compression, sample_id=sample_id) return all_ok + def _is_fastq_compression_possible(self, compression: CompressionData, sample_id: str) -> bool: + if self._is_spring_archived(compression): + LOG.debug(f"Found archived Spring file for {sample_id} - compression not possible") + return False + return self.crunchy_api.is_fastq_compression_possible(compression_obj=compression) + + def _is_spring_archived(self, compression_data: CompressionData) -> bool: + spring_file: File | None = self.hk_api.get_file_insensitive_path( + path=compression_data.spring_path + ) + if (not spring_file) or (not spring_file.archive): + return False + return bool(spring_file.archive.archived_at) + def decompress_spring(self, sample_id: str) -> bool: """Decompress SPRING archive for a sample. diff --git a/tests/apps/crunchy/test_crunchy.py b/tests/apps/crunchy/test_crunchy.py index c644a3d2bc..4a4c6e6d1d 100644 --- a/tests/apps/crunchy/test_crunchy.py +++ b/tests/apps/crunchy/test_crunchy.py @@ -56,7 +56,7 @@ def test_is_fastq_compression_possible( assert not spring_file.exists() # WHEN checking if SPRING compression is done - result = crunchy_api.is_fastq_compression_possible(compression_object) + result = crunchy_api.is_fastq_compression_possible(compression_obj=compression_object) # THEN result should be True assert result is True @@ -83,7 +83,7 @@ def test_is_fastq_compression_possible_compression_pending( assert not spring_file.exists() # WHEN checking if SPRING compression is done - result = crunchy_api.is_fastq_compression_possible(compression_object) + result = crunchy_api.is_fastq_compression_possible(compression_obj=compression_object) # THEN result should be False since the compression flag exists assert result is False @@ -107,7 +107,7 @@ def test_is_fastq_compression_possible_spring_exists( assert spring_file.exists() # WHEN checking if SPRING compression is done - result = crunchy_api.is_fastq_compression_possible(compression_object) + result = crunchy_api.is_fastq_compression_possible(compression_obj=compression_object) # THEN result should be False since the compression flag exists assert result is False @@ -116,7 +116,7 @@ def test_is_fastq_compression_possible_spring_exists( def test_is_compression_done( - crunchy_config: dict[str, dict[str, Any]], + real_crunchy_api, spring_metadata_file: Path, compression_object: CompressionData, caplog, @@ -124,14 +124,13 @@ def test_is_compression_done( """Test if compression is done when everything is correct""" caplog.set_level(logging.DEBUG) # GIVEN a crunchy-api, and FASTQ paths - crunchy_api = CrunchyAPI(crunchy_config) # GIVEN no SPRING file exists compression_object.spring_path.touch() assert spring_metadata_file == compression_object.spring_metadata_path assert spring_metadata_file.exists() # WHEN checking if SPRING compression is done - result = crunchy_api.is_fastq_compression_done(compression_object) + result = real_crunchy_api.is_fastq_compression_done(compression_object) # THEN result should be True assert result is True @@ -140,18 +139,17 @@ def test_is_compression_done( def test_is_compression_done_no_spring( - crunchy_config: dict[str, dict[str, Any]], compression_object: CompressionData, caplog + real_crunchy_api: CrunchyAPI, compression_object: CompressionData, caplog ): """Test if compression is done when no SPRING archive""" caplog.set_level(logging.DEBUG) # GIVEN a crunchy-api, and FASTQ paths - crunchy_api = CrunchyAPI(crunchy_config) # GIVEN no SPRING file exists spring_file = compression_object.spring_path assert not spring_file.exists() # WHEN checking if SPRING compression is done - result = crunchy_api.is_fastq_compression_done(compression_object) + result = real_crunchy_api.is_fastq_compression_done(compression_object) # THEN result should be false assert not result @@ -160,20 +158,19 @@ def test_is_compression_done_no_spring( def test_is_compression_done_no_flag_spring( - crunchy_config: dict[str, dict[str, Any]], compression_object: CompressionData, caplog + real_crunchy_api: CrunchyAPI, compression_object: CompressionData, caplog ): """Test if SPRING compression is done when no metadata file""" caplog.set_level(logging.DEBUG) # GIVEN a crunchy-api, and FASTQ paths - crunchy_api = CrunchyAPI(crunchy_config) # GIVEN a existing SPRING file compression_object.spring_path.touch() assert compression_object.spring_path.exists() - # GIVEN a non existing flag file + # GIVEN a non-existing flag file assert not compression_object.spring_metadata_path.exists() # WHEN checking if SPRING compression is done - result = crunchy_api.is_fastq_compression_done(compression_object) + result = real_crunchy_api.is_fastq_compression_done(compression_object) # THEN result should be false assert not result @@ -182,7 +179,7 @@ def test_is_compression_done_no_flag_spring( def test_is_compression_done_spring( - crunchy_config: dict[str, dict[str, Any]], + real_crunchy_api: CrunchyAPI, compression_object: CompressionData, spring_metadata_file: Path, caplog, @@ -190,7 +187,6 @@ def test_is_compression_done_spring( """Test if compression is done when SPRING files exists""" caplog.set_level(logging.DEBUG) # GIVEN a crunchy-api, and FASTQ paths - crunchy_api = CrunchyAPI(crunchy_config) # GIVEN a existing SPRING file compression_object.spring_path.touch() assert compression_object.spring_path.exists() @@ -199,7 +195,7 @@ def test_is_compression_done_spring( assert compression_object.spring_metadata_path.exists() # WHEN checking if SPRING compression is done - result = crunchy_api.is_fastq_compression_done(compression_object) + result = real_crunchy_api.is_fastq_compression_done(compression_object) # THEN result should be True assert result @@ -208,7 +204,7 @@ def test_is_compression_done_spring( def test_is_compression_done_spring_new_files( - crunchy_config: dict[str, dict[str, Any]], + real_crunchy_api: CrunchyAPI, compression_object: CompressionData, spring_metadata_file: Path, caplog, @@ -219,7 +215,6 @@ def test_is_compression_done_spring_new_files( """ caplog.set_level(logging.DEBUG) # GIVEN a crunchy-api, and FASTQ paths - crunchy_api = CrunchyAPI(crunchy_config) # GIVEN a existing SPRING file compression_object.spring_path.touch() assert compression_object.spring_path.exists() @@ -237,7 +232,7 @@ def test_is_compression_done_spring_new_files( assert "updated" in file_info # WHEN checking if SPRING compression is done - result = crunchy_api.is_fastq_compression_done(compression_object) + result = real_crunchy_api.is_fastq_compression_done(compression_object) # THEN result should be False since the updated date < 3 weeks assert result is False @@ -246,7 +241,7 @@ def test_is_compression_done_spring_new_files( def test_is_compression_done_spring_old_files( - crunchy_config: dict[str, dict[str, Any]], + real_crunchy_api: CrunchyAPI, compression_object: CompressionData, spring_metadata_file: Path, caplog, @@ -257,7 +252,6 @@ def test_is_compression_done_spring_old_files( """ caplog.set_level(logging.DEBUG) # GIVEN a crunchy-api, and FASTQ paths - crunchy_api = CrunchyAPI(crunchy_config) # GIVEN a existing SPRING file compression_object.spring_path.touch() assert compression_object.spring_path.exists() @@ -277,7 +271,7 @@ def test_is_compression_done_spring_old_files( ) # WHEN checking if SPRING compression is done - result = crunchy_api.is_fastq_compression_done(compression_object) + result = real_crunchy_api.is_fastq_compression_done(compression_object) # THEN result should be True since the updated date > 3 weeks assert result is True @@ -286,7 +280,7 @@ def test_is_compression_done_spring_old_files( def test_is_spring_decompression_possible_no_fastq( - crunchy_config: dict[str, dict[str, Any]], compression_object: CompressionData, caplog + real_crunchy_api: CrunchyAPI, compression_object: CompressionData, caplog ): """Test if decompression is possible when there are no FASTQ files @@ -294,7 +288,6 @@ def test_is_spring_decompression_possible_no_fastq( """ caplog.set_level(logging.DEBUG) # GIVEN a crunchy-api - crunchy_api = CrunchyAPI(crunchy_config) # GIVEN a existing SPRING file compression_object.spring_path.touch() assert compression_object.spring_path.exists() @@ -305,7 +298,7 @@ def test_is_spring_decompression_possible_no_fastq( assert not compression_object.fastq_second.exists() # WHEN checking if SPRING compression is done - result = crunchy_api.is_spring_decompression_possible(compression_object) + result = real_crunchy_api.is_spring_decompression_possible(compression_object) # THEN result should be True since there are no fastq files assert result is True @@ -314,7 +307,7 @@ def test_is_spring_decompression_possible_no_fastq( def test_is_spring_decompression_possible_no_spring( - crunchy_config: dict[str, dict[str, Any]], compression_object: CompressionData, caplog + real_crunchy_api: CrunchyAPI, compression_object: CompressionData, caplog ): """Test if decompression is possible when there are no SPRING archive @@ -322,10 +315,9 @@ def test_is_spring_decompression_possible_no_spring( """ caplog.set_level(logging.DEBUG) # GIVEN a crunchy-api, and FASTQ paths - crunchy_api = CrunchyAPI(crunchy_config) # WHEN checking if SPRING compression is done - result = crunchy_api.is_spring_decompression_possible(compression_object) + result = real_crunchy_api.is_spring_decompression_possible(compression_object) # THEN result should be False since there is no SPRING archive assert result is False @@ -334,7 +326,7 @@ def test_is_spring_decompression_possible_no_spring( def test_is_spring_decompression_possible_fastq( - crunchy_config: dict[str, dict[str, Any]], compression_object: CompressionData, caplog + real_crunchy_api: CrunchyAPI, compression_object: CompressionData, caplog ): """Test if decompression is possible when there are existing FASTQ files @@ -342,7 +334,6 @@ def test_is_spring_decompression_possible_fastq( """ caplog.set_level(logging.DEBUG) # GIVEN a crunchy-api, and FASTQ paths - crunchy_api = CrunchyAPI(crunchy_config) # GIVEN a existing SPRING file compression_object.spring_path.touch() # GIVEN that the FASTQ files exists @@ -350,7 +341,7 @@ def test_is_spring_decompression_possible_fastq( compression_object.fastq_second.touch() # WHEN checking if SPRING decompression is possible - result = crunchy_api.is_spring_decompression_possible(compression_object) + result = real_crunchy_api.is_spring_decompression_possible(compression_object) # THEN result should be False since the FASTQ files already exists assert result is False @@ -359,31 +350,29 @@ def test_is_spring_decompression_possible_fastq( def test_is_not_pending_when_no_flag_file( - crunchy_config: dict[str, dict[str, Any]], compression_object: CompressionData + real_crunchy_api: CrunchyAPI, compression_object: CompressionData ): """Test if SPRING compression is pending when no flag file""" # GIVEN a crunchy-api, and a FASTQ file - crunchy_api = CrunchyAPI(crunchy_config) - # GIVEN a non existing pending flag + # GIVEN a non-existing pending flag assert not compression_object.pending_path.exists() # WHEN checking if SPRING compression is ongoing - result = crunchy_api.is_compression_pending(compression_object) + result = real_crunchy_api.is_compression_pending(compression_object) # THEN result should be False since the pending flag is not there assert result is False -def test_is_pending(crunchy_config: dict[str, dict[str, Any]], compression_object: CompressionData): +def test_is_pending(real_crunchy_api: CrunchyAPI, compression_object: CompressionData): """Test if SPRING compression is pending when pending file exists""" # GIVEN a crunchy-api, and FASTQ files - crunchy_api = CrunchyAPI(crunchy_config) # GIVEN a existing pending flag compression_object.pending_path.touch() assert compression_object.pending_path.exists() # WHEN checking if SPRING compression is pending - result = crunchy_api.is_compression_pending(compression_object) + result = real_crunchy_api.is_compression_pending(compression_object) # THEN result should be True since the pending_path exists assert result is True diff --git a/tests/conftest.py b/tests/conftest.py index 7031d7c179..470cb447ad 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -14,12 +14,15 @@ from housekeeper.store.models import File, Version from requests import Response +from cg.apps.crunchy import CrunchyAPI from cg.apps.demultiplex.demultiplex_api import DemultiplexingAPI from cg.apps.demultiplex.sample_sheet.sample_models import ( FlowCellSampleBcl2Fastq, FlowCellSampleBCLConvert, ) -from cg.apps.demultiplex.sample_sheet.sample_sheet_creator import SampleSheetCreatorBCLConvert +from cg.apps.demultiplex.sample_sheet.sample_sheet_creator import ( + SampleSheetCreatorBCLConvert, +) from cg.apps.downsample.downsample import DownsampleAPI from cg.apps.gens import GensAPI from cg.apps.gt import GenotypeAPI @@ -381,6 +384,11 @@ def crunchy_config() -> dict[str, dict[str, Any]]: } +@pytest.fixture +def real_crunchy_api(crunchy_config) -> CrunchyAPI: + return CrunchyAPI(crunchy_config) + + @pytest.fixture def hk_config_dict(root_path: Path): """Housekeeper configs.""" diff --git a/tests/meta/compress/test_compress_meta_fastq.py b/tests/meta/compress/test_compress_meta_fastq.py index 1431fb6796..49fb08cf95 100644 --- a/tests/meta/compress/test_compress_meta_fastq.py +++ b/tests/meta/compress/test_compress_meta_fastq.py @@ -1,5 +1,8 @@ """Tests for FASTQ part of meta compress api""" import logging +from unittest import mock + +from cg.meta.compress import CompressAPI def test_compress_case_fastq_one_sample(populated_compress_fastq_api, sample, caplog): @@ -10,14 +13,15 @@ def test_compress_case_fastq_one_sample(populated_compress_fastq_api, sample, ca # GIVEN a populated compress api # WHEN Compressing the bam files for the case - res = compress_api.compress_fastq(sample) + with mock.patch.object(CompressAPI, "_is_spring_archived", return_value=False): + result = compress_api.compress_fastq(sample) - # THEN assert compression succeded - assert res is True - # THEN assert that the correct information is communicated - assert "Compressing" in caplog.text - # THEN assert that the correct information is communicated - assert "to SPRING format" in caplog.text + # THEN assert compression succeded + assert result is True + # THEN assert that the correct information is communicated + assert "Compressing" in caplog.text + # THEN assert that the correct information is communicated + assert "to SPRING format" in caplog.text def test_compress_fastq_compression_done( @@ -34,15 +38,16 @@ def test_compress_fastq_compression_done( compression_object.spring_path.touch() # WHEN Compressing the bam files for the case - res = compress_api.compress_fastq(sample) + with mock.patch.object(CompressAPI, "_is_spring_archived", return_value=False): + result = compress_api.compress_fastq(sample) - # THEN assert compression succeded - assert res is False - # THEN assert that the correct information is communicated - assert f"FASTQ to SPRING not possible for {sample}" in caplog.text + # THEN assert compression succeded + assert result is False + # THEN assert that the correct information is communicated + assert f"FASTQ to SPRING not possible for {sample}" in caplog.text -def test_compress_case_fastq_compression_pending( +def test_compress_sample_fastq_compression_pending( populated_compress_fastq_api, sample, compression_object, caplog ): """Test to compress all FASTQ files for a sample when compression is pending @@ -55,9 +60,32 @@ def test_compress_case_fastq_compression_pending( compression_object.pending_path.touch() # WHEN compressing the FASTQ files for the case - res = compress_api.compress_fastq(sample) + with mock.patch.object(CompressAPI, "_is_spring_archived", return_value=False): + result = compress_api.compress_fastq(sample) + + # THEN assert compression returns False + assert result is False + # THEN assert that the correct information is communicated + assert f"FASTQ to SPRING not possible for {sample}" in caplog.text + + +def test_compress_sample_fastq_archived_spring_file( + populated_compress_fastq_api, sample, compression_object, caplog +): + """Test to compress all FASTQ files for a sample when the Spring file is archived + + The program should not compress any files since the Spring file already exists + """ + caplog.set_level(logging.DEBUG) + compress_api = populated_compress_fastq_api + # GIVEN that the pending flag exists + compression_object.pending_path.touch() + + # WHEN compressing the FASTQ files for the case + with mock.patch.object(CompressAPI, "_is_spring_archived", return_value=True): + result = compress_api.compress_fastq(sample) - # THEN assert compression returns False - assert res is False - # THEN assert that the correct information is communicated - assert f"FASTQ to SPRING not possible for {sample}" in caplog.text + # THEN assert compression returns False + assert result is False + # THEN assert that the correct information is communicated + assert f"FASTQ to SPRING not possible for {sample}" in caplog.text