Skip to content

Commit

Permalink
(Archiving) Do not compress archived files (#2814) (minor)
Browse files Browse the repository at this point in the history
### Added

- Method for checking if the Spring file in a CompressionData object is archived.

### Changed

- Compression of Fastq -> Spring is not performed when Spring is archived.
  • Loading branch information
islean authored Jan 10, 2024
1 parent adae4a2 commit 0fc71b0
Show file tree
Hide file tree
Showing 5 changed files with 110 additions and 77 deletions.
17 changes: 8 additions & 9 deletions cg/apps/crunchy/crunchy.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,25 +64,24 @@ def is_compression_pending(compression_obj: CompressionData) -> bool:
def is_fastq_compression_possible(compression_obj: CompressionData) -> bool:
"""Check if FASTQ compression is possible.
There are four possible answers to this question:
- Compression is running -> Compression NOT possible
- SPRING archive exists -> Compression NOT possible
- Data is external -> Compression NOT possible
- Not compressed and not running -> Compression IS possible
- Compression is running -> Compression NOT possible
- SPRING file exists on Hasta -> Compression NOT possible
- Data is external -> Compression NOT possible
- Not compressed and
not running -> Compression IS possible
"""
if CrunchyAPI.is_compression_pending(compression_obj):
return False

if compression_obj.spring_exists():
LOG.info("SPRING file found")
LOG.debug("SPRING file found")
return False

if "external-data" in str(compression_obj.fastq_first):
LOG.info("File is external data and should not be compressed")
LOG.debug("File is external data and should not be compressed")
return False

LOG.info("FASTQ compression is possible")
LOG.debug("FASTQ compression is possible")

return True

Expand Down
31 changes: 20 additions & 11 deletions cg/meta/compress/compress.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from cg.constants import SequencingFileTag
from cg.meta.backup.backup import SpringBackupAPI
from cg.meta.compress import files
from cg.models import CompressionData, FileData
from cg.models import CompressionData
from cg.store.models import Sample

LOG = logging.getLogger(__name__)
Expand Down Expand Up @@ -73,16 +73,11 @@ def compress_fastq(self, sample_id: str) -> bool:
for run_name in sample_fastq:
LOG.info(f"Check if compression possible for run {run_name}")
compression: CompressionData = sample_fastq[run_name]["compression_data"]
if FileData.is_empty(compression.fastq_first):
LOG.warning(f"Fastq files are empty for {sample_id}: {compression.fastq_first}")
self.delete_fastq_housekeeper(
hk_fastq_first=sample_fastq[run_name]["hk_first"],
hk_fastq_second=sample_fastq[run_name]["hk_second"],
)
all_ok = False
continue

if not self.crunchy_api.is_fastq_compression_possible(compression_obj=compression):
is_compression_possible: bool = self._is_fastq_compression_possible(
compression=compression,
sample_id=sample_id,
)
if not is_compression_possible:
LOG.warning(f"FASTQ to SPRING not possible for {sample_id}, run {run_name}")
all_ok = False
continue
Expand All @@ -93,6 +88,20 @@ def compress_fastq(self, sample_id: str) -> bool:
self.crunchy_api.fastq_to_spring(compression_obj=compression, sample_id=sample_id)
return all_ok

def _is_fastq_compression_possible(self, compression: CompressionData, sample_id: str) -> bool:
    """Return whether FASTQ to SPRING compression may be started.

    Compression is refused when the SPRING file is already archived;
    otherwise the decision is delegated to the Crunchy API.
    """
    spring_is_archived: bool = self._is_spring_archived(compression)
    if not spring_is_archived:
        return self.crunchy_api.is_fastq_compression_possible(compression_obj=compression)
    LOG.debug(f"Found archived Spring file for {sample_id} - compression not possible")
    return False

def _is_spring_archived(self, compression_data: CompressionData) -> bool:
    """Return True if the SPRING file's record has a set ``archived_at``.

    The file is looked up through ``hk_api`` by its SPRING path. A missing
    file, or a file without an archive entry, counts as not archived.
    """
    spring_file: File | None = self.hk_api.get_file_insensitive_path(
        path=compression_data.spring_path
    )
    archive = spring_file.archive if spring_file else None
    return bool(archive.archived_at) if archive else False

def decompress_spring(self, sample_id: str) -> bool:
"""Decompress SPRING archive for a sample.
Expand Down
65 changes: 27 additions & 38 deletions tests/apps/crunchy/test_crunchy.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def test_is_fastq_compression_possible(
assert not spring_file.exists()

# WHEN checking if FASTQ compression is possible
result = crunchy_api.is_fastq_compression_possible(compression_object)
result = crunchy_api.is_fastq_compression_possible(compression_obj=compression_object)

# THEN result should be True
assert result is True
Expand All @@ -83,7 +83,7 @@ def test_is_fastq_compression_possible_compression_pending(
assert not spring_file.exists()

# WHEN checking if FASTQ compression is possible
result = crunchy_api.is_fastq_compression_possible(compression_object)
result = crunchy_api.is_fastq_compression_possible(compression_obj=compression_object)

# THEN result should be False since the compression flag exists
assert result is False
Expand All @@ -107,7 +107,7 @@ def test_is_fastq_compression_possible_spring_exists(
assert spring_file.exists()

# WHEN checking if FASTQ compression is possible
result = crunchy_api.is_fastq_compression_possible(compression_object)
result = crunchy_api.is_fastq_compression_possible(compression_obj=compression_object)

# THEN result should be False since the SPRING file exists
assert result is False
Expand All @@ -116,22 +116,21 @@ def test_is_fastq_compression_possible_spring_exists(


def test_is_compression_done(
crunchy_config: dict[str, dict[str, Any]],
real_crunchy_api,
spring_metadata_file: Path,
compression_object: CompressionData,
caplog,
):
"""Test if compression is done when everything is correct"""
caplog.set_level(logging.DEBUG)
# GIVEN a crunchy-api, and FASTQ paths
crunchy_api = CrunchyAPI(crunchy_config)
# GIVEN an existing SPRING file
compression_object.spring_path.touch()
assert spring_metadata_file == compression_object.spring_metadata_path
assert spring_metadata_file.exists()

# WHEN checking if SPRING compression is done
result = crunchy_api.is_fastq_compression_done(compression_object)
result = real_crunchy_api.is_fastq_compression_done(compression_object)

# THEN result should be True
assert result is True
Expand All @@ -140,18 +139,17 @@ def test_is_compression_done(


def test_is_compression_done_no_spring(
crunchy_config: dict[str, dict[str, Any]], compression_object: CompressionData, caplog
real_crunchy_api: CrunchyAPI, compression_object: CompressionData, caplog
):
"""Test if compression is done when no SPRING archive"""
caplog.set_level(logging.DEBUG)
# GIVEN a crunchy-api, and FASTQ paths
crunchy_api = CrunchyAPI(crunchy_config)
# GIVEN no SPRING file exists
spring_file = compression_object.spring_path
assert not spring_file.exists()

# WHEN checking if SPRING compression is done
result = crunchy_api.is_fastq_compression_done(compression_object)
result = real_crunchy_api.is_fastq_compression_done(compression_object)

# THEN result should be false
assert not result
Expand All @@ -160,20 +158,19 @@ def test_is_compression_done_no_spring(


def test_is_compression_done_no_flag_spring(
crunchy_config: dict[str, dict[str, Any]], compression_object: CompressionData, caplog
real_crunchy_api: CrunchyAPI, compression_object: CompressionData, caplog
):
"""Test if SPRING compression is done when no metadata file"""
caplog.set_level(logging.DEBUG)
# GIVEN a crunchy-api, and FASTQ paths
crunchy_api = CrunchyAPI(crunchy_config)
# GIVEN an existing SPRING file
compression_object.spring_path.touch()
assert compression_object.spring_path.exists()
# GIVEN a non existing flag file
# GIVEN a non-existing flag file
assert not compression_object.spring_metadata_path.exists()

# WHEN checking if SPRING compression is done
result = crunchy_api.is_fastq_compression_done(compression_object)
result = real_crunchy_api.is_fastq_compression_done(compression_object)

# THEN result should be false
assert not result
Expand All @@ -182,15 +179,14 @@ def test_is_compression_done_no_flag_spring(


def test_is_compression_done_spring(
crunchy_config: dict[str, dict[str, Any]],
real_crunchy_api: CrunchyAPI,
compression_object: CompressionData,
spring_metadata_file: Path,
caplog,
):
"""Test if compression is done when SPRING files exists"""
caplog.set_level(logging.DEBUG)
# GIVEN a crunchy-api, and FASTQ paths
crunchy_api = CrunchyAPI(crunchy_config)
# GIVEN an existing SPRING file
compression_object.spring_path.touch()
assert compression_object.spring_path.exists()
Expand All @@ -199,7 +195,7 @@ def test_is_compression_done_spring(
assert compression_object.spring_metadata_path.exists()

# WHEN checking if SPRING compression is done
result = crunchy_api.is_fastq_compression_done(compression_object)
result = real_crunchy_api.is_fastq_compression_done(compression_object)

# THEN result should be True
assert result
Expand All @@ -208,7 +204,7 @@ def test_is_compression_done_spring(


def test_is_compression_done_spring_new_files(
crunchy_config: dict[str, dict[str, Any]],
real_crunchy_api: CrunchyAPI,
compression_object: CompressionData,
spring_metadata_file: Path,
caplog,
Expand All @@ -219,7 +215,6 @@ def test_is_compression_done_spring_new_files(
"""
caplog.set_level(logging.DEBUG)
# GIVEN a crunchy-api, and FASTQ paths
crunchy_api = CrunchyAPI(crunchy_config)
# GIVEN an existing SPRING file
compression_object.spring_path.touch()
assert compression_object.spring_path.exists()
Expand All @@ -237,7 +232,7 @@ def test_is_compression_done_spring_new_files(
assert "updated" in file_info

# WHEN checking if SPRING compression is done
result = crunchy_api.is_fastq_compression_done(compression_object)
result = real_crunchy_api.is_fastq_compression_done(compression_object)

# THEN result should be False since the updated date < 3 weeks
assert result is False
Expand All @@ -246,7 +241,7 @@ def test_is_compression_done_spring_new_files(


def test_is_compression_done_spring_old_files(
crunchy_config: dict[str, dict[str, Any]],
real_crunchy_api: CrunchyAPI,
compression_object: CompressionData,
spring_metadata_file: Path,
caplog,
Expand All @@ -257,7 +252,6 @@ def test_is_compression_done_spring_old_files(
"""
caplog.set_level(logging.DEBUG)
# GIVEN a crunchy-api, and FASTQ paths
crunchy_api = CrunchyAPI(crunchy_config)
# GIVEN an existing SPRING file
compression_object.spring_path.touch()
assert compression_object.spring_path.exists()
Expand All @@ -277,7 +271,7 @@ def test_is_compression_done_spring_old_files(
)

# WHEN checking if SPRING compression is done
result = crunchy_api.is_fastq_compression_done(compression_object)
result = real_crunchy_api.is_fastq_compression_done(compression_object)

# THEN result should be True since the updated date > 3 weeks
assert result is True
Expand All @@ -286,15 +280,14 @@ def test_is_compression_done_spring_old_files(


def test_is_spring_decompression_possible_no_fastq(
crunchy_config: dict[str, dict[str, Any]], compression_object: CompressionData, caplog
real_crunchy_api: CrunchyAPI, compression_object: CompressionData, caplog
):
"""Test if decompression is possible when there are no FASTQ files
The function should return true since there are no FASTQ files
"""
caplog.set_level(logging.DEBUG)
# GIVEN a crunchy-api
crunchy_api = CrunchyAPI(crunchy_config)
# GIVEN an existing SPRING file
compression_object.spring_path.touch()
assert compression_object.spring_path.exists()
Expand All @@ -305,7 +298,7 @@ def test_is_spring_decompression_possible_no_fastq(
assert not compression_object.fastq_second.exists()

# WHEN checking if SPRING decompression is possible
result = crunchy_api.is_spring_decompression_possible(compression_object)
result = real_crunchy_api.is_spring_decompression_possible(compression_object)

# THEN result should be True since there are no fastq files
assert result is True
Expand All @@ -314,18 +307,17 @@ def test_is_spring_decompression_possible_no_fastq(


def test_is_spring_decompression_possible_no_spring(
crunchy_config: dict[str, dict[str, Any]], compression_object: CompressionData, caplog
real_crunchy_api: CrunchyAPI, compression_object: CompressionData, caplog
):
"""Test if decompression is possible when there is no SPRING archive
The function should return False since there is no SPRING archive
"""
caplog.set_level(logging.DEBUG)
# GIVEN a crunchy-api, and FASTQ paths
crunchy_api = CrunchyAPI(crunchy_config)

# WHEN checking if SPRING decompression is possible
result = crunchy_api.is_spring_decompression_possible(compression_object)
result = real_crunchy_api.is_spring_decompression_possible(compression_object)

# THEN result should be False since there is no SPRING archive
assert result is False
Expand All @@ -334,23 +326,22 @@ def test_is_spring_decompression_possible_no_spring(


def test_is_spring_decompression_possible_fastq(
crunchy_config: dict[str, dict[str, Any]], compression_object: CompressionData, caplog
real_crunchy_api: CrunchyAPI, compression_object: CompressionData, caplog
):
"""Test if decompression is possible when there are existing FASTQ files
The function should return False since there are decompressed FASTQ files
"""
caplog.set_level(logging.DEBUG)
# GIVEN a crunchy-api, and FASTQ paths
crunchy_api = CrunchyAPI(crunchy_config)
# GIVEN an existing SPRING file
compression_object.spring_path.touch()
# GIVEN that the FASTQ files exists
compression_object.fastq_first.touch()
compression_object.fastq_second.touch()

# WHEN checking if SPRING decompression is possible
result = crunchy_api.is_spring_decompression_possible(compression_object)
result = real_crunchy_api.is_spring_decompression_possible(compression_object)

# THEN result should be False since the FASTQ files already exists
assert result is False
Expand All @@ -359,31 +350,29 @@ def test_is_spring_decompression_possible_fastq(


def test_is_not_pending_when_no_flag_file(
crunchy_config: dict[str, dict[str, Any]], compression_object: CompressionData
real_crunchy_api: CrunchyAPI, compression_object: CompressionData
):
"""Test if SPRING compression is pending when no flag file"""
# GIVEN a crunchy-api, and a FASTQ file
crunchy_api = CrunchyAPI(crunchy_config)
# GIVEN a non existing pending flag
# GIVEN a non-existing pending flag
assert not compression_object.pending_path.exists()

# WHEN checking if SPRING compression is ongoing
result = crunchy_api.is_compression_pending(compression_object)
result = real_crunchy_api.is_compression_pending(compression_object)

# THEN result should be False since the pending flag is not there
assert result is False


def test_is_pending(crunchy_config: dict[str, dict[str, Any]], compression_object: CompressionData):
def test_is_pending(real_crunchy_api: CrunchyAPI, compression_object: CompressionData):
"""Test if SPRING compression is pending when pending file exists"""
# GIVEN a crunchy-api, and FASTQ files
crunchy_api = CrunchyAPI(crunchy_config)
# GIVEN an existing pending flag
compression_object.pending_path.touch()
assert compression_object.pending_path.exists()

# WHEN checking if SPRING compression is pending
result = crunchy_api.is_compression_pending(compression_object)
result = real_crunchy_api.is_compression_pending(compression_object)

# THEN result should be True since the pending_path exists
assert result is True
10 changes: 9 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,15 @@
from housekeeper.store.models import File, Version
from requests import Response

from cg.apps.crunchy import CrunchyAPI
from cg.apps.demultiplex.demultiplex_api import DemultiplexingAPI
from cg.apps.demultiplex.sample_sheet.sample_models import (
FlowCellSampleBcl2Fastq,
FlowCellSampleBCLConvert,
)
from cg.apps.demultiplex.sample_sheet.sample_sheet_creator import SampleSheetCreatorBCLConvert
from cg.apps.demultiplex.sample_sheet.sample_sheet_creator import (
SampleSheetCreatorBCLConvert,
)
from cg.apps.downsample.downsample import DownsampleAPI
from cg.apps.gens import GensAPI
from cg.apps.gt import GenotypeAPI
Expand Down Expand Up @@ -381,6 +384,11 @@ def crunchy_config() -> dict[str, dict[str, Any]]:
}


@pytest.fixture
def real_crunchy_api(crunchy_config) -> CrunchyAPI:
    """Return a CrunchyAPI instance built from the ``crunchy_config`` fixture."""
    api = CrunchyAPI(crunchy_config)
    return api


@pytest.fixture
def hk_config_dict(root_path: Path):
"""Housekeeper configs."""
Expand Down
Loading

0 comments on commit 0fc71b0

Please sign in to comment.