Skip to content

Commit

Permalink
Merge branch 'master' into rm-deprecated-code
Browse files Browse the repository at this point in the history
  • Loading branch information
henrikstranneheim authored Dec 14, 2023
2 parents d03bb3d + 2627438 commit 0781dc5
Show file tree
Hide file tree
Showing 42 changed files with 818 additions and 287 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 54.2.0
current_version = 54.4.4
commit = True
tag = True
tag_name = v{new_version}
Expand Down
2 changes: 1 addition & 1 deletion cg/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
__title__ = "cg"
# Version bumped by bumpversion; keep in sync with .bumpversion.cfg.
__version__ = "54.4.4"
18 changes: 18 additions & 0 deletions cg/cli/archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from click.core import ParameterSource

from cg.constants.archiving import DEFAULT_SPRING_ARCHIVE_COUNT
from cg.constants.constants import DRY_RUN
from cg.meta.archive.archive import SpringArchiveAPI
from cg.models.cg_config import CGConfig

Expand Down Expand Up @@ -64,3 +65,20 @@ def update_job_statuses(context: CGConfig):
data_flow_config=context.data_flow,
)
spring_archive_api.update_statuses_for_ongoing_tasks()


@archive.command("delete-file")
@DRY_RUN
@click.pass_obj
@click.argument("file_path", required=True)
def delete_file(context: CGConfig, dry_run: bool, file_path: str):
    """Delete an archived file and remove it from Housekeeper.
    The file will not be deleted if it is not confirmed archived.
    The file will not be deleted if its archive location can not be determined from the file tags.
    """
    # Build the archive API from the CLI context, then delegate the deletion.
    api_kwargs = dict(
        status_db=context.status_db,
        housekeeper_api=context.housekeeper_api,
        data_flow_config=context.data_flow,
    )
    SpringArchiveAPI(**api_kwargs).delete_file(file_path=file_path, dry_run=dry_run)
2 changes: 1 addition & 1 deletion cg/cli/workflow/rnafusion/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ def run(
case_id=case_id, params_file=params_file
),
"name": case_id,
"compute_env": compute_env or analysis_api.compute_env,
"compute_env": compute_env or analysis_api.get_compute_env(case_id=case_id),
"revision": revision or analysis_api.revision,
"wait": "SUBMITTED",
"id": nf_tower_id,
Expand Down
2 changes: 1 addition & 1 deletion cg/cli/workflow/taxprofiler/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ def run(
case_id=case_id, params_file=params_file
),
"name": case_id,
"compute_env": compute_env or analysis_api.compute_env,
"compute_env": compute_env or analysis_api.get_compute_env(case_id=case_id),
"revision": revision or analysis_api.revision,
"wait": NfTowerStatus.SUBMITTED,
"id": nf_tower_id,
Expand Down
53 changes: 27 additions & 26 deletions cg/constants/delivery.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Constants for delivery."""

from cg.constants.constants import Pipeline
from cg.constants.housekeeper_tags import AlignmentFileTag, AnalysisTag, HK_DELIVERY_REPORT_TAG

ONLY_ONE_CASE_PER_TICKET: list[Pipeline] = [
Pipeline.FASTQ,
Expand Down Expand Up @@ -37,8 +38,8 @@
]

# Housekeeper tag sets identifying BALSAMIC sample-level files to deliver.
BALSAMIC_ANALYSIS_SAMPLE_TAGS: list[set[str]] = [
    {AlignmentFileTag.CRAM},
    {AlignmentFileTag.CRAM_INDEX},
]

BALSAMIC_QC_ANALYSIS_CASE_TAGS: list[set[str]] = [
Expand Down Expand Up @@ -95,10 +96,10 @@
]

# Housekeeper tag sets identifying MIP-DNA sample-level files to deliver.
MIP_DNA_ANALYSIS_SAMPLE_TAGS: list[set[str]] = [
    {AlignmentFileTag.BAM},
    {AlignmentFileTag.BAM_BAI},
    {AlignmentFileTag.CRAM},
    {AlignmentFileTag.CRAM_INDEX},
]

MIP_RNA_ANALYSIS_CASE_TAGS: list[set[str]] = [
Expand All @@ -114,12 +115,12 @@
]

# Housekeeper tag sets identifying MIP-RNA sample-level files to deliver.
# NOTE(review): "vcf", "vcf-index" and "salmon-quant" have no AnalysisTag
# member in this commit, so they stay as plain strings.
MIP_RNA_ANALYSIS_SAMPLE_TAGS: list[set[str]] = [
    {AnalysisTag.FUSION, AnalysisTag.STARFUSION},
    {AnalysisTag.FUSION, AnalysisTag.ARRIBA},
    {AlignmentFileTag.CRAM},
    {AlignmentFileTag.CRAM_INDEX},
    {AnalysisTag.FUSION, "vcf"},
    {AnalysisTag.FUSION, "vcf-index"},
    {"salmon-quant"},
]

Expand Down Expand Up @@ -150,23 +151,23 @@
]

# Housekeeper tag sets identifying RNAFUSION case-level files to deliver.
RNAFUSION_ANALYSIS_CASE_TAGS: list[set[str]] = [
    {AnalysisTag.FUSION, AnalysisTag.ARRIBA},
    {AnalysisTag.FUSION, AnalysisTag.STARFUSION},
    {AnalysisTag.FUSION, AnalysisTag.FUSIONCATCHER},
    {AnalysisTag.FUSIONCATCHER_SUMMARY},
    {AnalysisTag.FUSIONINSPECTOR},
    {AnalysisTag.FUSIONREPORT, AnalysisTag.RESEARCH},
    {AnalysisTag.FUSIONINSPECTOR_HTML, AnalysisTag.RESEARCH},
    {AnalysisTag.ARRIBA_VISUALIZATION, AnalysisTag.RESEARCH},
    {AnalysisTag.MULTIQC_HTML, AnalysisTag.RNA},
    {HK_DELIVERY_REPORT_TAG},
    {AnalysisTag.VCF_FUSION},
    {AnalysisTag.GENE_COUNTS},
]

# Housekeeper tag sets identifying RNAFUSION sample-level files to deliver.
RNAFUSION_ANALYSIS_SAMPLE_TAGS: list[set[str]] = [
    {AlignmentFileTag.CRAM},
    {AlignmentFileTag.CRAM_INDEX},
]


Expand Down
1 change: 1 addition & 0 deletions cg/constants/gene_panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ class GenePanelMasterList(StrEnum):
SOVM: str = "SOVM"
STROKE: str = "STROKE"
AID: str = "AID"
INHERITED_CANCER: str = "Inherited cancer"

@classmethod
def get_panel_names(cls, panels=None) -> list[str]:
Expand Down
19 changes: 19 additions & 0 deletions cg/constants/housekeeper_tags.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,25 @@ class SequencingFileTag(StrEnum):
HK_DELIVERY_REPORT_TAG = "delivery-report"


class AnalysisTag(StrEnum):
    """Tags for analysis files.

    Housekeeper tags attached to pipeline output files (RNA-fusion
    callers, QC reports, counts). Values must match the tag strings
    stored in Housekeeper exactly.
    """

    ARRIBA: str = "arriba"
    # Value keeps the British spelling "visualisation" used by the stored tag.
    ARRIBA_VISUALIZATION: str = "arriba-visualisation"
    FUSION: str = "fusion"
    FUSIONCATCHER: str = "fusioncatcher"
    FUSIONCATCHER_SUMMARY: str = "fusioncatcher-summary"
    FUSIONINSPECTOR: str = "fusioninspector"
    FUSIONINSPECTOR_HTML: str = "fusioninspector-html"
    FUSIONREPORT: str = "fusionreport"
    GENE_COUNTS: str = "gene-counts"
    MULTIQC_HTML: str = "multiqc-html"
    RESEARCH: str = "research"
    RNA: str = "rna"
    # Tag value "star-fusion" is hyphenated although the member name is not.
    STARFUSION: str = "star-fusion"
    VCF_FUSION: str = "vcf-fusion"


class HkMipAnalysisTag:
CONFIG: list[str] = ["mip-config"]
QC_METRICS: list[str] = ["qc-metrics", "deliverable"]
Expand Down
8 changes: 8 additions & 0 deletions cg/io/gzip.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import gzip
from pathlib import Path


def read_gzip_first_line(file_path: Path) -> str:
    """Return the first line of a gzip-compressed file, decoded as text.

    The trailing newline (if present) is kept, matching bytes.readline().
    """
    with gzip.open(file_path, mode="rb") as gzipped:
        raw_line: bytes = gzipped.readline()
    return raw_line.decode()
78 changes: 54 additions & 24 deletions cg/meta/archive/archive.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import logging
from typing import Callable, Type

import click
from housekeeper.store.models import Archive, File
from pydantic import BaseModel, ConfigDict

Expand Down Expand Up @@ -252,7 +253,7 @@ def sort_archival_ids_on_archive_location(
jobs_per_location: dict[ArchiveLocations, list[int]] = {}
jobs_and_locations: set[
tuple[int, ArchiveLocations]
] = self.get_unique_archival_ids_and_their_archive_location(archive_entries)
] = self.get_unique_archival_ids_and_archive_locations(archive_entries)

for archive_location in ArchiveLocations:
jobs_per_location[ArchiveLocations(archive_location)] = [
Expand All @@ -262,18 +263,14 @@ def sort_archival_ids_on_archive_location(
]
return jobs_per_location

def get_unique_archival_ids_and_archive_locations(
    self, archive_entries: list[Archive]
) -> set[tuple[int, ArchiveLocations]]:
    """Return the unique (archiving_task_id, archive location) pairs for the given
    archive entries.

    Entries whose archive location cannot be determined from the file tags
    are skipped rather than raising, since get_archive_location_from_file
    may return None.
    """
    ids_and_locations: set[tuple[int, ArchiveLocations]] = set()
    for archive in archive_entries:
        # Walrus: only add the pair when a location was actually found.
        if location := self.get_archive_location_from_file(archive.file):
            ids_and_locations.add((archive.archiving_task_id, location))
    return ids_and_locations

def sort_retrieval_ids_on_archive_location(
self, archive_entries: list[Archive]
Expand All @@ -283,7 +280,7 @@ def sort_retrieval_ids_on_archive_location(
jobs_per_location: dict[ArchiveLocations, list[int]] = {}
jobs_and_locations: set[
tuple[int, ArchiveLocations]
] = self.get_unique_retrieval_ids_and_their_archive_location(archive_entries)
] = self.get_unique_retrieval_ids_and_archive_locations(archive_entries)
for archive_location in ArchiveLocations:
jobs_per_location[ArchiveLocations(archive_location)] = [
job_and_location[0]
Expand All @@ -292,18 +289,51 @@ def sort_retrieval_ids_on_archive_location(
]
return jobs_per_location

def get_unique_retrieval_ids_and_archive_locations(
    self, archive_entries: list[Archive]
) -> set[tuple[int, ArchiveLocations]]:
    """Return the unique (retrieval_task_id, archive location) pairs for the given
    archive entries.

    Entries whose archive location cannot be determined from the file tags
    are skipped rather than raising, since get_archive_location_from_file
    may return None.
    """
    ids_and_locations: set[tuple[int, ArchiveLocations]] = set()
    for archive in archive_entries:
        # Walrus: only add the pair when a location was actually found.
        if location := self.get_archive_location_from_file(archive.file):
            ids_and_locations.add((archive.retrieval_task_id, location))
    return ids_and_locations

@staticmethod
def is_file_archived(file: File) -> bool:
    """Return True if the file has an archive record with archived_at set.

    Wrapped in bool() so the declared return type holds: the bare
    `and`-expression would return None, the Archive object, or the
    archived_at value instead of a bool.
    """
    return bool(file.archive and file.archive.archived_at)

@staticmethod
def get_archive_location_from_file(file: File) -> ArchiveLocations | None:
    """Return the archive location encoded in the file's tags, or None.

    The first tag whose name matches an ArchiveLocations value wins.
    iter() is used because plain `in` containment on an Enum class with a
    non-member value raises on older Python versions.
    """
    for tag_name in [tag.name for tag in file.tags]:
        if tag_name in iter(ArchiveLocations):
            LOG.info(f"Found archive location {tag_name}")
            # Coerce to the enum so the return value matches the annotation
            # (the raw tag_name is a plain str).
            return ArchiveLocations(tag_name)
    LOG.warning("No archive location in the file tags")
    return None

def delete_file_from_archive_location(
    self, file_and_sample: FileAndSample, archive_location: ArchiveLocations
) -> None:
    """Delete the given file at its archive location via the matching handler."""
    # Look up the handler class for this location, instantiate it with the
    # data-flow config, and delegate the deletion.
    handler_class: Type[ArchiveHandler] = ARCHIVE_HANDLERS[archive_location]
    handler_class(self.data_flow_config).delete_file(file_and_sample)

def delete_file(self, file_path: str, dry_run: bool = False) -> None:
    """Delete the specified file where it is archived and delete the Housekeeper record.

    The file is not deleted if it is not confirmed archived, or if its
    archive location cannot be determined from the file tags. With
    dry_run=True, only report what would have been deleted.
    """
    file: File = self.housekeeper_api.files(path=file_path).first()
    if not self.is_file_archived(file):
        LOG.warning(f"No archived file found for file {file_path} - exiting")
        return
    archive_location: ArchiveLocations | None = self.get_archive_location_from_file(file)
    if not archive_location:
        LOG.warning("No archive location could be determined - exiting")
        return
    if dry_run:
        click.echo(f"Would have deleted file {file_path} from {archive_location}.")
        return
    file_and_sample: FileAndSample = self.add_samples_to_files([file])[0]
    self.delete_file_from_archive_location(
        file_and_sample=file_and_sample, archive_location=archive_location
    )
    # Remove the Housekeeper record only after the remote deletion succeeded.
    self.housekeeper_api.delete_file(file.id)
Loading

0 comments on commit 0781dc5

Please sign in to comment.