Commit

Merge branch 'master' into feat-remove-bcl2fastq

diitaz93 authored Dec 14, 2023
2 parents 172b804 + 92f2c04 commit 0a47874
Showing 47 changed files with 1,227 additions and 684 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 54.2.0
+current_version = 54.4.6
 commit = True
 tag = True
 tag_name = v{new_version}
2 changes: 1 addition & 1 deletion cg/__init__.py
@@ -1,2 +1,2 @@
 __title__ = "cg"
-__version__ = "54.2.0"
+__version__ = "54.4.6"
18 changes: 18 additions & 0 deletions cg/cli/archive.py
@@ -2,6 +2,7 @@
 from click.core import ParameterSource

 from cg.constants.archiving import DEFAULT_SPRING_ARCHIVE_COUNT
+from cg.constants.constants import DRY_RUN
 from cg.meta.archive.archive import SpringArchiveAPI
 from cg.models.cg_config import CGConfig

@@ -64,3 +65,20 @@ def update_job_statuses(context: CGConfig):
         data_flow_config=context.data_flow,
     )
     spring_archive_api.update_statuses_for_ongoing_tasks()
+
+
+@archive.command("delete-file")
+@DRY_RUN
+@click.pass_obj
+@click.argument("file_path", required=True)
+def delete_file(context: CGConfig, dry_run: bool, file_path: str):
+    """Delete an archived file and remove it from Housekeeper.
+    The file will not be deleted if it is not confirmed archived.
+    The file will not be deleted if its archive location can not be determined from the file tags.
+    """
+    spring_archive_api = SpringArchiveAPI(
+        status_db=context.status_db,
+        housekeeper_api=context.housekeeper_api,
+        data_flow_config=context.data_flow,
+    )
+    spring_archive_api.delete_file(file_path=file_path, dry_run=dry_run)
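
The new command can be exercised without touching a real archive by combining click's test runner with the --dry-run flag (assuming the DRY_RUN constant imported above wires in such a flag, as its decorator usage suggests). A minimal sketch, where `config` and the file path are placeholders:

from click.testing import CliRunner

from cg.cli.archive import delete_file

runner = CliRunner()
# `config` stands in for a fully populated CGConfig; a live run needs real
# status_db, housekeeper_api and data_flow settings.
result = runner.invoke(delete_file, ["--dry-run", "/archive/sample.spring"], obj=config)
print(result.output)  # e.g. "Would have deleted file /archive/sample.spring from ..."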
2 changes: 1 addition & 1 deletion cg/cli/workflow/rnafusion/base.py
@@ -119,7 +119,7 @@ def run(
                 case_id=case_id, params_file=params_file
             ),
             "name": case_id,
-            "compute_env": compute_env or analysis_api.compute_env,
+            "compute_env": compute_env or analysis_api.get_compute_env(case_id=case_id),
             "revision": revision or analysis_api.revision,
             "wait": "SUBMITTED",
             "id": nf_tower_id,
2 changes: 1 addition & 1 deletion cg/cli/workflow/taxprofiler/base.py
@@ -116,7 +116,7 @@ def run(
                 case_id=case_id, params_file=params_file
             ),
             "name": case_id,
-            "compute_env": compute_env or analysis_api.compute_env,
+            "compute_env": compute_env or analysis_api.get_compute_env(case_id=case_id),
             "revision": revision or analysis_api.revision,
             "wait": NfTowerStatus.SUBMITTED,
             "id": nf_tower_id,
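
The same one-line change appears in both the rnafusion and taxprofiler entry points: the compute environment is no longer a single pipeline-wide attribute but is resolved per case. The body of get_compute_env is not part of this diff; conceptually it enables a lookup along these lines (a hypothetical sketch, with the attribute names invented for illustration):

# Hypothetical sketch only - the real method body is not shown in this commit.
def get_compute_env(self, case_id: str) -> str:
    """Return a compute environment appropriate for the given case."""
    case = self.status_db.get_case_by_internal_id(internal_id=case_id)
    # Route e.g. high-priority cases to a dedicated environment and fall
    # back to the pipeline-wide default otherwise.
    if case and getattr(case, "slurm_priority", None) == "high":
        return f"{self.compute_env}-high"
    return self.compute_env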
11 changes: 0 additions & 11 deletions cg/constants/cgstats.py

This file was deleted.

53 changes: 27 additions & 26 deletions cg/constants/delivery.py
@@ -1,6 +1,7 @@
 """Constants for delivery."""

 from cg.constants.constants import Pipeline
+from cg.constants.housekeeper_tags import AlignmentFileTag, AnalysisTag, HK_DELIVERY_REPORT_TAG

 ONLY_ONE_CASE_PER_TICKET: list[Pipeline] = [
     Pipeline.FASTQ,
@@ -37,8 +38,8 @@
 ]

 BALSAMIC_ANALYSIS_SAMPLE_TAGS: list[set[str]] = [
-    {"cram"},
-    {"cram-index"},
+    {AlignmentFileTag.CRAM},
+    {AlignmentFileTag.CRAM_INDEX},
 ]

 BALSAMIC_QC_ANALYSIS_CASE_TAGS: list[set[str]] = [
@@ -95,10 +96,10 @@
 ]

 MIP_DNA_ANALYSIS_SAMPLE_TAGS: list[set[str]] = [
-    {"bam"},
-    {"bam-index"},
-    {"cram"},
-    {"cram-index"},
+    {AlignmentFileTag.BAM},
+    {AlignmentFileTag.BAM_BAI},
+    {AlignmentFileTag.CRAM},
+    {AlignmentFileTag.CRAM_INDEX},
 ]

 MIP_RNA_ANALYSIS_CASE_TAGS: list[set[str]] = [
@@ -114,12 +115,12 @@
 ]

 MIP_RNA_ANALYSIS_SAMPLE_TAGS: list[set[str]] = [
-    {"fusion", "star-fusion"},
-    {"fusion", "arriba"},
-    {"cram"},
-    {"cram-index"},
-    {"fusion", "vcf"},
-    {"fusion", "vcf-index"},
+    {AnalysisTag.FUSION, AnalysisTag.STARFUSION},
+    {AnalysisTag.FUSION, AnalysisTag.ARRIBA},
+    {AlignmentFileTag.CRAM},
+    {AlignmentFileTag.CRAM_INDEX},
+    {AnalysisTag.FUSION, "vcf"},
+    {AnalysisTag.FUSION, "vcf-index"},
     {"salmon-quant"},
 ]
@@ -150,23 +151,23 @@
 ]

 RNAFUSION_ANALYSIS_CASE_TAGS: list[set[str]] = [
-    {"fusion", "arriba"},
-    {"fusion", "star-fusion"},
-    {"fusion", "fusioncatcher"},
-    {"fusioncatcher-summary"},
-    {"fusioninspector"},
-    {"fusionreport", "research"},
-    {"fusioninspector-html", "research"},
-    {"arriba-visualisation", "research"},
-    {"multiqc-html", "rna"},
-    {"delivery-report"},
-    {"vcf-fusion"},
-    {"gene-counts"},
+    {AnalysisTag.FUSION, AnalysisTag.ARRIBA},
+    {AnalysisTag.FUSION, AnalysisTag.STARFUSION},
+    {AnalysisTag.FUSION, AnalysisTag.FUSIONCATCHER},
+    {AnalysisTag.FUSIONCATCHER_SUMMARY},
+    {AnalysisTag.FUSIONINSPECTOR},
+    {AnalysisTag.FUSIONREPORT, AnalysisTag.RESEARCH},
+    {AnalysisTag.FUSIONINSPECTOR_HTML, AnalysisTag.RESEARCH},
+    {AnalysisTag.ARRIBA_VISUALIZATION, AnalysisTag.RESEARCH},
+    {AnalysisTag.MULTIQC_HTML, AnalysisTag.RNA},
+    {HK_DELIVERY_REPORT_TAG},
+    {AnalysisTag.VCF_FUSION},
+    {AnalysisTag.GENE_COUNTS},
 ]

 RNAFUSION_ANALYSIS_SAMPLE_TAGS: list[set[str]] = [
-    {"cram"},
-    {"cram-index"},
+    {AlignmentFileTag.CRAM},
+    {AlignmentFileTag.CRAM_INDEX},
 ]


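This block is a pure refactor: AlignmentFileTag and AnalysisTag are StrEnum classes (see cg/constants/housekeeper_tags.py below), so each member compares and hashes equal to its string value, and the tag sets keep matching the same Housekeeper tags as before. A self-contained check of that property, using a stand-in enum mirroring the real one:

from enum import StrEnum

class AlignmentFileTag(StrEnum):  # stand-in mirroring cg.constants.housekeeper_tags
    CRAM = "cram"
    CRAM_INDEX = "cram-index"

# StrEnum members are strings: equality and set membership behave as before.
assert AlignmentFileTag.CRAM == "cram"
assert {"cram"} == {AlignmentFileTag.CRAM}
assert "cram-index" in {AlignmentFileTag.CRAM, AlignmentFileTag.CRAM_INDEX}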
1 change: 1 addition & 0 deletions cg/constants/gene_panel.py
@@ -41,6 +41,7 @@ class GenePanelMasterList(StrEnum):
     SOVM: str = "SOVM"
     STROKE: str = "STROKE"
     AID: str = "AID"
+    INHERITED_CANCER: str = "Inherited cancer"

     @classmethod
     def get_panel_names(cls, panels=None) -> list[str]:
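
Unlike the surrounding members, the new panel's value ("Inherited cancer") differs from its attribute name, so name-based and value-based lookups are no longer interchangeable for it. Standard StrEnum semantics illustrate the distinction:

from cg.constants.gene_panel import GenePanelMasterList

assert GenePanelMasterList.INHERITED_CANCER == "Inherited cancer"
# Lookup by attribute name versus lookup by value:
assert GenePanelMasterList["INHERITED_CANCER"] is GenePanelMasterList.INHERITED_CANCER
assert GenePanelMasterList("Inherited cancer") is GenePanelMasterList.INHERITED_CANCER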
19 changes: 19 additions & 0 deletions cg/constants/housekeeper_tags.py
@@ -52,6 +52,25 @@ class SequencingFileTag(StrEnum):
 HK_DELIVERY_REPORT_TAG = "delivery-report"


+class AnalysisTag(StrEnum):
+    """Tags for analysis files."""
+
+    ARRIBA: str = "arriba"
+    ARRIBA_VISUALIZATION: str = "arriba-visualisation"
+    FUSION: str = "fusion"
+    FUSIONCATCHER: str = "fusioncatcher"
+    FUSIONCATCHER_SUMMARY: str = "fusioncatcher-summary"
+    FUSIONINSPECTOR: str = "fusioninspector"
+    FUSIONINSPECTOR_HTML: str = "fusioninspector-html"
+    FUSIONREPORT: str = "fusionreport"
+    GENE_COUNTS: str = "gene-counts"
+    MULTIQC_HTML: str = "multiqc-html"
+    RESEARCH: str = "research"
+    RNA: str = "rna"
+    STARFUSION: str = "star-fusion"
+    VCF_FUSION: str = "vcf-fusion"
+
+
 class HkMipAnalysisTag:
     CONFIG: list[str] = ["mip-config"]
     QC_METRICS: list[str] = ["qc-metrics", "deliverable"]
8 changes: 8 additions & 0 deletions cg/io/gzip.py
@@ -0,0 +1,8 @@
+import gzip
+from pathlib import Path
+
+
+def read_gzip_first_line(file_path: Path) -> str:
+    """Return first line of gzip file."""
+    with gzip.open(file_path) as file:
+        return file.readline().decode()
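
In the context of this PR (removing bcl2fastq), a helper like this is useful for peeking at a gzipped FASTQ header without decompressing the whole file. A usage sketch with generated example data (the read header is made up):

import gzip
from pathlib import Path

from cg.io.gzip import read_gzip_first_line

# Write a tiny gzipped FASTQ so the example is self-contained.
fastq = Path("example.fastq.gz")
with gzip.open(fastq, "wt") as handle:
    handle.write("@A00689:123:HNFJDSXX:1:1101:2000:1000 1:N:0:ACGTACGT\nACGT\n+\nFFFF\n")

header: str = read_gzip_first_line(fastq)
print(header)  # "@A00689:..." - note that the trailing newline is kept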
80 changes: 55 additions & 25 deletions cg/meta/archive/archive.py
@@ -1,14 +1,15 @@
 import logging
 from typing import Callable, Type

+import click
 from housekeeper.store.models import Archive, File
 from pydantic import BaseModel, ConfigDict

 from cg.apps.housekeeper.hk import HousekeeperAPI
 from cg.constants import SequencingFileTag
 from cg.constants.archiving import ArchiveLocations
 from cg.exc import ArchiveJobFailedError
-from cg.meta.archive.ddn_dataflow import DDNDataFlowClient
+from cg.meta.archive.ddn.ddn_data_flow_client import DDNDataFlowClient
 from cg.meta.archive.models import ArchiveHandler, FileAndSample, SampleAndDestination
 from cg.models.cg_config import DataFlowConfig
 from cg.store import Store
@@ -252,7 +253,7 @@ def sort_archival_ids_on_archive_location(
         jobs_per_location: dict[ArchiveLocations, list[int]] = {}
         jobs_and_locations: set[
             tuple[int, ArchiveLocations]
-        ] = self.get_unique_archival_ids_and_their_archive_location(archive_entries)
+        ] = self.get_unique_archival_ids_and_archive_locations(archive_entries)

         for archive_location in ArchiveLocations:
             jobs_per_location[ArchiveLocations(archive_location)] = [
@@ -262,18 +263,14 @@ def sort_archival_ids_on_archive_location(
             ]
         return jobs_per_location

-    def get_unique_archival_ids_and_their_archive_location(
+    def get_unique_archival_ids_and_archive_locations(
         self, archive_entries: list[Archive]
     ) -> set[tuple[int, ArchiveLocations]]:
-        return set(
-            [
-                (
-                    archive.archiving_task_id,
-                    ArchiveLocations(self.get_archive_location_from_file(archive.file)),
-                )
-                for archive in archive_entries
-            ]
-        )
+        ids_and_locations: set[tuple[int, ArchiveLocations]] = set()
+        for archive in archive_entries:
+            if location := self.get_archive_location_from_file(archive.file):
+                ids_and_locations.add((archive.archiving_task_id, location))
+        return ids_and_locations

     def sort_retrieval_ids_on_archive_location(
         self, archive_entries: list[Archive]
@@ -283,7 +280,7 @@ def sort_retrieval_ids_on_archive_location(
         jobs_per_location: dict[ArchiveLocations, list[int]] = {}
         jobs_and_locations: set[
             tuple[int, ArchiveLocations]
-        ] = self.get_unique_retrieval_ids_and_their_archive_location(archive_entries)
+        ] = self.get_unique_retrieval_ids_and_archive_locations(archive_entries)
         for archive_location in ArchiveLocations:
             jobs_per_location[ArchiveLocations(archive_location)] = [
                 job_and_location[0]
@@ -292,18 +289,51 @@ def sort_retrieval_ids_on_archive_location(
             ]
         return jobs_per_location

-    def get_unique_retrieval_ids_and_their_archive_location(
+    def get_unique_retrieval_ids_and_archive_locations(
         self, archive_entries: list[Archive]
     ) -> set[tuple[int, ArchiveLocations]]:
-        return set(
-            [
-                (
-                    archive.retrieval_task_id,
-                    ArchiveLocations(self.get_archive_location_from_file(archive.file)),
-                )
-                for archive in archive_entries
-            ]
-        )
+        ids_and_locations: set[tuple[int, ArchiveLocations]] = set()
+        for archive in archive_entries:
+            if location := self.get_archive_location_from_file(archive.file):
+                ids_and_locations.add((archive.retrieval_task_id, location))
+        return ids_and_locations
+
+    @staticmethod
+    def is_file_archived(file: File) -> bool:
+        return file.archive and file.archive.archived_at
+
+    @staticmethod
+    def get_archive_location_from_file(file: File) -> ArchiveLocations | None:
+        for tag_name in [tag.name for tag in file.tags]:
+            if tag_name in iter(ArchiveLocations):
+                LOG.info(f"Found archive location {tag_name}")
+                return tag_name
+        LOG.warning("No archive location in the file tags")
+        return None
+
+    def delete_file_from_archive_location(
+        self, file_and_sample: FileAndSample, archive_location: ArchiveLocations
+    ) -> None:
+        archive_handler: ArchiveHandler = ARCHIVE_HANDLERS[archive_location](self.data_flow_config)
+        archive_handler.delete_file(file_and_sample)
+
+    def delete_file(self, file_path: str, dry_run: bool = False) -> None:
+        """Deletes the specified file where it is archived and deletes the Housekeeper record.
+        Raises:
+        Click.Abort if yes is not specified or the user does not confirm the deletion."""
+        file: File = self.housekeeper_api.files(path=file_path).first()
+        if not self.is_file_archived(file):
+            LOG.warning(f"No archived file found for file {file_path} - exiting")
+            return
+        archive_location: ArchiveLocations | None = self.get_archive_location_from_file(file)
+        if not archive_location:
+            LOG.warning("No archive location could be determined - exiting")
+            return
+        if dry_run:
+            click.echo(f"Would have deleted file {file_path} from {archive_location}.")
+            return
+        file_and_sample: FileAndSample = self.add_samples_to_files([file])[0]
+        self.delete_file_from_archive_location(
+            file_and_sample=file_and_sample, archive_location=archive_location
+        )
-    def get_archive_location_from_file(self, file: File) -> str:
-        return self.status_db.get_sample_by_internal_id(file.version.bundle.name).archive_location
+        self.housekeeper_api.delete_file(file.id)
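
Beyond the renames, this rewrite changes behaviour in two ways: the archive location is now read from the file's own Housekeeper tags instead of from the sample's archive_location in StatusDB, and a file whose location cannot be resolved is skipped with a warning rather than crashing the batch, since the old code passed the result straight into ArchiveLocations(...). A stand-alone illustration of the second point, using a stand-in enum:

from enum import StrEnum

class ArchiveLocations(StrEnum):  # stand-in for cg.constants.archiving
    PDC = "PDC"

entries = [("task-1", "PDC"), ("task-2", None)]  # (task id, resolved location)

# Old behaviour: one unresolved location fails the whole batch.
try:
    {(task, ArchiveLocations(loc)) for task, loc in entries}
except ValueError as error:
    print(f"old: {error}")  # old: None is not a valid ArchiveLocations

# New behaviour: unresolved entries are filtered out by the walrus expression.
ids_and_locations = set()
for task, loc in entries:
    if location := loc:
        ids_and_locations.add((task, ArchiveLocations(location)))
print(f"new: {ids_and_locations}")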
Empty file added cg/meta/archive/ddn/__init__.py
51 changes: 51 additions & 0 deletions cg/meta/archive/ddn/constants.py
@@ -0,0 +1,51 @@
+from enum import StrEnum
+
+OSTYPE: str = "Unix/MacOS"
+ROOT_TO_TRIM: str = "/home"
+
+DESTINATION_ATTRIBUTE: str = "destination"
+SOURCE_ATTRIBUTE: str = "source"
+
+
+class DataflowEndpoints(StrEnum):
+    """Enum containing all DDN dataflow endpoints used."""
+
+    ARCHIVE_FILES = "files/archive"
+    DELETE_FILE = "files/delete"
+    GET_AUTH_TOKEN = "auth/token"
+    REFRESH_AUTH_TOKEN = "auth/token/refresh"
+    RETRIEVE_FILES = "files/retrieve"
+    GET_JOB_STATUS = "activity/jobs/"
+
+
+class JobStatus(StrEnum):
+    """Enum for the different job statuses which can be returned via Miria."""
+
+    CANCELED = "Canceled"
+    COMPLETED = "Completed"
+    DENIED = "Denied"
+    CREATION_IN_PROGRESS = "Creation in progress"
+    IN_QUEUE = "In Queue"
+    INVALID_LICENSE = "Invalid license"
+    ON_VALIDATION = "On validation"
+    REFUSED = "Refused"
+    RUNNING = "Running"
+    SUSPENDED = "Suspended"
+    TERMINATED_ON_ERROR = "Terminated on error"
+    TERMINATED_ON_WARNING = "Terminated on warning"
+
+
+FAILED_JOB_STATUSES: list[str] = [
+    JobStatus.CANCELED,
+    JobStatus.DENIED,
+    JobStatus.INVALID_LICENSE,
+    JobStatus.REFUSED,
+    JobStatus.TERMINATED_ON_ERROR,
+    JobStatus.TERMINATED_ON_WARNING,
+]
+ONGOING_JOB_STATUSES: list[str] = [
+    JobStatus.CREATION_IN_PROGRESS,
+    JobStatus.IN_QUEUE,
+    JobStatus.ON_VALIDATION,
+    JobStatus.RUNNING,
+]
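
The two lists bucket Miria's job statuses for polling; note that Completed and Suspended fall in neither bucket. A consumer would use them roughly like this (the classify helper is illustrative, not part of the diff):

from cg.meta.archive.ddn.constants import (
    FAILED_JOB_STATUSES,
    ONGOING_JOB_STATUSES,
    JobStatus,
)

def classify(status: str) -> str:
    """Illustrative helper: map a raw Miria status string onto an action."""
    if status in FAILED_JOB_STATUSES:
        return "failed"
    if status in ONGOING_JOB_STATUSES:
        return "ongoing"
    if status == JobStatus.COMPLETED:
        return "completed"
    return "unhandled"

print(classify("Running"))                      # ongoing
print(classify(JobStatus.TERMINATED_ON_ERROR))  # failed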