
Commit

Merge remote-tracking branch 'origin/upd-pymod' into upd-pymod
henrikstranneheim committed Oct 9, 2024
2 parents d3bad5c + 564346e commit 21d2994
Showing 25 changed files with 307 additions and 86 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 63.3.2
+current_version = 63.5.1
 commit = True
 tag = True
 tag_name = v{new_version}
6 changes: 6 additions & 0 deletions .github/workflows/build_dockerfile_on_push.yml
@@ -24,6 +24,9 @@ jobs:
         id: buildx
         uses: docker/setup-buildx-action@v3

+      - name: Suppress annotations during build
+        run: echo "::stop-commands::no-annotations"
+
       - name: Build and push
         id: docker_build
         uses: docker/build-push-action@v6
@@ -32,3 +35,6 @@
           file: ./Dockerfile
           push: true
           tags: "clinicalgenomics/cg-stage:${{steps.get_branch_name.outputs.branch}}, clinicalgenomics/cg-stage:latest"
+
+      - name: Resume annotations
+        run: echo "::no-annotations::"
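
The two added steps use GitHub Actions workflow commands: `echo "::stop-commands::no-annotations"` tells the runner to stop processing workflow commands (including warning and error annotations) until the chosen token is echoed back, and `echo "::no-annotations::"` resumes processing once the image build has finished. The token `no-annotations` is simply the resume token the author picked, not a reserved keyword.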
Alembic migration file (path not shown in this view)

@@ -19,7 +19,7 @@


 def upgrade():
-    # PacBioSequencingRun
+    # PacbioSequencingRun
     op.add_column(
         table_name="pacbio_sequencing_run",
         column=sa.Column("barcoded_hifi_reads", sa.BIGINT, nullable=True),
@@ -57,7 +57,7 @@ def upgrade():
column=sa.Column("unbarcoded_hifi_mean_read_length", sa.BIGINT, nullable=True),
)

# PacBioSampleSequencingMetrics
# PacbioSampleSequencingMetrics
op.add_column(
table_name="pacbio_sample_run_metrics",
column=sa.Column("polymerase_mean_read_length", sa.BIGINT, nullable=True),
@@ -69,7 +69,7 @@


 def downgrade():
-    # PacBioSequencingRun
+    # PacbioSequencingRun
     op.drop_column(table_name="pacbio_sequencing_run", column_name="barcoded_hifi_reads")
     op.drop_column(table_name="pacbio_sequencing_run", column_name="barcoded_hifi_reads_percentage")
     op.drop_column(table_name="pacbio_sequencing_run", column_name="barcoded_hifi_yield")
@@ -82,7 +82,7 @@ def downgrade():
table_name="pacbio_sequencing_run", column_name="unbarcoded_hifi_mean_read_length"
)

# PacBioSampleSequencingMetrics
# PacbioSampleSequencingMetrics
op.drop_column(
table_name="pacbio_sample_run_metrics", column_name="polymerase_mean_read_length"
)
2 changes: 1 addition & 1 deletion cg/__init__.py
@@ -1,2 +1,2 @@
 __title__ = "cg"
-__version__ = "63.3.2"
+__version__ = "63.5.1"
2 changes: 0 additions & 2 deletions cg/apps/crunchy/crunchy.py
@@ -45,8 +45,6 @@ def __init__(self, config: dict):

     def set_dry_run(self, dry_run: bool) -> None:
         """Update dry run."""
-        LOG.info("Updating compress api")
-        LOG.info(f"Set dry run to {dry_run}")
         self.dry_run = dry_run
         self.slurm_api.set_dry_run(dry_run=dry_run)

8 changes: 6 additions & 2 deletions cg/apps/housekeeper/hk.py
@@ -7,7 +7,11 @@

 from housekeeper.include import checksum as hk_checksum
 from housekeeper.include import include_version
-from housekeeper.store.database import create_all_tables, drop_all_tables, initialize_database
+from housekeeper.store.database import (
+    create_all_tables,
+    drop_all_tables,
+    initialize_database,
+)
 from housekeeper.store.models import Archive, Bundle, File, Tag, Version
 from housekeeper.store.store import Store
 from sqlalchemy.orm import Query
@@ -273,7 +277,7 @@ def get_latest_bundle_version(self, bundle_name: str) -> Version | None:
         """Get the latest version of a Housekeeper bundle."""
         last_version: Version = self.last_version(bundle_name)
         if not last_version:
-            LOG.warning(f"No bundle found for {bundle_name} in Housekeeper")
+            LOG.debug(f"No bundle found for {bundle_name} in Housekeeper")
             return None
         LOG.debug(f"Found Housekeeper version object for {bundle_name}: {repr(last_version)}")
         return last_version
8 changes: 4 additions & 4 deletions cg/cli/clean.py
@@ -23,17 +23,16 @@
     mutant_past_run_dirs,
     rnafusion_past_run_dirs,
     rsync_past_run_dirs,
+    tower_past_run_dirs,
 )
 from cg.constants.cli_options import DRY_RUN, SKIP_CONFIRMATION
 from cg.constants.constants import Workflow
 from cg.constants.housekeeper_tags import AlignmentFileTag, ScoutTag
-from cg.exc import IlluminaCleanRunError, FlowCellError
+from cg.exc import FlowCellError, IlluminaCleanRunError
 from cg.meta.clean.api import CleanAPI
-from cg.services.illumina.cleaning.clean_runs_service import (
-    IlluminaCleanRunsService,
-)
 from cg.meta.clean.clean_retrieved_spring_files import CleanRetrievedSpringFilesAPI
 from cg.models.cg_config import CGConfig
+from cg.services.illumina.cleaning.clean_runs_service import IlluminaCleanRunsService
 from cg.store.models import Analysis
 from cg.store.store import Store
 from cg.utils.date import get_date_days_ago, get_timedelta_from_date
@@ -72,6 +71,7 @@ def clean():
     rnafusion_past_run_dirs,
     rsync_past_run_dirs,
     microsalt_past_run_dirs,
+    tower_past_run_dirs,
 ]:
     clean.add_command(sub_cmd)

2 changes: 1 addition & 1 deletion cg/cli/compress/fastq.py
@@ -97,7 +97,7 @@ def clean_fastq(context: CGConfig, case_id: str | None, days_back: int, dry_run:
             sample_id=sample_id, archive_location=archive_location
         )
         if not was_cleaned:
-            LOG.info(f"Skipping individual {sample_id}")
+            LOG.debug(f"Skipping individual {sample_id}")
             continue
         cleaned_inds += 1

10 changes: 5 additions & 5 deletions cg/cli/compress/helpers.py
@@ -73,13 +73,13 @@ def update_compress_api(

     compress_api.set_dry_run(dry_run=dry_run)
     if mem:
-        LOG.info(f"Set Crunchy API SLURM mem to {mem}")
+        LOG.debug(f"Set Crunchy API SLURM mem to {mem}")
         compress_api.crunchy_api.slurm_memory = mem
     if hours:
-        LOG.info(f"Set Crunchy API SLURM hours to {hours}")
+        LOG.debug(f"Set Crunchy API SLURM hours to {hours}")
         compress_api.crunchy_api.slurm_hours = hours
     if ntasks:
-        LOG.info(f"Set Crunchy API SLURM number of tasks to {ntasks}")
+        LOG.debug(f"Set Crunchy API SLURM number of tasks to {ntasks}")
         compress_api.crunchy_api.slurm_number_tasks = ntasks


@@ -139,7 +139,7 @@ def compress_sample_fastqs_in_cases(
         if case_conversion_count >= number_of_conversions:
             break

-        LOG.info(f"Searching for FASTQ files in case {case.internal_id}")
+        LOG.debug(f"Searching for FASTQ files in case {case.internal_id}")
         if not case.links:
             continue
         for case_link in case.links:
@@ -159,7 +159,7 @@
                 sample_id=case_link.sample.internal_id
             )
             if not case_converted:
-                LOG.info(f"skipping individual {case_link.sample.internal_id}")
+                LOG.debug(f"skipping individual {case_link.sample.internal_id}")
                 continue
             individuals_conversion_count += 1
         if case_converted:
15 changes: 14 additions & 1 deletion cg/cli/utils.py
@@ -1,8 +1,13 @@
 import re
+import shutil

 import click

-import shutil
+from cg.constants import Workflow
+from cg.meta.workflow.raredisease import RarediseaseAnalysisAPI
+from cg.meta.workflow.rnafusion import RnafusionAnalysisAPI
+from cg.meta.workflow.taxprofiler import TaxprofilerAnalysisAPI
+from cg.meta.workflow.tomte import TomteAnalysisAPI


 def echo_lines(lines: list[str]) -> None:
@@ -19,3 +24,11 @@ def is_case_name_allowed(name: str) -> bool:
 CLICK_CONTEXT_SETTINGS: dict[str, int] = {
     "max_content_width": shutil.get_terminal_size().columns - 10
 }
+
+
+TOWER_WORKFLOW_TO_ANALYSIS_API_MAP: dict = {
+    Workflow.RAREDISEASE: RarediseaseAnalysisAPI,
+    Workflow.RNAFUSION: RnafusionAnalysisAPI,
+    Workflow.TAXPROFILER: TaxprofilerAnalysisAPI,
+    Workflow.TOMTE: TomteAnalysisAPI,
+}
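
The new module-level map gives CLI code a single lookup point for Tower-run workflows. A minimal sketch of the intended use, assuming `cg_config` is a placeholder for an already-loaded CGConfig instance (the constructor call mirrors the `...(context.obj)` pattern in the tower_past_run_dirs command added later in this commit):

    from cg.cli.utils import TOWER_WORKFLOW_TO_ANALYSIS_API_MAP
    from cg.constants import Workflow

    # Look up the analysis API class registered for a Tower-based workflow.
    api_class = TOWER_WORKFLOW_TO_ANALYSIS_API_MAP[Workflow.RNAFUSION]

    # Instantiate it with the CG config object, as the CLI command does.
    analysis_api = api_class(cg_config)  # cg_config: assumed pre-loaded CGConfig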
23 changes: 21 additions & 2 deletions cg/cli/workflow/commands.py
@@ -7,9 +7,10 @@
 from dateutil.parser import parse as parse_date

 from cg.apps.housekeeper.hk import HousekeeperAPI
+from cg.cli.utils import TOWER_WORKFLOW_TO_ANALYSIS_API_MAP
 from cg.cli.workflow.utils import validate_force_store_option
-from cg.constants import EXIT_FAIL, EXIT_SUCCESS
-from cg.constants.cli_options import DRY_RUN, SKIP_CONFIRMATION, FORCE, COMMENT
+from cg.constants import EXIT_FAIL, EXIT_SUCCESS, Workflow
+from cg.constants.cli_options import COMMENT, DRY_RUN, FORCE, SKIP_CONFIRMATION
 from cg.constants.observations import LOQUSDB_SUPPORTED_WORKFLOWS
 from cg.exc import IlluminaRunsNeededError
 from cg.meta.workflow.analysis import AnalysisAPI
@@ -22,6 +23,7 @@
 from cg.meta.workflow.mip_dna import MipDNAAnalysisAPI
 from cg.meta.workflow.mip_rna import MipRNAAnalysisAPI
 from cg.meta.workflow.mutant import MutantAnalysisAPI
+from cg.meta.workflow.nf_analysis import NfAnalysisAPI
 from cg.meta.workflow.rnafusion import RnafusionAnalysisAPI
 from cg.models.cg_config import CGConfig
 from cg.services.deliver_files.delivery_rsync_service.delivery_rsync_service import (
@@ -31,6 +33,7 @@

 ARGUMENT_BEFORE_STR = click.argument("before_str", type=str)
 ARGUMENT_CASE_ID = click.argument("case_id", required=True)
+ARGUMENT_WORKFLOW = click.argument("workflow", required=True)
 OPTION_ANALYSIS_PARAMETERS_CONFIG = click.option(
     "--config-artic", type=str, help="Config with computational and lab related settings"
 )
@@ -383,3 +386,19 @@ def microsalt_past_run_dirs(
     context.invoke(
         past_run_dirs, skip_confirmation=skip_confirmation, dry_run=dry_run, before_str=before_str
     )
+
+
+@click.command("tower-past-run-dirs")
+@SKIP_CONFIRMATION
+@ARGUMENT_WORKFLOW
+@ARGUMENT_BEFORE_STR
+@click.pass_context
+def tower_past_run_dirs(
+    context: click.Context, before_str: str, workflow: Workflow, skip_confirmation: bool = False
+):
+    """Clean up of "old" tower case run dirs."""
+    if workflow not in TOWER_WORKFLOW_TO_ANALYSIS_API_MAP:
+        LOG.error(f"Please ensure that the provided workflow {workflow} is using Tower")
+        raise click.Abort()
+    analysis_api: NfAnalysisAPI = TOWER_WORKFLOW_TO_ANALYSIS_API_MAP.get(workflow)(context.obj)
+    analysis_api.clean_past_run_dirs(before_date=before_str, skip_confirmation=skip_confirmation)
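
A sketch of how the new command could be exercised in a test, assuming click's standard CliRunner and a hypothetical pre-built `cg_config` object passed as the context object. Positional arguments follow the decorator order: WORKFLOW first, then BEFORE_STR; assuming Workflow is a str-backed enum, the raw CLI string matches the enum keys in the map:

    from click.testing import CliRunner

    from cg.cli.workflow.commands import tower_past_run_dirs

    runner = CliRunner()
    # "rnafusion" should resolve to Workflow.RNAFUSION in the map lookup.
    result = runner.invoke(
        tower_past_run_dirs,
        ["rnafusion", "2024-09-01"],
        obj=cg_config,  # cg_config: assumed pre-loaded CGConfig (placeholder)
    )

A workflow outside TOWER_WORKFLOW_TO_ANALYSIS_API_MAP logs an error and raises click.Abort.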
4 changes: 2 additions & 2 deletions cg/meta/compress/compress.py
@@ -58,7 +58,7 @@ def get_flow_cell_id(self, fastq_path: Path) -> str:

     def compress_fastq(self, sample_id: str) -> bool:
         """Compress the FASTQ files for an individual."""
-        LOG.info(f"Check if FASTQ compression is possible for {sample_id}")
+        LOG.debug(f"Check if FASTQ compression is possible for {sample_id}")
         version: Version = self.hk_api.get_latest_bundle_version(bundle_name=sample_id)
         if not version:
             return False
@@ -152,7 +152,7 @@ def clean_fastq(self, sample_id: str, archive_location: str) -> bool:
         This means removing compressed FASTQ files and update housekeeper to point to the new SPRING
         file and its metadata file.
         """
-        LOG.info(f"Clean FASTQ files for {sample_id}")
+        LOG.debug(f"Clean FASTQ files for {sample_id}")
         version: Version = self.hk_api.get_latest_bundle_version(bundle_name=sample_id)
         if not version:
             return False
2 changes: 1 addition & 1 deletion cg/meta/compress/files.py
@@ -27,7 +27,7 @@ def get_hk_files_dict(tags: list[str], version_obj: Version) -> dict[Path, File]
         file_tags: set[str] = {tag.name for tag in version_file.tags}
         if not file_tags.intersection(tags):
             continue
-        LOG.info(f"Found file {version_file.path}")
+        LOG.debug(f"Found file {version_file.path}")
         path_obj: Path = Path(version_file.full_path)
         hk_file[path_obj] = version_file
     return hk_file
27 changes: 24 additions & 3 deletions cg/meta/workflow/nf_analysis.py
@@ -3,9 +3,9 @@
 from pathlib import Path
 from typing import Any, Iterator

+from dateutil.parser import parse
 from pydantic.v1 import ValidationError

-from cg.cli.utils import echo_lines
 from cg.constants import Workflow
 from cg.constants.constants import (
     CaseActions,
@@ -42,7 +42,7 @@
     WorkflowDeliverables,
     WorkflowParameters,
 )
-from cg.store.models import Case, CaseSample, Sample
+from cg.store.models import Analysis, Case, CaseSample, Sample
 from cg.utils import Process

 LOG = logging.getLogger(__name__)
@@ -384,7 +384,8 @@ def config_case(self, case_id: str, dry_run: bool):
         if self.is_managed_variants_required:
             vcf_lines: list[str] = self.get_managed_variants(case_id=case_id)
             if dry_run:
-                echo_lines(lines=vcf_lines)
+                for line in vcf_lines:
+                    LOG.debug(line)
             else:
                 self.write_managed_variants(case_id=case_id, content=vcf_lines)

@@ -906,3 +907,23 @@ def get_latest_metadata(self, case_id: str) -> NextflowAnalysis:
         """Return analysis output of a Nextflow case."""
         qc_metrics: list[MetricsBase] = self.get_multiqc_json_metrics(case_id)
         return self.parse_analysis(qc_metrics_raw=qc_metrics)
+
+    def clean_past_run_dirs(self, before_date: str, skip_confirmation: bool = False) -> None:
+        """Clean past run directories"""
+        before_date: datetime = parse(before_date)
+        analyses_to_clean: list[Analysis] = self.get_analyses_to_clean(before_date)
+        LOG.info(f"Cleaning {len(analyses_to_clean)} analyses created before {before_date}")
+
+        for analysis in analyses_to_clean:
+            case_id = analysis.case.internal_id
+            case_path = self.get_case_path(case_id)
+            try:
+                LOG.info(f"Cleaning output for {case_id}")
+                self.clean_run_dir(
+                    case_id=case_id, skip_confirmation=skip_confirmation, case_path=case_path
+                )
+            except FileNotFoundError:
+                continue
+            except Exception as error:
+                LOG.error(f"Failed to clean directories for case {case_id} - {repr(error)}")
+        LOG.info(f"Done cleaning {self.workflow} output")