
Commit

Merge remote-tracking branch 'origin/upd-pymod' into upd-pymod
henrikstranneheim committed Oct 9, 2024
2 parents d3bad5c + 564346e commit 21d2994
Showing 25 changed files with 307 additions and 86 deletions.
2 changes: 1 addition & 1 deletion .bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 63.3.2
+current_version = 63.5.1
 commit = True
 tag = True
 tag_name = v{new_version}
6 changes: 6 additions & 0 deletions .github/workflows/build_dockerfile_on_push.yml
@@ -24,6 +24,9 @@ jobs:
         id: buildx
         uses: docker/setup-buildx-action@v3

+      - name: Suppress annotations during build
+        run: echo "::stop-commands::no-annotations"
+
       - name: Build and push
         id: docker_build
         uses: docker/build-push-action@v6
@@ -32,3 +35,6 @@
           file: ./Dockerfile
           push: true
           tags: "clinicalgenomics/cg-stage:${{steps.get_branch_name.outputs.branch}}, clinicalgenomics/cg-stage:latest"
+
+      - name: Resume annotations
+        run: echo "::no-annotations::"
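
The two added steps use GitHub Actions workflow commands: `echo "::stop-commands::no-annotations"` tells the runner to stop processing workflow commands (including warning and error annotations) until the chosen token is echoed back, and `echo "::no-annotations::"` resumes processing once the image build has finished. The token `no-annotations` is simply the resume token the author picked, not a reserved keyword.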
Alembic migration file (path not shown in this view)

@@ -19,7 +19,7 @@


 def upgrade():
-    # PacBioSequencingRun
+    # PacbioSequencingRun
     op.add_column(
         table_name="pacbio_sequencing_run",
         column=sa.Column("barcoded_hifi_reads", sa.BIGINT, nullable=True),
@@ -57,7 +57,7 @@ def upgrade():
column=sa.Column("unbarcoded_hifi_mean_read_length", sa.BIGINT, nullable=True),
)

# PacBioSampleSequencingMetrics
# PacbioSampleSequencingMetrics
op.add_column(
table_name="pacbio_sample_run_metrics",
column=sa.Column("polymerase_mean_read_length", sa.BIGINT, nullable=True),
@@ -69,7 +69,7 @@


 def downgrade():
-    # PacBioSequencingRun
+    # PacbioSequencingRun
     op.drop_column(table_name="pacbio_sequencing_run", column_name="barcoded_hifi_reads")
     op.drop_column(table_name="pacbio_sequencing_run", column_name="barcoded_hifi_reads_percentage")
     op.drop_column(table_name="pacbio_sequencing_run", column_name="barcoded_hifi_yield")
@@ -82,7 +82,7 @@ def downgrade():
table_name="pacbio_sequencing_run", column_name="unbarcoded_hifi_mean_read_length"
)

# PacBioSampleSequencingMetrics
# PacbioSampleSequencingMetrics
op.drop_column(
table_name="pacbio_sample_run_metrics", column_name="polymerase_mean_read_length"
)
2 changes: 1 addition & 1 deletion cg/__init__.py
@@ -1,2 +1,2 @@
 __title__ = "cg"
-__version__ = "63.3.2"
+__version__ = "63.5.1"
2 changes: 0 additions & 2 deletions cg/apps/crunchy/crunchy.py
@@ -45,8 +45,6 @@ def __init__(self, config: dict):

     def set_dry_run(self, dry_run: bool) -> None:
         """Update dry run."""
-        LOG.info("Updating compress api")
-        LOG.info(f"Set dry run to {dry_run}")
         self.dry_run = dry_run
         self.slurm_api.set_dry_run(dry_run=dry_run)

8 changes: 6 additions & 2 deletions cg/apps/housekeeper/hk.py
@@ -7,7 +7,11 @@

 from housekeeper.include import checksum as hk_checksum
 from housekeeper.include import include_version
-from housekeeper.store.database import create_all_tables, drop_all_tables, initialize_database
+from housekeeper.store.database import (
+    create_all_tables,
+    drop_all_tables,
+    initialize_database,
+)
 from housekeeper.store.models import Archive, Bundle, File, Tag, Version
 from housekeeper.store.store import Store
 from sqlalchemy.orm import Query
@@ -273,7 +277,7 @@ def get_latest_bundle_version(self, bundle_name: str) -> Version | None:
         """Get the latest version of a Housekeeper bundle."""
         last_version: Version = self.last_version(bundle_name)
         if not last_version:
-            LOG.warning(f"No bundle found for {bundle_name} in Housekeeper")
+            LOG.debug(f"No bundle found for {bundle_name} in Housekeeper")
             return None
         LOG.debug(f"Found Housekeeper version object for {bundle_name}: {repr(last_version)}")
         return last_version
8 changes: 4 additions & 4 deletions cg/cli/clean.py
@@ -23,17 +23,16 @@
     mutant_past_run_dirs,
     rnafusion_past_run_dirs,
     rsync_past_run_dirs,
+    tower_past_run_dirs,
 )
 from cg.constants.cli_options import DRY_RUN, SKIP_CONFIRMATION
 from cg.constants.constants import Workflow
 from cg.constants.housekeeper_tags import AlignmentFileTag, ScoutTag
-from cg.exc import IlluminaCleanRunError, FlowCellError
+from cg.exc import FlowCellError, IlluminaCleanRunError
 from cg.meta.clean.api import CleanAPI
-from cg.services.illumina.cleaning.clean_runs_service import (
-    IlluminaCleanRunsService,
-)
 from cg.meta.clean.clean_retrieved_spring_files import CleanRetrievedSpringFilesAPI
 from cg.models.cg_config import CGConfig
+from cg.services.illumina.cleaning.clean_runs_service import IlluminaCleanRunsService
 from cg.store.models import Analysis
 from cg.store.store import Store
 from cg.utils.date import get_date_days_ago, get_timedelta_from_date
@@ -72,6 +71,7 @@ def clean():
     rnafusion_past_run_dirs,
     rsync_past_run_dirs,
     microsalt_past_run_dirs,
+    tower_past_run_dirs,
 ]:
     clean.add_command(sub_cmd)

2 changes: 1 addition & 1 deletion cg/cli/compress/fastq.py
@@ -97,7 +97,7 @@ def clean_fastq(context: CGConfig, case_id: str | None, days_back: int, dry_run:
             sample_id=sample_id, archive_location=archive_location
         )
         if not was_cleaned:
-            LOG.info(f"Skipping individual {sample_id}")
+            LOG.debug(f"Skipping individual {sample_id}")
             continue
         cleaned_inds += 1

10 changes: 5 additions & 5 deletions cg/cli/compress/helpers.py
@@ -73,13 +73,13 @@ def update_compress_api(

     compress_api.set_dry_run(dry_run=dry_run)
     if mem:
-        LOG.info(f"Set Crunchy API SLURM mem to {mem}")
+        LOG.debug(f"Set Crunchy API SLURM mem to {mem}")
         compress_api.crunchy_api.slurm_memory = mem
     if hours:
-        LOG.info(f"Set Crunchy API SLURM hours to {hours}")
+        LOG.debug(f"Set Crunchy API SLURM hours to {hours}")
         compress_api.crunchy_api.slurm_hours = hours
     if ntasks:
-        LOG.info(f"Set Crunchy API SLURM number of tasks to {ntasks}")
+        LOG.debug(f"Set Crunchy API SLURM number of tasks to {ntasks}")
         compress_api.crunchy_api.slurm_number_tasks = ntasks


@@ -139,7 +139,7 @@ def compress_sample_fastqs_in_cases(
         if case_conversion_count >= number_of_conversions:
             break

-        LOG.info(f"Searching for FASTQ files in case {case.internal_id}")
+        LOG.debug(f"Searching for FASTQ files in case {case.internal_id}")
         if not case.links:
             continue
         for case_link in case.links:
@@ -159,7 +159,7 @@
                 sample_id=case_link.sample.internal_id
             )
             if not case_converted:
-                LOG.info(f"skipping individual {case_link.sample.internal_id}")
+                LOG.debug(f"skipping individual {case_link.sample.internal_id}")
                 continue
             individuals_conversion_count += 1
         if case_converted:
15 changes: 14 additions & 1 deletion cg/cli/utils.py
@@ -1,8 +1,13 @@
 import re
+import shutil

 import click

-import shutil
+from cg.constants import Workflow
+from cg.meta.workflow.raredisease import RarediseaseAnalysisAPI
+from cg.meta.workflow.rnafusion import RnafusionAnalysisAPI
+from cg.meta.workflow.taxprofiler import TaxprofilerAnalysisAPI
+from cg.meta.workflow.tomte import TomteAnalysisAPI


 def echo_lines(lines: list[str]) -> None:
@@ -19,3 +24,11 @@ def is_case_name_allowed(name: str) -> bool:
 CLICK_CONTEXT_SETTINGS: dict[str, int] = {
     "max_content_width": shutil.get_terminal_size().columns - 10
 }
+
+
+TOWER_WORKFLOW_TO_ANALYSIS_API_MAP: dict = {
+    Workflow.RAREDISEASE: RarediseaseAnalysisAPI,
+    Workflow.RNAFUSION: RnafusionAnalysisAPI,
+    Workflow.TAXPROFILER: TaxprofilerAnalysisAPI,
+    Workflow.TOMTE: TomteAnalysisAPI,
+}
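
The new module-level map gives CLI code a single lookup point for Tower-run workflows. A minimal sketch of the intended use, assuming `cg_config` is a placeholder for an already-loaded CGConfig instance (the constructor call mirrors the `...(context.obj)` pattern in the tower_past_run_dirs command added later in this commit):

    from cg.cli.utils import TOWER_WORKFLOW_TO_ANALYSIS_API_MAP
    from cg.constants import Workflow

    # Look up the analysis API class registered for a Tower-based workflow.
    api_class = TOWER_WORKFLOW_TO_ANALYSIS_API_MAP[Workflow.RNAFUSION]

    # Instantiate it with the CG config object, as the CLI command does.
    analysis_api = api_class(cg_config)  # cg_config: assumed pre-loaded CGConfig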
23 changes: 21 additions & 2 deletions cg/cli/workflow/commands.py
@@ -7,9 +7,10 @@
 from dateutil.parser import parse as parse_date

 from cg.apps.housekeeper.hk import HousekeeperAPI
+from cg.cli.utils import TOWER_WORKFLOW_TO_ANALYSIS_API_MAP
 from cg.cli.workflow.utils import validate_force_store_option
-from cg.constants import EXIT_FAIL, EXIT_SUCCESS
-from cg.constants.cli_options import DRY_RUN, SKIP_CONFIRMATION, FORCE, COMMENT
+from cg.constants import EXIT_FAIL, EXIT_SUCCESS, Workflow
+from cg.constants.cli_options import COMMENT, DRY_RUN, FORCE, SKIP_CONFIRMATION
 from cg.constants.observations import LOQUSDB_SUPPORTED_WORKFLOWS
 from cg.exc import IlluminaRunsNeededError
 from cg.meta.workflow.analysis import AnalysisAPI
@@ -22,6 +23,7 @@
 from cg.meta.workflow.mip_dna import MipDNAAnalysisAPI
 from cg.meta.workflow.mip_rna import MipRNAAnalysisAPI
 from cg.meta.workflow.mutant import MutantAnalysisAPI
+from cg.meta.workflow.nf_analysis import NfAnalysisAPI
 from cg.meta.workflow.rnafusion import RnafusionAnalysisAPI
 from cg.models.cg_config import CGConfig
 from cg.services.deliver_files.delivery_rsync_service.delivery_rsync_service import (
@@ -31,6 +33,7 @@

 ARGUMENT_BEFORE_STR = click.argument("before_str", type=str)
 ARGUMENT_CASE_ID = click.argument("case_id", required=True)
+ARGUMENT_WORKFLOW = click.argument("workflow", required=True)
 OPTION_ANALYSIS_PARAMETERS_CONFIG = click.option(
     "--config-artic", type=str, help="Config with computational and lab related settings"
 )
@@ -383,3 +386,19 @@ def microsalt_past_run_dirs(
     context.invoke(
         past_run_dirs, skip_confirmation=skip_confirmation, dry_run=dry_run, before_str=before_str
     )
+
+
+@click.command("tower-past-run-dirs")
+@SKIP_CONFIRMATION
+@ARGUMENT_WORKFLOW
+@ARGUMENT_BEFORE_STR
+@click.pass_context
+def tower_past_run_dirs(
+    context: click.Context, before_str: str, workflow: Workflow, skip_confirmation: bool = False
+):
+    """Clean up of "old" tower case run dirs."""
+    if workflow not in TOWER_WORKFLOW_TO_ANALYSIS_API_MAP:
+        LOG.error(f"Please ensure that the provided workflow {workflow} is using Tower")
+        raise click.Abort()
+    analysis_api: NfAnalysisAPI = TOWER_WORKFLOW_TO_ANALYSIS_API_MAP.get(workflow)(context.obj)
+    analysis_api.clean_past_run_dirs(before_date=before_str, skip_confirmation=skip_confirmation)
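
A sketch of how the new command could be exercised in a test, assuming click's standard CliRunner and a hypothetical pre-built `cg_config` object passed as the context object. Positional arguments follow the decorator order: WORKFLOW first, then BEFORE_STR; assuming Workflow is a str-backed enum, the raw CLI string matches the enum keys in the map:

    from click.testing import CliRunner

    from cg.cli.workflow.commands import tower_past_run_dirs

    runner = CliRunner()
    # "rnafusion" should resolve to Workflow.RNAFUSION in the map lookup.
    result = runner.invoke(
        tower_past_run_dirs,
        ["rnafusion", "2024-09-01"],
        obj=cg_config,  # cg_config: assumed pre-loaded CGConfig (placeholder)
    )

A workflow outside TOWER_WORKFLOW_TO_ANALYSIS_API_MAP logs an error and raises click.Abort.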
4 changes: 2 additions & 2 deletions cg/meta/compress/compress.py
@@ -58,7 +58,7 @@ def get_flow_cell_id(self, fastq_path: Path) -> str:

     def compress_fastq(self, sample_id: str) -> bool:
         """Compress the FASTQ files for an individual."""
-        LOG.info(f"Check if FASTQ compression is possible for {sample_id}")
+        LOG.debug(f"Check if FASTQ compression is possible for {sample_id}")
         version: Version = self.hk_api.get_latest_bundle_version(bundle_name=sample_id)
         if not version:
             return False
@@ -152,7 +152,7 @@ def clean_fastq(self, sample_id: str, archive_location: str) -> bool:
         This means removing compressed FASTQ files and update housekeeper to point to the new SPRING
         file and its metadata file.
         """
-        LOG.info(f"Clean FASTQ files for {sample_id}")
+        LOG.debug(f"Clean FASTQ files for {sample_id}")
         version: Version = self.hk_api.get_latest_bundle_version(bundle_name=sample_id)
         if not version:
             return False
2 changes: 1 addition & 1 deletion cg/meta/compress/files.py
@@ -27,7 +27,7 @@ def get_hk_files_dict(tags: list[str], version_obj: Version) -> dict[Path, File]
         file_tags: set[str] = {tag.name for tag in version_file.tags}
         if not file_tags.intersection(tags):
             continue
-        LOG.info(f"Found file {version_file.path}")
+        LOG.debug(f"Found file {version_file.path}")
         path_obj: Path = Path(version_file.full_path)
         hk_file[path_obj] = version_file
     return hk_file
27 changes: 24 additions & 3 deletions cg/meta/workflow/nf_analysis.py
@@ -3,9 +3,9 @@
 from pathlib import Path
 from typing import Any, Iterator

+from dateutil.parser import parse
 from pydantic.v1 import ValidationError

-from cg.cli.utils import echo_lines
 from cg.constants import Workflow
 from cg.constants.constants import (
     CaseActions,
@@ -42,7 +42,7 @@
     WorkflowDeliverables,
     WorkflowParameters,
 )
-from cg.store.models import Case, CaseSample, Sample
+from cg.store.models import Analysis, Case, CaseSample, Sample
 from cg.utils import Process

 LOG = logging.getLogger(__name__)
@@ -384,7 +384,8 @@ def config_case(self, case_id: str, dry_run: bool):
         if self.is_managed_variants_required:
             vcf_lines: list[str] = self.get_managed_variants(case_id=case_id)
             if dry_run:
-                echo_lines(lines=vcf_lines)
+                for line in vcf_lines:
+                    LOG.debug(line)
             else:
                 self.write_managed_variants(case_id=case_id, content=vcf_lines)

@@ -906,3 +907,23 @@ def get_latest_metadata(self, case_id: str) -> NextflowAnalysis:
         """Return analysis output of a Nextflow case."""
         qc_metrics: list[MetricsBase] = self.get_multiqc_json_metrics(case_id)
         return self.parse_analysis(qc_metrics_raw=qc_metrics)
+
+    def clean_past_run_dirs(self, before_date: str, skip_confirmation: bool = False) -> None:
+        """Clean past run directories"""
+        before_date: datetime = parse(before_date)
+        analyses_to_clean: list[Analysis] = self.get_analyses_to_clean(before_date)
+        LOG.info(f"Cleaning {len(analyses_to_clean)} analyses created before {before_date}")
+
+        for analysis in analyses_to_clean:
+            case_id = analysis.case.internal_id
+            case_path = self.get_case_path(case_id)
+            try:
+                LOG.info(f"Cleaning output for {case_id}")
+                self.clean_run_dir(
+                    case_id=case_id, skip_confirmation=skip_confirmation, case_path=case_path
+                )
+            except FileNotFoundError:
+                continue
+            except Exception as error:
+                LOG.error(f"Failed to clean directories for case {case_id} - {repr(error)}")
+        LOG.info(f"Done cleaning {self.workflow} output")