Merge branch 'master' into fetche_tissue_type_from_original_sample

Clinical-Genomics · Jan 23, 2025 · f90f756 · f90f756
2 parents 0d23732 + 8cec5df
commit f90f756
Show file tree

Hide file tree

Showing 489 changed files with 20,140 additions and 8,495 deletions.
diff --git a/.bumpversion.cfg b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 64.5.14
+current_version = 67.0.3
 commit = True
 tag = True
 tag_name = v{new_version}

diff --git a/.github/workflows/tests_and_coverage.yml b/.github/workflows/tests_and_coverage.yml
@@ -24,16 +24,8 @@ jobs:
         with:
           virtualenvs-create: false
 
-      - name: Cache dependencies
-        uses: actions/cache@v4
-        id: cache
-        with:
-          path: ${{ env.pythonLocation }}
-          key: ${{ env.pythonLocation }}-${{ hashFiles('**/poetry.lock') }}
-
       - name: Install Dependencies
-        if: steps.cache.outputs.cache-hit != 'true'
-        run: poetry install --no-interaction
+        run: poetry install --no-interaction --all-extras
 
       - name: Test with Pytest & Coveralls
         run: |

diff --git a/Dockerfile b/Dockerfile
@@ -23,14 +23,16 @@ ENV TRAILBLAZER_SERVICE_ACCOUNT_AUTH_FILE="auth_file"
 
 
 WORKDIR /home/src/app
-COPY pyproject.toml poetry.lock gunicorn.conf.py ./
+COPY pyproject.toml poetry.lock gunicorn.conf.py README.md ./
 
 RUN pip install --no-cache-dir poetry \
     && poetry config virtualenvs.create false \
-    && poetry install --no-interaction --no-ansi
+    && poetry install --no-interaction --no-ansi --no-root
 
 COPY cg ./cg
 
+RUN poetry install --no-interaction --no-ansi
+
 CMD gunicorn \
     --config gunicorn.conf.py \
     cg.server.auto:app
diff --git a/alembic/versions/2024_12_02_5552c02a4966_add_nallo_to_analysis_options.py b/alembic/versions/2024_12_02_5552c02a4966_add_nallo_to_analysis_options.py
@@ -0,0 +1,105 @@
+"""add-nallo-to-analysis-options
+
+Revision ID: 5552c02a4966
+Revises: 05ffb5e13d7b
+Create Date: 2024-12-02 11:35:31.725343
+
+"""
+
+from enum import StrEnum
+
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import mysql
+from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
+
+# revision identifiers, used by Alembic.
+revision = "5552c02a4966"
+down_revision = "05ffb5e13d7b"
+branch_labels = None
+depends_on = None
+
+# Enum values shared by the old and the new column definitions.
+base_options = (
+    "balsamic",
+    "balsamic-pon",
+    "balsamic-qc",
+    "balsamic-umi",
+    "demultiplex",
+    "raw-data",
+    "fluffy",
+    "microsalt",
+    "mip-dna",
+    "mip-rna",
+    "mutant",
+    "raredisease",
+    "rnafusion",
+    "rsync",
+    "spring",
+    "taxprofiler",
+    "tomte",
+    "jasen",
+)
+
+# Both value lists are sorted; the new list differs only by the added "nallo".
+old_options = sorted(base_options)
+new_options = sorted(base_options + ("nallo",))
+
+# Column types: downgrade() applies the old enum, upgrade() applies the new one.
+old_analysis_enum = mysql.ENUM(*old_options)
+new_analysis_enum = mysql.ENUM(*new_options)
+
+
+class Pipeline(StrEnum):
+    """Workflow names, including "nallo", used by the ORM models in this migration.
+
+    Members are deliberately left unannotated: an explicit type annotation on an
+    enum member is reported as an error by type checkers and hides the literal type.
+    """
+
+    BALSAMIC = "balsamic"
+    BALSAMIC_PON = "balsamic-pon"
+    BALSAMIC_QC = "balsamic-qc"
+    BALSAMIC_UMI = "balsamic-umi"
+    DEMULTIPLEX = "demultiplex"
+    FLUFFY = "fluffy"
+    JASEN = "jasen"
+    MICROSALT = "microsalt"
+    MIP_DNA = "mip-dna"
+    MIP_RNA = "mip-rna"
+    MUTANT = "mutant"
+    NALLO = "nallo"
+    RAREDISEASE = "raredisease"
+    RAW_DATA = "raw-data"
+    RNAFUSION = "rnafusion"
+    RSYNC = "rsync"
+    SPRING = "spring"
+    TAXPROFILER = "taxprofiler"
+    TOMTE = "tomte"
+
+
+class Base(DeclarativeBase):
+    """Declarative base for the table models defined in this migration file."""
+    pass
+
+
+class Analysis(Base):
+    """Mapping of the "analysis" table limited to the id and workflow columns."""
+
+    __tablename__ = "analysis"
+    id = sa.Column(sa.types.Integer, primary_key=True)
+    # Filtered on and rewritten by downgrade() for rows holding "nallo".
+    workflow = sa.Column(sa.types.Enum(*list(Pipeline)))
+
+
+class Case(Base):
+    """Mapping of the "case" table limited to id, data_analysis and internal_id."""
+
+    __tablename__ = "case"
+    id = sa.Column(sa.types.Integer, primary_key=True)
+    # Filtered on and rewritten by downgrade() for rows holding "nallo".
+    data_analysis = sa.Column(sa.types.Enum(*list(Pipeline)))
+    # Only read, to identify the case in the downgrade log output.
+    internal_id = sa.Column(sa.types.String)
+
+
+def upgrade():
+    """Switch case.data_analysis and analysis.workflow to the enum that includes "nallo"."""
+    for table_name, column_name in (("case", "data_analysis"), ("analysis", "workflow")):
+        op.alter_column(table_name, column_name, type_=new_analysis_enum)
+
+
+def downgrade():
+    """Move every "nallo" row to "raw-data", then restore the enum without "nallo".
+
+    Rows in analysis.workflow and case.data_analysis that hold "nallo" are
+    rewritten first so that no row holds a value missing from the old enum.
+    """
+    bind = op.get_bind()
+    session = sa.orm.Session(bind=bind)
+    for analysis in session.query(Analysis).filter(Analysis.workflow == "nallo"):
+        # Log the row itself; Case.internal_id is a class-level column, not a row value.
+        print(f"Changing workflow for Analysis {analysis.id} to raw-data")
+        analysis.workflow = "raw-data"
+    for case in session.query(Case).filter(Case.data_analysis == "nallo"):
+        print(f"Changing data_analysis for Case {case.internal_id} to raw-data")
+        case.data_analysis = "raw-data"
+    # op.alter_column bypasses the session, so pending updates must be sent to the
+    # database before the enum is narrowed; otherwise "nallo" rows still exist.
+    session.flush()
+    op.alter_column("case", "data_analysis", type_=old_analysis_enum)
+    op.alter_column("analysis", "workflow", type_=old_analysis_enum)
+    session.commit()
diff --git a/cg/__init__.py b/cg/__init__.py
@@ -1,2 +1,2 @@
 __title__ = "cg"
-__version__ = "64.5.14"
+__version__ = "67.0.3"
diff --git a/cg/apps/demultiplex/demultiplex_api.py b/cg/apps/demultiplex/demultiplex_api.py
@@ -12,8 +12,8 @@
 from cg.apps.tb import TrailblazerAPI
 from cg.constants.constants import FileFormat, Workflow
 from cg.constants.demultiplexing import DemultiplexingDirsAndFiles
-from cg.constants.priority import SlurmQos
-from cg.constants.tb import AnalysisTypes
+from cg.constants.priority import SlurmQos, TrailblazerPriority
+from cg.constants.tb import AnalysisType
 from cg.exc import HousekeeperFileMissingError
 from cg.io.controller import WriteFile
 from cg.models.demultiplex.sbatch import SbatchCommand, SbatchError
@@ -49,6 +49,11 @@ def slurm_quality_of_service(self) -> Literal[SlurmQos.HIGH, SlurmQos.LOW]:
         """Return SLURM quality of service."""
         return SlurmQos.LOW if self.environment == "stage" else SlurmQos.HIGH
 
+    @property
+    def trailblazer_priority(self) -> Literal[TrailblazerPriority.HIGH, TrailblazerPriority.LOW]:
+        """Return the Trailblazer priority: low in the stage environment, otherwise high."""
+        if self.environment == "stage":
+            return TrailblazerPriority.LOW
+        return TrailblazerPriority.HIGH
+
     def set_dry_run(self, dry_run: bool) -> None:
         """Set dry run."""
         LOG.debug(f"DemultiplexingAPI: Set dry run to {dry_run}")
@@ -210,10 +215,10 @@ def add_to_trailblazer(
         )
         tb_api.add_pending_analysis(
             case_id=sequencing_run.id,
-            analysis_type=AnalysisTypes.OTHER,
+            analysis_type=AnalysisType.OTHER,
             config_path=sequencing_run.trailblazer_config_path.as_posix(),
             out_dir=sequencing_run.trailblazer_config_path.parent.as_posix(),
-            slurm_quality_of_service=self.slurm_quality_of_service,
+            priority=self.trailblazer_priority,
             email=self.mail,
             workflow=Workflow.DEMULTIPLEX,
         )

diff --git a/cg/apps/demultiplex/sample_sheet/api.py b/cg/apps/demultiplex/sample_sheet/api.py
@@ -1,15 +1,15 @@
 import logging
 from pathlib import Path
 
-import click
+import rich_click as click
 
 from cg.apps.demultiplex.sample_sheet.read_sample_sheet import get_samples_from_content
 from cg.apps.demultiplex.sample_sheet.sample_models import IlluminaSampleIndexSetting
 from cg.apps.demultiplex.sample_sheet.sample_sheet_creator import SampleSheetCreator
 from cg.apps.demultiplex.sample_sheet.sample_sheet_validator import SampleSheetValidator
 from cg.apps.demultiplex.sample_sheet.utils import (
-    delete_sample_sheet_from_housekeeper,
     add_and_include_sample_sheet_path_to_housekeeper,
+    delete_sample_sheet_from_housekeeper,
 )
 from cg.apps.housekeeper.hk import HousekeeperAPI
 from cg.apps.lims import LimsAPI
@@ -160,6 +160,20 @@ def _use_sample_sheet_from_housekeeper(
                 "would have copied it to sequencing run directory"
             )
             return
+
+        try:
+            if sample_sheet_path.samefile(run_directory_data.sample_sheet_path):
+                LOG.info(
+                    "Sample sheet from Housekeeper is the same as the sequencing directory sample sheet"
+                )
+                return
+        except FileNotFoundError:
+            LOG.info(
+                f"Sample sheet or target path does not exist. "
+                f"Housekeeper sample sheet path: {sample_sheet_path}, "
+                f"Target sample sheet path: {run_directory_data.sample_sheet_path}"
+            )
+
         LOG.info("Sample sheet from Housekeeper is valid. Copying it to sequencing run directory")
         link_or_overwrite_file(src=sample_sheet_path, dst=run_directory_data.sample_sheet_path)
 

diff --git a/cg/apps/hermes/hermes_api.py b/cg/apps/hermes/hermes_api.py
@@ -15,7 +15,11 @@ class HermesApi:
     """Class to communicate with hermes"""
 
     def __init__(self, config: dict):
-        self.process = Process(binary=config["hermes"]["binary_path"])
+        self.process = Process(
+            binary=config["hermes"]["binary_path"],
+        )
+        self.container_path: str = config["hermes"]["container_path"]
+        self.container_mount_volume = config["hermes"]["container_mount_volume"]
 
     def convert_deliverables(
         self,
@@ -27,6 +31,10 @@ def convert_deliverables(
         """Convert deliverables file in raw workflow format to CG format with Hermes."""
         LOG.info("Converting workflow deliverables to CG deliverables")
         convert_command = [
+            "run",
+            "--bind",
+            self.container_mount_volume,
+            self.container_path,
             "convert",
             "deliverables",
             "--workflow",

diff --git a/cg/apps/housekeeper/hk.py b/cg/apps/housekeeper/hk.py
@@ -7,11 +7,7 @@
 
 from housekeeper.include import checksum as hk_checksum
 from housekeeper.include import include_version
-from housekeeper.store.database import (
-    create_all_tables,
-    drop_all_tables,
-    initialize_database,
-)
+from housekeeper.store.database import create_all_tables, drop_all_tables, initialize_database
 from housekeeper.store.models import Archive, Bundle, File, Tag, Version
 from housekeeper.store.store import Store
 from sqlalchemy.orm import Query
@@ -27,17 +23,13 @@
 
 
 class HousekeeperAPI:
-    """API to decouple cg code from Housekeeper"""
+    """API to decouple cg code from Housekeeper."""
 
     def __init__(self, config: dict) -> None:
         initialize_database(config["housekeeper"]["database"])
         self._store = Store(config["housekeeper"]["root"])
         self.root_dir: str = config["housekeeper"]["root"]
 
-    def __getattr__(self, name):
-        LOG.warning(f"Called undefined {name} on {self.__class__.__name__}, please wrap")
-        return getattr(self._store, name)
-
     def new_bundle(self, name: str, created_at: datetime = None) -> Bundle:
         """Create a new file bundle."""
         return self._store.new_bundle(name, created_at)

diff --git a/cg/apps/lims/api.py b/cg/apps/lims/api.py
@@ -555,3 +555,11 @@ def _get_negative_controls_from_list(samples: list[Sample]) -> list[Sample]:
             ):
                 negative_controls.append(sample)
         return negative_controls
+
+    def get_sample_region_and_lab_code(self, sample_id: str) -> str:
+        """Build the "<region code>_<lab code>_" prefix for a sample from its LIMS attributes.
+
+        Only the text before the first space of each attribute value is used.
+        """
+        raw_region: str = self.get_sample_attribute(lims_id=sample_id, key="region_code")
+        region_code: str = raw_region.split(" ")[0]
+        raw_lab: str = self.get_sample_attribute(lims_id=sample_id, key="lab_code")
+        lab_code: str = raw_lab.split(" ")[0]
+        return f"{region_code}_{lab_code}_"
diff --git a/cg/apps/orderform/excel_orderform_parser.py b/cg/apps/orderform/excel_orderform_parser.py
@@ -12,8 +12,8 @@
 from cg.constants import DataDelivery
 from cg.constants.orderforms import Orderform
 from cg.exc import OrderFormError
+from cg.models.orders.constants import OrderType
 from cg.models.orders.excel_sample import ExcelSample
-from cg.models.orders.order import OrderType
 
 LOG = logging.getLogger(__name__)
 

diff --git a/cg/apps/orderform/json_orderform_parser.py b/cg/apps/orderform/json_orderform_parser.py
@@ -1,8 +1,8 @@
 from cg.apps.orderform.orderform_parser import OrderformParser
 from cg.constants import DataDelivery, Workflow
 from cg.exc import OrderFormError
+from cg.models.orders.constants import OrderType
 from cg.models.orders.json_sample import JsonSample
-from cg.models.orders.order import OrderType
 
 
 class JsonOrderformParser(OrderformParser):

diff --git a/cg/apps/orderform/orderform_parser.py b/cg/apps/orderform/orderform_parser.py
@@ -4,9 +4,10 @@
 
 from pydantic import BaseModel, ConfigDict, constr
 
+from cg.apps.orderform.utils import ORDER_TYPES_WITH_CASES
 from cg.constants import DataDelivery
 from cg.exc import OrderFormError
-from cg.models.orders.order import OrderType
+from cg.models.orders.constants import OrderType
 from cg.models.orders.orderform_schema import OrderCase, Orderform, OrderPool
 from cg.models.orders.sample_base import OrderSample
 from cg.store.models import Customer
@@ -142,10 +143,11 @@ def expand_case(case_id: str, case_samples: list[OrderSample]) -> OrderCase:
 
     def generate_orderform(self) -> Orderform:
         """Generate an orderform"""
-        cases_map: dict[str, list[OrderSample]] = self.group_cases()
         case_objs: list[OrderCase] = []
-        for case_id in cases_map:
-            case_objs.append(self.expand_case(case_id=case_id, case_samples=cases_map[case_id]))
+        if self.project_type in ORDER_TYPES_WITH_CASES:
+            cases_map: dict[str, list[OrderSample]] = self.group_cases()
+            for case_id in cases_map:
+                case_objs.append(self.expand_case(case_id=case_id, case_samples=cases_map[case_id]))
         return Orderform(
             comment=self.order_comment,
             samples=self.samples,

diff --git a/cg/apps/orderform/utils.py b/cg/apps/orderform/utils.py
@@ -1,5 +1,16 @@
+from cg.models.orders.constants import OrderType
 from cg.models.orders.excel_sample import ExcelSample
 
+# Order types for which the orderform parser groups samples into cases;
+# for any other project type no cases are generated.
+ORDER_TYPES_WITH_CASES = [
+    OrderType.BALSAMIC,
+    OrderType.BALSAMIC_QC,
+    OrderType.BALSAMIC_UMI,
+    OrderType.MIP_DNA,
+    OrderType.MIP_RNA,
+    OrderType.RNAFUSION,
+    OrderType.TOMTE,
+]
+
 
 def are_all_samples_metagenome(samples: list[ExcelSample]) -> bool:
     """Check if all samples are metagenome samples"""