Merge branch 'master' into write_config_taxprofiler

Clinical-Genomics · Jan 15, 2025 · a99c7aa · a99c7aa
2 parents b2e327f + 6a42e69
commit a99c7aa
Show file tree

Hide file tree

Showing 294 changed files with 4,260 additions and 2,108 deletions.
diff --git a/.bumpversion.cfg b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 64.5.8
+current_version = 65.0.13
 commit = True
 tag = True
 tag_name = v{new_version}

diff --git a/Dockerfile b/Dockerfile
@@ -1,11 +1,5 @@
 FROM docker.io/library/python:3.11-slim-bullseye
 
-ENV GUNICORN_WORKERS=1
-ENV GUNICORN_THREADS=1
-ENV GUNICORN_BIND="0.0.0.0:8000"
-ENV GUNICORN_TIMEOUT=400
-
-
 ENV CG_SQL_DATABASE_URI="sqlite:///:memory:"
 ENV CG_SECRET_KEY="key"
 
@@ -29,23 +23,16 @@ ENV TRAILBLAZER_SERVICE_ACCOUNT_AUTH_FILE="auth_file"
 
 
 WORKDIR /home/src/app
-COPY pyproject.toml poetry.lock ./ 
+COPY pyproject.toml poetry.lock gunicorn.conf.py README.md ./
 
 RUN pip install --no-cache-dir poetry \
     && poetry config virtualenvs.create false \
-    && poetry install --no-interaction --no-ansi
+    && poetry install --no-interaction --no-ansi --no-root
 
 COPY cg ./cg
 
+RUN poetry install --no-interaction --no-ansi
+
 CMD gunicorn \
-    --workers=$GUNICORN_WORKERS \
-    --bind=$GUNICORN_BIND  \
-    --threads=$GUNICORN_THREADS \
-    --timeout=$GUNICORN_TIMEOUT \
-    --proxy-protocol \
-    --forwarded-allow-ips="10.0.2.100,127.0.0.1" \
-    --log-syslog \
-    --access-logfile - \
-    --error-logfile - \
-    --log-level="debug" \
-    cg.server.auto:app
+    --config gunicorn.conf.py \
+    cg.server.auto:app
diff --git a/alembic/versions/2024_12_02_5552c02a4966_add_nallo_to_analysis_options.py b/alembic/versions/2024_12_02_5552c02a4966_add_nallo_to_analysis_options.py
@@ -0,0 +1,105 @@
+"""add-nallo-to-analysis-options
+
+Revision ID: 5552c02a4966
+Revises: 05ffb5e13d7b
+Create Date: 2024-12-02 11:35:31.725343
+
+"""
+
+from enum import StrEnum
+
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import mysql
+from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
+
+# revision identifiers, used by Alembic.
+revision = "5552c02a4966"
+down_revision = "05ffb5e13d7b"
+branch_labels = None
+depends_on = None
+
+# Workflow values of the enum columns before this migration (i.e. without "nallo").
+base_options = (
+    "balsamic",
+    "balsamic-pon",
+    "balsamic-qc",
+    "balsamic-umi",
+    "demultiplex",
+    "raw-data",
+    "fluffy",
+    "microsalt",
+    "mip-dna",
+    "mip-rna",
+    "mutant",
+    "raredisease",
+    "rnafusion",
+    "rsync",
+    "spring",
+    "taxprofiler",
+    "tomte",
+    "jasen",
+)
+
+# Both option lists are sorted, so the enum values are declared in alphabetical order.
+old_options = sorted(base_options)
+new_options = sorted(base_options + ("nallo",))
+
+# Column types applied by downgrade() and upgrade() respectively.
+old_analysis_enum = mysql.ENUM(*old_options)
+new_analysis_enum = mysql.ENUM(*new_options)
+
+
+class Pipeline(StrEnum):
+    """Workflow names, including "nallo", used to type the enum columns of the stub models below."""
+    BALSAMIC: str = "balsamic"
+    BALSAMIC_PON: str = "balsamic-pon"
+    BALSAMIC_QC: str = "balsamic-qc"
+    BALSAMIC_UMI: str = "balsamic-umi"
+    DEMULTIPLEX: str = "demultiplex"
+    FLUFFY: str = "fluffy"
+    JASEN: str = "jasen"
+    MICROSALT: str = "microsalt"
+    MIP_DNA: str = "mip-dna"
+    MIP_RNA: str = "mip-rna"
+    MUTANT: str = "mutant"
+    NALLO: str = "nallo"
+    RAREDISEASE: str = "raredisease"
+    RAW_DATA: str = "raw-data"
+    RNAFUSION: str = "rnafusion"
+    RSYNC: str = "rsync"
+    SPRING: str = "spring"
+    TAXPROFILER: str = "taxprofiler"
+    TOMTE: str = "tomte"
+
+
+class Base(DeclarativeBase):
+    """Declarative base for the stub models defined in this migration."""
+    pass
+
+
+class Analysis(Base):
+    """Stand-in model for the analysis table, declaring the columns queried in downgrade()."""
+    __tablename__ = "analysis"
+    id = sa.Column(sa.types.Integer, primary_key=True)
+    workflow = sa.Column(sa.types.Enum(*list(Pipeline)))
+
+
+class Case(Base):
+    """Stand-in model for the case table, declaring the columns queried in downgrade()."""
+    __tablename__ = "case"
+    id = sa.Column(sa.types.Integer, primary_key=True)
+    data_analysis = sa.Column(sa.types.Enum(*list(Pipeline)))
+    internal_id = sa.Column(sa.types.String)
+
+
+def upgrade():
+    """Switch the workflow enum columns of the case and analysis tables to the option set that includes "nallo"."""
+    for table_name, column_name in (("case", "data_analysis"), ("analysis", "workflow")):
+        op.alter_column(table_name, column_name, type_=new_analysis_enum)
+
+
+def downgrade():
+    """Move every "nallo" analysis and case to "raw-data", then remove "nallo" from both enum columns.
+
+    The row updates are committed before the columns are altered: the old enum
+    has no "nallo" value, so no row may still reference it when the type changes.
+    """
+    bind = op.get_bind()
+    session = sa.orm.Session(bind=bind)
+    for analysis in session.query(Analysis).filter(Analysis.workflow == "nallo"):
+        # Report the row actually being changed; the stub Analysis model only exposes its id.
+        print(f"Changing workflow for Analysis {analysis.id} to raw-data")
+        analysis.workflow = "raw-data"
+    for case in session.query(Case).filter(Case.data_analysis == "nallo"):
+        print(f"Changing data_analysis for Case {case.internal_id} to raw-data")
+        case.data_analysis = "raw-data"
+    # Persist the pending updates first so no row holds "nallo" when the enums are narrowed.
+    session.commit()
+    op.alter_column("case", "data_analysis", type_=old_analysis_enum)
+    op.alter_column("analysis", "workflow", type_=old_analysis_enum)
diff --git a/cg/__init__.py b/cg/__init__.py
@@ -1,2 +1,2 @@
 __title__ = "cg"
-__version__ = "64.5.8"
+__version__ = "65.0.13"
diff --git a/cg/apps/demultiplex/demultiplex_api.py b/cg/apps/demultiplex/demultiplex_api.py
@@ -12,8 +12,8 @@
 from cg.apps.tb import TrailblazerAPI
 from cg.constants.constants import FileFormat, Workflow
 from cg.constants.demultiplexing import DemultiplexingDirsAndFiles
-from cg.constants.priority import SlurmQos
-from cg.constants.tb import AnalysisTypes
+from cg.constants.priority import SlurmQos, TrailblazerPriority
+from cg.constants.tb import AnalysisType
 from cg.exc import HousekeeperFileMissingError
 from cg.io.controller import WriteFile
 from cg.models.demultiplex.sbatch import SbatchCommand, SbatchError
@@ -49,6 +49,11 @@ def slurm_quality_of_service(self) -> Literal[SlurmQos.HIGH, SlurmQos.LOW]:
         """Return SLURM quality of service."""
         return SlurmQos.LOW if self.environment == "stage" else SlurmQos.HIGH
 
+    @property
+    def trailblazer_priority(self) -> Literal[TrailblazerPriority.HIGH, TrailblazerPriority.LOW]:
+        """Return the Trailblazer priority: low in the stage environment, high otherwise."""
+        if self.environment == "stage":
+            return TrailblazerPriority.LOW
+        return TrailblazerPriority.HIGH
+
     def set_dry_run(self, dry_run: bool) -> None:
         """Set dry run."""
         LOG.debug(f"DemultiplexingAPI: Set dry run to {dry_run}")
@@ -210,10 +215,10 @@ def add_to_trailblazer(
         )
         tb_api.add_pending_analysis(
             case_id=sequencing_run.id,
-            analysis_type=AnalysisTypes.OTHER,
+            analysis_type=AnalysisType.OTHER,
             config_path=sequencing_run.trailblazer_config_path.as_posix(),
             out_dir=sequencing_run.trailblazer_config_path.parent.as_posix(),
-            slurm_quality_of_service=self.slurm_quality_of_service,
+            priority=self.trailblazer_priority,
             email=self.mail,
             workflow=Workflow.DEMULTIPLEX,
         )

diff --git a/cg/apps/demultiplex/sample_sheet/api.py b/cg/apps/demultiplex/sample_sheet/api.py
@@ -1,15 +1,15 @@
 import logging
 from pathlib import Path
 
-import click
+import rich_click as click
 
 from cg.apps.demultiplex.sample_sheet.read_sample_sheet import get_samples_from_content
 from cg.apps.demultiplex.sample_sheet.sample_models import IlluminaSampleIndexSetting
 from cg.apps.demultiplex.sample_sheet.sample_sheet_creator import SampleSheetCreator
 from cg.apps.demultiplex.sample_sheet.sample_sheet_validator import SampleSheetValidator
 from cg.apps.demultiplex.sample_sheet.utils import (
-    delete_sample_sheet_from_housekeeper,
     add_and_include_sample_sheet_path_to_housekeeper,
+    delete_sample_sheet_from_housekeeper,
 )
 from cg.apps.housekeeper.hk import HousekeeperAPI
 from cg.apps.lims import LimsAPI
@@ -160,6 +160,20 @@ def _use_sample_sheet_from_housekeeper(
                 "would have copied it to sequencing run directory"
             )
             return
+
+        try:
+            if sample_sheet_path.samefile(run_directory_data.sample_sheet_path):
+                LOG.info(
+                    "Sample sheet from Housekeeper is the same as the sequencing directory sample sheet"
+                )
+                return
+        except FileNotFoundError:
+            LOG.info(
+                f"Sample sheet or target path does not exist. "
+                f"Housekeeper sample sheet path: {sample_sheet_path}, "
+                f"Target sample sheet path: {run_directory_data.sample_sheet_path}"
+            )
+
         LOG.info("Sample sheet from Housekeeper is valid. Copying it to sequencing run directory")
         link_or_overwrite_file(src=sample_sheet_path, dst=run_directory_data.sample_sheet_path)
 

diff --git a/cg/apps/gens.py b/cg/apps/gens.py
@@ -2,7 +2,7 @@
 
 import logging
 
-from cg.constants.constants import PrepCategory
+from cg.constants.sequencing import SeqLibraryPrepCategory
 from cg.store.models import Case
 from cg.utils import Process
 from cg.utils.dict import get_list_from_dictionary
@@ -51,7 +51,8 @@ def load(
     def is_suitable_for_upload(case: Case) -> bool:
         """Check if a cancer case supports Gens upload."""
         return all(
-            sample.prep_category == PrepCategory.WHOLE_GENOME_SEQUENCING for sample in case.samples
+            sample.prep_category == SeqLibraryPrepCategory.WHOLE_GENOME_SEQUENCING
+            for sample in case.samples
         )
 
     def __str__(self):

diff --git a/cg/apps/hermes/hermes_api.py b/cg/apps/hermes/hermes_api.py
@@ -15,7 +15,10 @@ class HermesApi:
     """Class to communicate with hermes"""
 
     def __init__(self, config: dict):
-        self.process = Process(binary=config["hermes"]["binary_path"])
+        self.process = Process(
+            binary=config["hermes"]["binary_path"],
+        )
+        self.container_mount_volume = config["hermes"]["container_mount_volume"]
 
     def convert_deliverables(
         self,
@@ -27,6 +30,10 @@ def convert_deliverables(
         """Convert deliverables file in raw workflow format to CG format with Hermes."""
         LOG.info("Converting workflow deliverables to CG deliverables")
         convert_command = [
+            "run",
+            "--bind",
+            self.container_mount_volume,
+            "/home/proj/stage/singularity_containers/hermes_latest.sif",
             "convert",
             "deliverables",
             "--workflow",

diff --git a/cg/apps/housekeeper/hk.py b/cg/apps/housekeeper/hk.py
@@ -7,11 +7,7 @@
 
 from housekeeper.include import checksum as hk_checksum
 from housekeeper.include import include_version
-from housekeeper.store.database import (
-    create_all_tables,
-    drop_all_tables,
-    initialize_database,
-)
+from housekeeper.store.database import create_all_tables, drop_all_tables, initialize_database
 from housekeeper.store.models import Archive, Bundle, File, Tag, Version
 from housekeeper.store.store import Store
 from sqlalchemy.orm import Query
@@ -27,17 +23,13 @@
 
 
 class HousekeeperAPI:
-    """API to decouple cg code from Housekeeper"""
+    """API to decouple cg code from Housekeeper."""
 
     def __init__(self, config: dict) -> None:
         initialize_database(config["housekeeper"]["database"])
         self._store = Store(config["housekeeper"]["root"])
         self.root_dir: str = config["housekeeper"]["root"]
 
-    def __getattr__(self, name):
-        LOG.warning(f"Called undefined {name} on {self.__class__.__name__}, please wrap")
-        return getattr(self._store, name)
-
     def new_bundle(self, name: str, created_at: datetime = None) -> Bundle:
         """Create a new file bundle."""
         return self._store.new_bundle(name, created_at)

diff --git a/cg/apps/lims/api.py b/cg/apps/lims/api.py
@@ -555,3 +555,11 @@ def _get_negative_controls_from_list(samples: list[Sample]) -> list[Sample]:
             ):
                 negative_controls.append(sample)
         return negative_controls
+
+    def get_sample_region_and_lab_code(self, sample_id: str) -> str:
+        """Build the "<region code>_<lab code>_" prefix for a sample.
+
+        Only the text before the first space of each attribute value is used.
+        """
+        codes: list[str] = [
+            self.get_sample_attribute(lims_id=sample_id, key=attribute).split(" ")[0]
+            for attribute in ("region_code", "lab_code")
+        ]
+        return f"{codes[0]}_{codes[1]}_"
diff --git a/cg/apps/orderform/excel_orderform_parser.py b/cg/apps/orderform/excel_orderform_parser.py
@@ -25,7 +25,7 @@ class ExcelOrderformParser(OrderformParser):
     SHEET_NAMES: list[str] = ["Orderform", "orderform", "order form", "Order Form"]
     VALID_ORDERFORMS: list[str] = [
         f"{Orderform.MIP_DNA}:{Orderform.get_current_orderform_version(Orderform.MIP_DNA)}",  # Orderform MIP-DNA, Balsamic, sequencing only, MIP-RNA
-        f"{Orderform.MICROSALT}:{Orderform.get_current_orderform_version(Orderform.MICROSALT)}",  # Microbial WGS
+        f"{Orderform.MICROSALT}:{Orderform.get_current_orderform_version(Orderform.MICROSALT)}",  # Microbial WGS (whole genome sequencing)
         f"{Orderform.RML}:{Orderform.get_current_orderform_version(Orderform.RML)}",  # Orderform Ready made libraries (RML)
         f"{Orderform.METAGENOME}:{Orderform.get_current_orderform_version(Orderform.METAGENOME)}",  # Microbial meta genomes
         f"{Orderform.SARS_COV_2}:{Orderform.get_current_orderform_version(Orderform.SARS_COV_2)}",  # Orderform SARS-CoV-2

diff --git a/cg/apps/orderform/orderform_parser.py b/cg/apps/orderform/orderform_parser.py
@@ -4,6 +4,7 @@
 
 from pydantic import BaseModel, ConfigDict, constr
 
+from cg.apps.orderform.utils import ORDER_TYPES_WITH_CASES
 from cg.constants import DataDelivery
 from cg.exc import OrderFormError
 from cg.models.orders.order import OrderType
@@ -142,10 +143,11 @@ def expand_case(case_id: str, case_samples: list[OrderSample]) -> OrderCase:
 
     def generate_orderform(self) -> Orderform:
         """Generate an orderform"""
-        cases_map: dict[str, list[OrderSample]] = self.group_cases()
         case_objs: list[OrderCase] = []
-        for case_id in cases_map:
-            case_objs.append(self.expand_case(case_id=case_id, case_samples=cases_map[case_id]))
+        if self.project_type in ORDER_TYPES_WITH_CASES:
+            cases_map: dict[str, list[OrderSample]] = self.group_cases()
+            for case_id in cases_map:
+                case_objs.append(self.expand_case(case_id=case_id, case_samples=cases_map[case_id]))
         return Orderform(
             comment=self.order_comment,
             samples=self.samples,

diff --git a/cg/apps/orderform/utils.py b/cg/apps/orderform/utils.py
@@ -1,5 +1,16 @@
+from cg.models.orders.constants import OrderType
 from cg.models.orders.excel_sample import ExcelSample
 
+# Order types for which the orderform parser groups samples into cases.
+ORDER_TYPES_WITH_CASES = [
+    OrderType.BALSAMIC,
+    OrderType.BALSAMIC_QC,
+    OrderType.BALSAMIC_UMI,
+    OrderType.MIP_DNA,
+    OrderType.MIP_RNA,
+    OrderType.RNAFUSION,
+    OrderType.TOMTE,
+]
+
 
 def are_all_samples_metagenome(samples: list[ExcelSample]) -> bool:
     """Check if all samples are metagenome samples"""