Refactor pipeline to workflow config (#2926) (major)
### Changed

- Refactor pipeline to workflow config
henrikstranneheim authored Feb 12, 2024
1 parent 95e42cc commit e94dbd9
Showing 30 changed files with 125 additions and 118 deletions.
10 changes: 5 additions & 5 deletions cg/cli/workflow/nf_analysis.py
@@ -2,11 +2,11 @@

import click

from cg.models.cg_config import CGConfig
from cg.meta.workflow.nf_analysis import NfAnalysisAPI
from cg.constants.constants import MetaApis
from cg.cli.workflow.commands import ARGUMENT_CASE_ID, OPTION_DRY
from cg.constants.constants import MetaApis
from cg.exc import CgError
from cg.meta.workflow.nf_analysis import NfAnalysisAPI
from cg.models.cg_config import CGConfig

OPTION_WORKDIR = click.option(
"--work-dir",
@@ -43,7 +43,7 @@
OPTION_PARAMS_FILE = click.option(
"--params-file",
type=click.Path(),
help="Nextflow pipeline-specific parameter file path",
help="Nextflow workflow-specific parameter file path",
)

OPTION_USE_NEXTFLOW = click.option(
@@ -52,7 +52,7 @@
is_flag=True,
default=False,
show_default=True,
help="Execute pipeline using nextflow",
help="Execute workflow using nextflow",
)

OPTION_REVISION = click.option(
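
The help-text edits above touch shared, module-level click options (for example `OPTION_PARAMS_FILE` and `OPTION_USE_NEXTFLOW`) that the nf-core workflow commands reuse. The commands consuming them are not part of this hunk, so the sketch below only illustrates how such an option is typically attached; the command name and callback are hypothetical.

```python
import click

# Hypothetical re-creation of one shared option from this hunk; the real
# definition lives in cg/cli/workflow/nf_analysis.py.
OPTION_USE_NEXTFLOW = click.option(
    "--use-nextflow",
    is_flag=True,
    default=False,
    show_default=True,
    help="Execute workflow using nextflow",
)


@click.command("run")  # hypothetical command, not shown in this diff
@click.argument("case_id")
@OPTION_USE_NEXTFLOW
def run(case_id: str, use_nextflow: bool) -> None:
    """Sketch of a workflow command consuming the shared option."""
    executor = "nextflow" if use_nextflow else "Tower"
    click.echo(f"Running case {case_id} via {executor}")
```
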
2 changes: 1 addition & 1 deletion cg/meta/workflow/mip_dna.py
@@ -26,7 +26,7 @@ def conda_env(self) -> str:

@property
def mip_workflow(self) -> str:
return self.config.mip_rd_dna.pipeline
return self.config.mip_rd_dna.workflow

@property
def script(self) -> str:
2 changes: 1 addition & 1 deletion cg/meta/workflow/mip_rna.py
@@ -24,7 +24,7 @@ def conda_env(self) -> str:

@property
def mip_workflow(self) -> str:
return self.config.mip_rd_rna.pipeline
return self.config.mip_rd_rna.workflow

@property
def script(self) -> str:
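
Both MIP analysis APIs expose the renamed configuration field through the same `mip_workflow` property. A minimal sketch of that delegation with toy stand-ins (the real classes in mip_dna.py and mip_rna.py carry many more fields, and the example value is a placeholder):

```python
from pydantic import BaseModel


class MipConfigSketch(BaseModel):
    """Toy stand-in for MipConfig; only the renamed field is modelled."""

    workflow: str


class MipAnalysisSketch:
    """Toy stand-in for the MIP analysis API classes."""

    def __init__(self, mip_config: MipConfigSketch) -> None:
        self._mip_config = mip_config

    @property
    def mip_workflow(self) -> str:
        # After this commit the property reads `.workflow` instead of `.pipeline`.
        return self._mip_config.workflow


api = MipAnalysisSketch(MipConfigSketch(workflow="rd_dna"))  # placeholder value
assert api.mip_workflow == "rd_dna"
```
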
11 changes: 7 additions & 4 deletions cg/meta/workflow/nf_analysis.py
@@ -14,7 +14,10 @@
from cg.meta.workflow.analysis import AnalysisAPI
from cg.meta.workflow.nf_handlers import NextflowHandler, NfTowerHandler
from cg.models.cg_config import CGConfig
from cg.models.deliverables.metric_deliverables import MetricsBase, MetricsDeliverablesCondition
from cg.models.deliverables.metric_deliverables import (
MetricsBase,
MetricsDeliverablesCondition,
)
from cg.models.fastq import FastqFileMeta
from cg.models.nf_analysis import FileDeliverable, PipelineDeliverables
from cg.models.rnafusion.rnafusion import CommandArgs
@@ -68,7 +71,7 @@ def get_workflow_manager(self) -> str:
return WorkflowManager.Tower.value

def get_workflow_version(self, case_id: str) -> str:
"""Get pipeline version from config."""
"""Get workflow version from config."""
return self.revision

def get_nextflow_config_content(self) -> str | None:
@@ -237,7 +240,7 @@ def _run_analysis_with_nextflow(
LOG.info("Workflow will be executed using Nextflow")
parameters: list[str] = NextflowHandler.get_nextflow_run_parameters(
case_id=case_id,
pipeline_path=self.nfcore_workflow_path,
workflow_path=self.nfcore_workflow_path,
root_dir=self.root_dir,
command_args=command_args.dict(),
)
@@ -337,7 +340,7 @@ def get_multiqc_json_path(self, case_id: str) -> Path:
)

def get_workflow_metrics(self) -> dict:
"""Get nf-core pipeline metrics constants."""
"""Get nf-core workflow metrics constants."""
return {}

def get_multiqc_json_metrics(self, case_id: str) -> list[MetricsBase]:
4 changes: 2 additions & 2 deletions cg/meta/workflow/nf_handlers.py
@@ -114,7 +114,7 @@ def get_variables_to_export() -> dict[str, str]:

@classmethod
def get_nextflow_run_parameters(
cls, case_id: str, pipeline_path: str, root_dir: str, command_args: dict
cls, case_id: str, workflow_path: str, root_dir: str, command_args: dict
) -> list[str]:
"""Returns a Nextflow run command given a dictionary with arguments."""

@@ -135,7 +135,7 @@ def get_nextflow_run_parameters(
),
exclude_true=True,
)
return nextflow_options + ["run", pipeline_path] + run_options
return nextflow_options + ["run", workflow_path] + run_options

@staticmethod
def get_head_job_sbatch_path(case_directory: Path) -> Path:
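
As the final return statement shows, the renamed `workflow_path` parameter becomes the positional argument handed to `nextflow run`, which is why the call site in `NfAnalysisAPI._run_analysis_with_nextflow` had to switch keywords in the same commit. A simplified sketch of that assembly (the real helper derives its option lists from `command_args`, and the values below are placeholders):

```python
def get_nextflow_run_parameters_sketch(
    workflow_path: str, nextflow_options: list[str], run_options: list[str]
) -> list[str]:
    """Simplified stand-in for NextflowHandler.get_nextflow_run_parameters."""
    # The workflow path (formerly `pipeline_path`) is what `nextflow run` receives.
    return nextflow_options + ["run", workflow_path] + run_options


parameters = get_nextflow_run_parameters_sketch(
    workflow_path="/workflows/nf-core/rnafusion",  # placeholder path
    nextflow_options=["-log", "case.log"],  # placeholder options
    run_options=["-profile", "singularity"],
)
print(" ".join(["nextflow", *parameters]))
# nextflow -log case.log run /workflows/nf-core/rnafusion -profile singularity
```
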
4 changes: 2 additions & 2 deletions cg/meta/workflow/rnafusion.py
@@ -37,13 +37,13 @@ def __init__(
):
super().__init__(config=config, workflow=workflow)
self.root_dir: str = config.rnafusion.root
self.nfcore_workflow_path: str = config.rnafusion.pipeline_path
self.nfcore_workflow_path: str = config.rnafusion.workflow_path
self.references: str = config.rnafusion.references
self.profile: str = config.rnafusion.profile
self.conda_env: str = config.rnafusion.conda_env
self.conda_binary: str = config.rnafusion.conda_binary
self.tower_binary_path: str = config.tower_binary_path
self.tower_workflow: str = config.rnafusion.tower_pipeline
self.tower_workflow: str = config.rnafusion.tower_workflow
self.account: str = config.rnafusion.slurm.account
self.email: str = config.rnafusion.slurm.mail_user
self.compute_env_base: str = config.rnafusion.compute_env
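
The constructor attributes map one-to-one onto fields of the `rnafusion` configuration section, so renaming the attributes implies renaming the corresponding keys in deployed config files. A hedged sketch of what that section might look like after the change, written as the Python dict a loaded config would produce (only the renamed keys come from this diff; every value and the exact set of sibling keys are assumptions):

```python
# Assumed shape of the `rnafusion` config section after this commit;
# paths, account names and versions are placeholders.
rnafusion_config: dict = {
    "root": "/analyses/rnafusion",
    "binary_path": "/usr/local/bin/nextflow",
    "workflow_path": "/workflows/nf-core/rnafusion",  # was `pipeline_path`
    "references": "/references/rnafusion",
    "profile": "singularity",
    "conda_env": "S_rnafusion",
    "conda_binary": None,
    "compute_env": "nf_tower_compute_env",
    "launch_directory": "/launch/rnafusion",
    "revision": "2.3.4",
    "tower_workflow": "rnafusion",  # was `tower_pipeline`
    "slurm": {"account": "development", "mail_user": "user@example.com"},
}
```
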
9 changes: 6 additions & 3 deletions cg/meta/workflow/taxprofiler.py
@@ -12,7 +12,10 @@
from cg.models.cg_config import CGConfig
from cg.models.deliverables.metric_deliverables import MetricsBase, MultiqcDataJson
from cg.models.fastq import FastqFileMeta
from cg.models.taxprofiler.taxprofiler import TaxprofilerParameters, TaxprofilerSampleSheetEntry
from cg.models.taxprofiler.taxprofiler import (
TaxprofilerParameters,
TaxprofilerSampleSheetEntry,
)
from cg.store.models import Case, Sample

LOG = logging.getLogger(__name__)
@@ -29,15 +32,15 @@ def __init__(
):
super().__init__(config=config, workflow=workflow)
self.root_dir: str = config.taxprofiler.root
self.nfcore_workflow_path: str = config.taxprofiler.pipeline_path
self.nfcore_workflow_path: str = config.taxprofiler.workflow_path
self.conda_env: str = config.taxprofiler.conda_env
self.conda_binary: str = config.taxprofiler.conda_binary
self.profile: str = config.taxprofiler.profile
self.revision: str = config.taxprofiler.revision
self.hostremoval_reference: Path = Path(config.taxprofiler.hostremoval_reference)
self.databases: Path = Path(config.taxprofiler.databases)
self.tower_binary_path: str = config.tower_binary_path
self.tower_workflow: str = config.taxprofiler.tower_pipeline
self.tower_workflow: str = config.taxprofiler.tower_workflow
self.account: str = config.taxprofiler.slurm.account
self.email: str = config.taxprofiler.slurm.mail_user
self.nextflow_binary_path: str = config.taxprofiler.binary_path
14 changes: 7 additions & 7 deletions cg/models/cg_config.py
@@ -152,7 +152,7 @@ class MipConfig(BaseModel):
conda_binary: str | None = None
conda_env: str
mip_config: str
pipeline: str
workflow: str
root: str
script: str

@@ -162,28 +162,28 @@
conda_binary: str | None = None
conda_env: str
launch_directory: str
pipeline_path: str
workflow_path: str
profile: str
references: str
revision: str
root: str
slurm: SlurmConfig
tower_pipeline: str
tower_workflow: str


class RnafusionConfig(CommonAppConfig):
root: str
references: str
binary_path: str
pipeline_path: str
workflow_path: str
conda_env: str
compute_env: str
profile: str
conda_binary: str | None = None
launch_directory: str
revision: str
slurm: SlurmConfig
tower_pipeline: str
tower_workflow: str


class TaxprofilerConfig(CommonAppConfig):
@@ -193,12 +193,12 @@ class TaxprofilerConfig(CommonAppConfig):
compute_env: str
databases: str
hostremoval_reference: str
pipeline_path: str
workflow_path: str
profile: str
revision: str
root: str
slurm: SlurmConfig
tower_pipeline: str
tower_workflow: str


class MicrosaltConfig(BaseModel):
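
Because the models above now declare `workflow`, `workflow_path` and `tower_workflow`, configuration files that still carry the old key names will no longer validate, which is presumably why the commit is labelled `(major)`. A minimal sketch of that effect with a toy model (the real classes require many more fields than shown here):

```python
from pydantic import BaseModel, ValidationError


class WorkflowConfigSketch(BaseModel):
    """Toy model mirroring only the renamed fields."""

    workflow_path: str
    tower_workflow: str


new_style = {"workflow_path": "/workflows/taxprofiler", "tower_workflow": "taxprofiler"}
old_style = {"pipeline_path": "/workflows/taxprofiler", "tower_pipeline": "taxprofiler"}

print(WorkflowConfigSketch(**new_style))  # validates with the renamed keys

try:
    WorkflowConfigSketch(**old_style)
except ValidationError as error:
    # The old keys are ignored, so both renamed fields are reported as missing.
    print(error)
```
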
2 changes: 1 addition & 1 deletion cg/models/nf_analysis.py
@@ -59,6 +59,6 @@ def set_path_as_string(cls, file_path: str | Path) -> str | None:


class PipelineDeliverables(BaseModel):
"""Specification for pipeline deliverables."""
"""Specification for workflow deliverables."""

files: list[FileDeliverable]
52 changes: 26 additions & 26 deletions cg/models/report/metadata.py
@@ -16,7 +16,7 @@ class SampleMetadataModel(BaseModel):
Attributes:
million_read_pairs: number of million read pairs obtained; source: StatusDB/sample/reads (/2*10^6)
duplicates: fraction of mapped sequence that is marked as duplicate; source: pipeline workflow
duplicates: fraction of mapped sequence that is marked as duplicate; source: workflow
"""

million_read_pairs: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD
@@ -28,10 +28,10 @@ class MipDNASampleMetadataModel(SampleMetadataModel):
Attributes:
bait_set: panel bed used for the analysis; source: LIMS
gender: gender estimated by the workflow; source: pipeline workflow
mapped_reads: percentage of reads aligned to the reference sequence; source: pipeline workflow
mean_target_coverage: mean coverage of a target region; source: pipeline workflow
pct_10x: percent of targeted bases that are covered to 10X coverage or more; source: pipeline workflow
gender: gender estimated by the workflow; source: workflow
mapped_reads: percentage of reads aligned to the reference sequence; source: workflow
mean_target_coverage: mean coverage of a target region; source: workflow
pct_10x: percent of targeted bases that are covered to 10X coverage or more; source: workflow
"""

bait_set: Annotated[str, BeforeValidator(get_report_string)] = NA_FIELD
@@ -45,8 +45,8 @@ class BalsamicSampleMetadataModel(SampleMetadataModel):
"""Metrics and trending data model associated to a specific BALSAMIC sample.
Attributes:
mean_insert_size: mean insert size of the distribution; source: pipeline workflow
fold_80: fold 80 base penalty; source: pipeline workflow
mean_insert_size: mean insert size of the distribution; source: workflow
fold_80: fold 80 base penalty; source: workflow
"""

mean_insert_size: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD
@@ -58,10 +58,10 @@ class BalsamicTargetedSampleMetadataModel(BalsamicSampleMetadataModel):
Attributes:
bait_set: panel bed used for the analysis; source: LIMS
bait_set_version: panel bed version; source: pipeline workflow
median_target_coverage: median coverage of a target region in bases; source: pipeline workflow
pct_250x: percent of targeted bases that are covered to 250X coverage or more; source: pipeline workflow
pct_500x: percent of targeted bases that are covered to 500X coverage or more; source: pipeline workflow
bait_set_version: panel bed version; source: workflow
median_target_coverage: median coverage of a target region in bases; source: workflow
pct_250x: percent of targeted bases that are covered to 250X coverage or more; source: workflow
pct_500x: percent of targeted bases that are covered to 500X coverage or more; source: workflow
"""

bait_set: Annotated[str, BeforeValidator(get_report_string)] = NA_FIELD
@@ -76,9 +76,9 @@ class BalsamicWGSSampleMetadataModel(BalsamicSampleMetadataModel):
"""Metrics and trending data model associated to a specific BALSAMIC sample.
Attributes:
median_coverage: median coverage in bases of the genome territory; source: pipeline workflow
pct_15x: fraction of bases that attained at least 15X sequence coverage; source: pipeline workflow
pct_60x: fraction of bases that attained at least 15X sequence coverage; source: pipeline workflow
median_coverage: median coverage in bases of the genome territory; source: workflow
pct_15x: fraction of bases that attained at least 15X sequence coverage; source: workflow
pct_60x: fraction of bases that attained at least 15X sequence coverage; source: workflow
"""

median_coverage: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD
@@ -91,21 +91,21 @@ class RnafusionSampleMetadataModel(SampleMetadataModel):
"""Metrics and trending data model associated to a specific Rnafusion sample.
Attributes:
bias_5_3: bias is the ratio between read counts; source: pipeline workflow
gc_content: percentage of GC bases calculated on trimmed reads; source: pipeline workflow
bias_5_3: bias is the ratio between read counts; source: workflow
gc_content: percentage of GC bases calculated on trimmed reads; source: workflow
input_amount: input amount in ng; source: LIMS
insert_size: distance between paired-end sequencing reads in a DNA fragment
insert_size_peak: insert size length; source: pipeline workflow
mapped_reads: percentage of reads aligned to the reference sequence; source: pipeline workflow
mean_length_r1: average length of reads that pass QC filters; source: pipeline workflow
mrna_bases: proportion of bases that originate from messenger RNA; source: pipeline workflow
pct_adapter: proportion of reads that contain adapter sequences; source: pipeline workflow
pct_surviving: percentage of reads that pass quality control filters; source: pipeline workflow
q20_rate: proportion of bases with a minimum Phred score of 20; source: pipeline workflow
q30_rate: proportion of bases with a minimum Phred score of 30; source: pipeline workflow
ribosomal_bases: proportion of bases that originate from ribosomal RNA; source: pipeline workflow
insert_size_peak: insert size length; source: workflow
mapped_reads: percentage of reads aligned to the reference sequence; source: workflow
mean_length_r1: average length of reads that pass QC filters; source: workflow
mrna_bases: proportion of bases that originate from messenger RNA; source: workflow
pct_adapter: proportion of reads that contain adapter sequences; source: workflow
pct_surviving: percentage of reads that pass quality control filters; source: workflow
q20_rate: proportion of bases with a minimum Phred score of 20; source: workflow
q30_rate: proportion of bases with a minimum Phred score of 30; source: workflow
ribosomal_bases: proportion of bases that originate from ribosomal RNA; source: workflow
rin: RNA integrity number; source: LIMS
uniquely_mapped_reads: percentage of mapped reads; source: pipeline workflow
uniquely_mapped_reads: percentage of mapped reads; source: workflow
"""

bias_5_3: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD
8 changes: 4 additions & 4 deletions cg/models/report/report.py
@@ -65,9 +65,9 @@ class DataAnalysisModel(BaseModel):
data_delivery: data delivery requested by the customer; source: StatusDB/family/data_delivery
workflow: actual workflow used for analysis; source: statusDB/analysis/pipeline
workflow_version: workflow version; source: statusDB/analysis/pipeline_version
type: analysis type carried out; source: pipeline workflow
genome_build: build version of the genome reference; source: pipeline workflow
variant_callers: variant-calling filters; source: pipeline workflow
type: analysis type carried out; source: workflow
genome_build: build version of the genome reference; source: workflow
variant_callers: variant-calling filters; source: workflow
panels: list of case specific panels; source: StatusDB/family/panels
scout_files: list of file names uploaded to Scout
"""
@@ -105,7 +105,7 @@ class CaseModel(BaseModel):
name: case name; source: StatusDB/family/name
id: case ID; source: StatusDB/family/internal_id
samples: list of samples associated to a case/family
data_analysis: pipeline attributes
data_analysis: workflow attributes
applications: case associated unique applications
"""

6 changes: 3 additions & 3 deletions cg/store/crud/read.py
@@ -780,7 +780,7 @@ def verify_case_exists(self, case_internal_id: str) -> None:
LOG.info(f"Case {case_internal_id} exists in Status DB")

def get_running_cases_in_workflow(self, workflow: Workflow) -> list[Case]:
"""Return all running cases in a pipeline."""
"""Return all running cases in a workflow."""
return apply_case_filter(
cases=self._get_query(table=Case),
filter_functions=[CaseFilter.FILTER_WITH_WORKFLOW, CaseFilter.FILTER_IS_RUNNING],
@@ -1524,7 +1524,7 @@ def get_analyses_for_case_started_at_before(
def get_analyses_for_workflow_started_at_before(
self, workflow: Workflow, started_at_before: datetime
) -> list[Analysis]:
"""Return all analyses for a pipeline started before a certain date."""
"""Return all analyses for a workflow started before a certain date."""
filter_functions: list[AnalysisFilter] = [
AnalysisFilter.FILTER_WITH_WORKFLOW,
AnalysisFilter.FILTER_STARTED_AT_BEFORE,
@@ -1537,7 +1537,7 @@ def get_analyses_for_workflow_started_at_before(
).all()

def get_analyses_started_at_before(self, started_at_before: datetime) -> list[Analysis]:
"""Return all analyses for a pipeline started before a certain date."""
"""Return all analyses for a workflow started before a certain date."""
return apply_analysis_filter(
filter_functions=[AnalysisFilter.FILTER_STARTED_AT_BEFORE],
analyses=self._get_query(table=Analysis),