Refactor pipeline to workflow config (#2926) (major)
### Changed

- Refactor pipeline to workflow config
henrikstranneheim authored Feb 12, 2024
1 parent 95e42cc commit e94dbd9
Showing 30 changed files with 125 additions and 118 deletions.
10 changes: 5 additions & 5 deletions cg/cli/workflow/nf_analysis.py
@@ -2,11 +2,11 @@

import click

from cg.models.cg_config import CGConfig
from cg.meta.workflow.nf_analysis import NfAnalysisAPI
from cg.constants.constants import MetaApis
from cg.cli.workflow.commands import ARGUMENT_CASE_ID, OPTION_DRY
from cg.constants.constants import MetaApis
from cg.exc import CgError
from cg.meta.workflow.nf_analysis import NfAnalysisAPI
from cg.models.cg_config import CGConfig

OPTION_WORKDIR = click.option(
"--work-dir",
@@ -43,7 +43,7 @@
OPTION_PARAMS_FILE = click.option(
"--params-file",
type=click.Path(),
help="Nextflow pipeline-specific parameter file path",
help="Nextflow workflow-specific parameter file path",
)

OPTION_USE_NEXTFLOW = click.option(
@@ -52,7 +52,7 @@
is_flag=True,
default=False,
show_default=True,
help="Execute pipeline using nextflow",
help="Execute workflow using nextflow",
)

OPTION_REVISION = click.option(
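
The help-text edits above touch shared, module-level click options (for example `OPTION_PARAMS_FILE` and `OPTION_USE_NEXTFLOW`) that the nf-core workflow commands reuse. The commands consuming them are not part of this hunk, so the sketch below only illustrates how such an option is typically attached; the command name and callback are hypothetical.

```python
import click

# Hypothetical re-creation of one shared option from this hunk; the real
# definition lives in cg/cli/workflow/nf_analysis.py.
OPTION_USE_NEXTFLOW = click.option(
    "--use-nextflow",
    is_flag=True,
    default=False,
    show_default=True,
    help="Execute workflow using nextflow",
)


@click.command("run")  # hypothetical command, not shown in this diff
@click.argument("case_id")
@OPTION_USE_NEXTFLOW
def run(case_id: str, use_nextflow: bool) -> None:
    """Sketch of a workflow command consuming the shared option."""
    executor = "nextflow" if use_nextflow else "Tower"
    click.echo(f"Running case {case_id} via {executor}")
```
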
2 changes: 1 addition & 1 deletion cg/meta/workflow/mip_dna.py
@@ -26,7 +26,7 @@ def conda_env(self) -> str:

@property
def mip_workflow(self) -> str:
return self.config.mip_rd_dna.pipeline
return self.config.mip_rd_dna.workflow

@property
def script(self) -> str:
2 changes: 1 addition & 1 deletion cg/meta/workflow/mip_rna.py
@@ -24,7 +24,7 @@ def conda_env(self) -> str:

@property
def mip_workflow(self) -> str:
return self.config.mip_rd_rna.pipeline
return self.config.mip_rd_rna.workflow

@property
def script(self) -> str:
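
Both MIP analysis APIs expose the renamed configuration field through the same `mip_workflow` property. A minimal sketch of that delegation with toy stand-ins (the real classes in mip_dna.py and mip_rna.py carry many more fields, and the example value is a placeholder):

```python
from pydantic import BaseModel


class MipConfigSketch(BaseModel):
    """Toy stand-in for MipConfig; only the renamed field is modelled."""

    workflow: str


class MipAnalysisSketch:
    """Toy stand-in for the MIP analysis API classes."""

    def __init__(self, mip_config: MipConfigSketch) -> None:
        self._mip_config = mip_config

    @property
    def mip_workflow(self) -> str:
        # After this commit the property reads `.workflow` instead of `.pipeline`.
        return self._mip_config.workflow


api = MipAnalysisSketch(MipConfigSketch(workflow="rd_dna"))  # placeholder value
assert api.mip_workflow == "rd_dna"
```
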
11 changes: 7 additions & 4 deletions cg/meta/workflow/nf_analysis.py
@@ -14,7 +14,10 @@
from cg.meta.workflow.analysis import AnalysisAPI
from cg.meta.workflow.nf_handlers import NextflowHandler, NfTowerHandler
from cg.models.cg_config import CGConfig
from cg.models.deliverables.metric_deliverables import MetricsBase, MetricsDeliverablesCondition
from cg.models.deliverables.metric_deliverables import (
MetricsBase,
MetricsDeliverablesCondition,
)
from cg.models.fastq import FastqFileMeta
from cg.models.nf_analysis import FileDeliverable, PipelineDeliverables
from cg.models.rnafusion.rnafusion import CommandArgs
@@ -68,7 +71,7 @@ def get_workflow_manager(self) -> str:
return WorkflowManager.Tower.value

def get_workflow_version(self, case_id: str) -> str:
"""Get pipeline version from config."""
"""Get workflow version from config."""
return self.revision

def get_nextflow_config_content(self) -> str | None:
@@ -237,7 +240,7 @@ def _run_analysis_with_nextflow(
LOG.info("Workflow will be executed using Nextflow")
parameters: list[str] = NextflowHandler.get_nextflow_run_parameters(
case_id=case_id,
pipeline_path=self.nfcore_workflow_path,
workflow_path=self.nfcore_workflow_path,
root_dir=self.root_dir,
command_args=command_args.dict(),
)
@@ -337,7 +340,7 @@ def get_multiqc_json_path(self, case_id: str) -> Path:
)

def get_workflow_metrics(self) -> dict:
"""Get nf-core pipeline metrics constants."""
"""Get nf-core workflow metrics constants."""
return {}

def get_multiqc_json_metrics(self, case_id: str) -> list[MetricsBase]:
4 changes: 2 additions & 2 deletions cg/meta/workflow/nf_handlers.py
@@ -114,7 +114,7 @@ def get_variables_to_export() -> dict[str, str]:

@classmethod
def get_nextflow_run_parameters(
cls, case_id: str, pipeline_path: str, root_dir: str, command_args: dict
cls, case_id: str, workflow_path: str, root_dir: str, command_args: dict
) -> list[str]:
"""Returns a Nextflow run command given a dictionary with arguments."""

@@ -135,7 +135,7 @@ def get_nextflow_run_parameters(
),
exclude_true=True,
)
return nextflow_options + ["run", pipeline_path] + run_options
return nextflow_options + ["run", workflow_path] + run_options

@staticmethod
def get_head_job_sbatch_path(case_directory: Path) -> Path:
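
As the final return statement shows, the renamed `workflow_path` parameter becomes the positional argument handed to `nextflow run`, which is why the call site in `NfAnalysisAPI._run_analysis_with_nextflow` had to switch keywords in the same commit. A simplified sketch of that assembly (the real helper derives its option lists from `command_args`, and the values below are placeholders):

```python
def get_nextflow_run_parameters_sketch(
    workflow_path: str, nextflow_options: list[str], run_options: list[str]
) -> list[str]:
    """Simplified stand-in for NextflowHandler.get_nextflow_run_parameters."""
    # The workflow path (formerly `pipeline_path`) is what `nextflow run` receives.
    return nextflow_options + ["run", workflow_path] + run_options


parameters = get_nextflow_run_parameters_sketch(
    workflow_path="/workflows/nf-core/rnafusion",  # placeholder path
    nextflow_options=["-log", "case.log"],  # placeholder options
    run_options=["-profile", "singularity"],
)
print(" ".join(["nextflow", *parameters]))
# nextflow -log case.log run /workflows/nf-core/rnafusion -profile singularity
```
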
4 changes: 2 additions & 2 deletions cg/meta/workflow/rnafusion.py
@@ -37,13 +37,13 @@ def __init__(
):
super().__init__(config=config, workflow=workflow)
self.root_dir: str = config.rnafusion.root
self.nfcore_workflow_path: str = config.rnafusion.pipeline_path
self.nfcore_workflow_path: str = config.rnafusion.workflow_path
self.references: str = config.rnafusion.references
self.profile: str = config.rnafusion.profile
self.conda_env: str = config.rnafusion.conda_env
self.conda_binary: str = config.rnafusion.conda_binary
self.tower_binary_path: str = config.tower_binary_path
self.tower_workflow: str = config.rnafusion.tower_pipeline
self.tower_workflow: str = config.rnafusion.tower_workflow
self.account: str = config.rnafusion.slurm.account
self.email: str = config.rnafusion.slurm.mail_user
self.compute_env_base: str = config.rnafusion.compute_env
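
The constructor attributes map one-to-one onto fields of the `rnafusion` configuration section, so renaming the attributes implies renaming the corresponding keys in deployed config files. A hedged sketch of what that section might look like after the change, written as the Python dict a loaded config would produce (only the renamed keys come from this diff; every value and the exact set of sibling keys are assumptions):

```python
# Assumed shape of the `rnafusion` config section after this commit;
# paths, account names and versions are placeholders.
rnafusion_config: dict = {
    "root": "/analyses/rnafusion",
    "binary_path": "/usr/local/bin/nextflow",
    "workflow_path": "/workflows/nf-core/rnafusion",  # was `pipeline_path`
    "references": "/references/rnafusion",
    "profile": "singularity",
    "conda_env": "S_rnafusion",
    "conda_binary": None,
    "compute_env": "nf_tower_compute_env",
    "launch_directory": "/launch/rnafusion",
    "revision": "2.3.4",
    "tower_workflow": "rnafusion",  # was `tower_pipeline`
    "slurm": {"account": "development", "mail_user": "user@example.com"},
}
```
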
9 changes: 6 additions & 3 deletions cg/meta/workflow/taxprofiler.py
@@ -12,7 +12,10 @@
from cg.models.cg_config import CGConfig
from cg.models.deliverables.metric_deliverables import MetricsBase, MultiqcDataJson
from cg.models.fastq import FastqFileMeta
from cg.models.taxprofiler.taxprofiler import TaxprofilerParameters, TaxprofilerSampleSheetEntry
from cg.models.taxprofiler.taxprofiler import (
TaxprofilerParameters,
TaxprofilerSampleSheetEntry,
)
from cg.store.models import Case, Sample

LOG = logging.getLogger(__name__)
@@ -29,15 +32,15 @@ def __init__(
):
super().__init__(config=config, workflow=workflow)
self.root_dir: str = config.taxprofiler.root
self.nfcore_workflow_path: str = config.taxprofiler.pipeline_path
self.nfcore_workflow_path: str = config.taxprofiler.workflow_path
self.conda_env: str = config.taxprofiler.conda_env
self.conda_binary: str = config.taxprofiler.conda_binary
self.profile: str = config.taxprofiler.profile
self.revision: str = config.taxprofiler.revision
self.hostremoval_reference: Path = Path(config.taxprofiler.hostremoval_reference)
self.databases: Path = Path(config.taxprofiler.databases)
self.tower_binary_path: str = config.tower_binary_path
self.tower_workflow: str = config.taxprofiler.tower_pipeline
self.tower_workflow: str = config.taxprofiler.tower_workflow
self.account: str = config.taxprofiler.slurm.account
self.email: str = config.taxprofiler.slurm.mail_user
self.nextflow_binary_path: str = config.taxprofiler.binary_path
14 changes: 7 additions & 7 deletions cg/models/cg_config.py
@@ -152,7 +152,7 @@ class MipConfig(BaseModel):
conda_binary: str | None = None
conda_env: str
mip_config: str
pipeline: str
workflow: str
root: str
script: str

@@ -162,28 +162,28 @@
conda_binary: str | None = None
conda_env: str
launch_directory: str
pipeline_path: str
workflow_path: str
profile: str
references: str
revision: str
root: str
slurm: SlurmConfig
tower_pipeline: str
tower_workflow: str


class RnafusionConfig(CommonAppConfig):
root: str
references: str
binary_path: str
pipeline_path: str
workflow_path: str
conda_env: str
compute_env: str
profile: str
conda_binary: str | None = None
launch_directory: str
revision: str
slurm: SlurmConfig
tower_pipeline: str
tower_workflow: str


class TaxprofilerConfig(CommonAppConfig):
@@ -193,12 +193,12 @@ class TaxprofilerConfig(CommonAppConfig):
compute_env: str
databases: str
hostremoval_reference: str
pipeline_path: str
workflow_path: str
profile: str
revision: str
root: str
slurm: SlurmConfig
tower_pipeline: str
tower_workflow: str


class MicrosaltConfig(BaseModel):
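
Because the models above now declare `workflow`, `workflow_path` and `tower_workflow`, configuration files that still carry the old key names will no longer validate, which is presumably why the commit is labelled `(major)`. A minimal sketch of that effect with a toy model (the real classes require many more fields than shown here):

```python
from pydantic import BaseModel, ValidationError


class WorkflowConfigSketch(BaseModel):
    """Toy model mirroring only the renamed fields."""

    workflow_path: str
    tower_workflow: str


new_style = {"workflow_path": "/workflows/taxprofiler", "tower_workflow": "taxprofiler"}
old_style = {"pipeline_path": "/workflows/taxprofiler", "tower_pipeline": "taxprofiler"}

print(WorkflowConfigSketch(**new_style))  # validates with the renamed keys

try:
    WorkflowConfigSketch(**old_style)
except ValidationError as error:
    # The old keys are ignored, so both renamed fields are reported as missing.
    print(error)
```
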
2 changes: 1 addition & 1 deletion cg/models/nf_analysis.py
@@ -59,6 +59,6 @@ def set_path_as_string(cls, file_path: str | Path) -> str | None:


class PipelineDeliverables(BaseModel):
"""Specification for pipeline deliverables."""
"""Specification for workflow deliverables."""

files: list[FileDeliverable]
52 changes: 26 additions & 26 deletions cg/models/report/metadata.py
@@ -16,7 +16,7 @@ class SampleMetadataModel(BaseModel):
Attributes:
million_read_pairs: number of million read pairs obtained; source: StatusDB/sample/reads (/2*10^6)
duplicates: fraction of mapped sequence that is marked as duplicate; source: pipeline workflow
duplicates: fraction of mapped sequence that is marked as duplicate; source: workflow
"""

million_read_pairs: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD
@@ -28,10 +28,10 @@ class MipDNASampleMetadataModel(SampleMetadataModel):
Attributes:
bait_set: panel bed used for the analysis; source: LIMS
gender: gender estimated by the workflow; source: pipeline workflow
mapped_reads: percentage of reads aligned to the reference sequence; source: pipeline workflow
mean_target_coverage: mean coverage of a target region; source: pipeline workflow
pct_10x: percent of targeted bases that are covered to 10X coverage or more; source: pipeline workflow
gender: gender estimated by the workflow; source: workflow
mapped_reads: percentage of reads aligned to the reference sequence; source: workflow
mean_target_coverage: mean coverage of a target region; source: workflow
pct_10x: percent of targeted bases that are covered to 10X coverage or more; source: workflow
"""

bait_set: Annotated[str, BeforeValidator(get_report_string)] = NA_FIELD
@@ -45,8 +45,8 @@ class BalsamicSampleMetadataModel(SampleMetadataModel):
"""Metrics and trending data model associated to a specific BALSAMIC sample.
Attributes:
mean_insert_size: mean insert size of the distribution; source: pipeline workflow
fold_80: fold 80 base penalty; source: pipeline workflow
mean_insert_size: mean insert size of the distribution; source: workflow
fold_80: fold 80 base penalty; source: workflow
"""

mean_insert_size: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD
@@ -58,10 +58,10 @@ class BalsamicTargetedSampleMetadataModel(BalsamicSampleMetadataModel):
Attributes:
bait_set: panel bed used for the analysis; source: LIMS
bait_set_version: panel bed version; source: pipeline workflow
median_target_coverage: median coverage of a target region in bases; source: pipeline workflow
pct_250x: percent of targeted bases that are covered to 250X coverage or more; source: pipeline workflow
pct_500x: percent of targeted bases that are covered to 500X coverage or more; source: pipeline workflow
bait_set_version: panel bed version; source: workflow
median_target_coverage: median coverage of a target region in bases; source: workflow
pct_250x: percent of targeted bases that are covered to 250X coverage or more; source: workflow
pct_500x: percent of targeted bases that are covered to 500X coverage or more; source: workflow
"""

bait_set: Annotated[str, BeforeValidator(get_report_string)] = NA_FIELD
@@ -76,9 +76,9 @@ class BalsamicWGSSampleMetadataModel(BalsamicSampleMetadataModel):
"""Metrics and trending data model associated to a specific BALSAMIC sample.
Attributes:
median_coverage: median coverage in bases of the genome territory; source: pipeline workflow
pct_15x: fraction of bases that attained at least 15X sequence coverage; source: pipeline workflow
pct_60x: fraction of bases that attained at least 15X sequence coverage; source: pipeline workflow
median_coverage: median coverage in bases of the genome territory; source: workflow
pct_15x: fraction of bases that attained at least 15X sequence coverage; source: workflow
pct_60x: fraction of bases that attained at least 15X sequence coverage; source: workflow
"""

median_coverage: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD
@@ -91,21 +91,21 @@ class RnafusionSampleMetadataModel(SampleMetadataModel):
"""Metrics and trending data model associated to a specific Rnafusion sample.
Attributes:
bias_5_3: bias is the ratio between read counts; source: pipeline workflow
gc_content: percentage of GC bases calculated on trimmed reads; source: pipeline workflow
bias_5_3: bias is the ratio between read counts; source: workflow
gc_content: percentage of GC bases calculated on trimmed reads; source: workflow
input_amount: input amount in ng; source: LIMS
insert_size: distance between paired-end sequencing reads in a DNA fragment
insert_size_peak: insert size length; source: pipeline workflow
mapped_reads: percentage of reads aligned to the reference sequence; source: pipeline workflow
mean_length_r1: average length of reads that pass QC filters; source: pipeline workflow
mrna_bases: proportion of bases that originate from messenger RNA; source: pipeline workflow
pct_adapter: proportion of reads that contain adapter sequences; source: pipeline workflow
pct_surviving: percentage of reads that pass quality control filters; source: pipeline workflow
q20_rate: proportion of bases with a minimum Phred score of 20; source: pipeline workflow
q30_rate: proportion of bases with a minimum Phred score of 30; source: pipeline workflow
ribosomal_bases: proportion of bases that originate from ribosomal RNA; source: pipeline workflow
insert_size_peak: insert size length; source: workflow
mapped_reads: percentage of reads aligned to the reference sequence; source: workflow
mean_length_r1: average length of reads that pass QC filters; source: workflow
mrna_bases: proportion of bases that originate from messenger RNA; source: workflow
pct_adapter: proportion of reads that contain adapter sequences; source: workflow
pct_surviving: percentage of reads that pass quality control filters; source: workflow
q20_rate: proportion of bases with a minimum Phred score of 20; source: workflow
q30_rate: proportion of bases with a minimum Phred score of 30; source: workflow
ribosomal_bases: proportion of bases that originate from ribosomal RNA; source: workflow
rin: RNA integrity number; source: LIMS
uniquely_mapped_reads: percentage of mapped reads; source: pipeline workflow
uniquely_mapped_reads: percentage of mapped reads; source: workflow
"""

bias_5_3: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD
8 changes: 4 additions & 4 deletions cg/models/report/report.py
@@ -65,9 +65,9 @@ class DataAnalysisModel(BaseModel):
data_delivery: data delivery requested by the customer; source: StatusDB/family/data_delivery
workflow: actual workflow used for analysis; source: statusDB/analysis/pipeline
workflow_version: workflow version; source: statusDB/analysis/pipeline_version
type: analysis type carried out; source: pipeline workflow
genome_build: build version of the genome reference; source: pipeline workflow
variant_callers: variant-calling filters; source: pipeline workflow
type: analysis type carried out; source: workflow
genome_build: build version of the genome reference; source: workflow
variant_callers: variant-calling filters; source: workflow
panels: list of case specific panels; source: StatusDB/family/panels
scout_files: list of file names uploaded to Scout
"""
@@ -105,7 +105,7 @@ class CaseModel(BaseModel):
name: case name; source: StatusDB/family/name
id: case ID; source: StatusDB/family/internal_id
samples: list of samples associated to a case/family
data_analysis: pipeline attributes
data_analysis: workflow attributes
applications: case associated unique applications
"""

6 changes: 3 additions & 3 deletions cg/store/crud/read.py
@@ -780,7 +780,7 @@ def verify_case_exists(self, case_internal_id: str) -> None:
LOG.info(f"Case {case_internal_id} exists in Status DB")

def get_running_cases_in_workflow(self, workflow: Workflow) -> list[Case]:
"""Return all running cases in a pipeline."""
"""Return all running cases in a workflow."""
return apply_case_filter(
cases=self._get_query(table=Case),
filter_functions=[CaseFilter.FILTER_WITH_WORKFLOW, CaseFilter.FILTER_IS_RUNNING],
@@ -1524,7 +1524,7 @@ def get_analyses_for_case_started_at_before(
def get_analyses_for_workflow_started_at_before(
self, workflow: Workflow, started_at_before: datetime
) -> list[Analysis]:
"""Return all analyses for a pipeline started before a certain date."""
"""Return all analyses for a workflow started before a certain date."""
filter_functions: list[AnalysisFilter] = [
AnalysisFilter.FILTER_WITH_WORKFLOW,
AnalysisFilter.FILTER_STARTED_AT_BEFORE,
@@ -1537,7 +1537,7 @@ def get_analyses_for_workflow_started_at_before(
).all()

def get_analyses_started_at_before(self, started_at_before: datetime) -> list[Analysis]:
"""Return all analyses for a pipeline started before a certain date."""
"""Return all analyses for a workflow started before a certain date."""
return apply_analysis_filter(
filter_functions=[AnalysisFilter.FILTER_STARTED_AT_BEFORE],
analyses=self._get_query(table=Analysis),