Skip to content

Commit

Permalink
add discriminator to nextflow qc models
Browse files Browse the repository at this point in the history
  • Loading branch information
diitaz93 committed Jan 28, 2025
1 parent 8cba7f7 commit 0b657db
Show file tree
Hide file tree
Showing 8 changed files with 39 additions and 12 deletions.
9 changes: 7 additions & 2 deletions cg/meta/workflow/nf_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -930,20 +930,25 @@ def get_gene_panel(self, case_id: str, dry_run: bool = False) -> list[str]:
dry_run=dry_run,
)

def parse_analysis(
    self, qc_metrics_raw: list[MetricsBase], workflow: str, **kwargs
) -> NextflowAnalysis:
    """Parse Nextflow output analysis files and return an analysis model.

    The raw metrics are grouped per metric id into a mapping of
    ``{lower-cased metric name: value}``. Every group is additionally tagged
    with the workflow under the ``"type"`` key before the result is handed to
    ``NextflowAnalysis``.

    Args:
        qc_metrics_raw: Metrics to group; each one is read through its ``id``,
            ``name`` and ``value`` attributes.
        workflow: Value stored under ``"type"`` for every group.
        **kwargs: Accepted but not used by this implementation.

    Returns:
        The ``NextflowAnalysis`` built from the grouped metrics.
    """
    sample_metrics: dict[str, dict] = {}
    for metric in qc_metrics_raw:
        # setdefault creates the per-id dict on first sight, so no KeyError
        # round-trip is needed to tell "new id" from "known id".
        metrics_of_sample: dict = sample_metrics.setdefault(metric.id, {})
        metrics_of_sample[metric.name.lower()] = metric.value
        # Written last on every iteration so that a metric which happens to be
        # named "type" can never replace the workflow tag.
        metrics_of_sample["type"] = workflow
    return NextflowAnalysis(sample_metrics=sample_metrics)

def get_latest_metadata(self, case_id: str) -> NextflowAnalysis:
    """Return the analysis model of a Nextflow case.

    The metrics come from ``get_multiqc_json_metrics`` and the workflow passed
    on to ``parse_analysis`` is the ``data_analysis`` attribute of the case
    fetched from ``status_db`` by its internal id.
    """
    raw_metrics: list[MetricsBase] = self.get_multiqc_json_metrics(case_id)
    case_workflow: str = self.status_db.get_case_by_internal_id(case_id).data_analysis
    return self.parse_analysis(qc_metrics_raw=raw_metrics, workflow=case_workflow)

def clean_past_run_dirs(self, before_date: str, skip_confirmation: bool = False) -> None:
"""Clean past run directories"""
Expand Down
13 changes: 9 additions & 4 deletions cg/models/analysis.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,17 @@
from pydantic import BaseModel, ConfigDict
from typing import Annotated

from pydantic import BaseModel, ConfigDict, Field

from cg.models.raredisease.raredisease import RarediseaseQCMetrics
from cg.models.rnafusion.rnafusion import RnafusionQCMetrics
from cg.models.taxprofiler.taxprofiler import TaxprofilerQCMetrics
from cg.models.tomte.tomte import TomteQCMetrics

# Union of the per-workflow QC metrics models. The "type" field is declared as
# the discriminator, so the value found under "type" in the input selects which
# member of the union the data is validated against.
DiscriminatedMetric = Annotated[
RarediseaseQCMetrics | RnafusionQCMetrics | TaxprofilerQCMetrics | TomteQCMetrics,
Field(discriminator="type"),
]


class AnalysisModel(BaseModel):
"""Metadata analysis model"""
Expand All @@ -15,6 +22,4 @@ class AnalysisModel(BaseModel):
class NextflowAnalysis(AnalysisModel):
    """Analysis results model for Nextflow workflows.

    ``sample_metrics`` is keyed by a string id and holds one QC metrics model
    per key, typed as ``DiscriminatedMetric``.
    """

    sample_metrics: dict[str, DiscriminatedMetric]
6 changes: 5 additions & 1 deletion cg/models/raredisease/raredisease.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
from enum import StrEnum
from typing import Literal

from cg.constants.constants import SexOptions
from pydantic import Field

from cg.constants.constants import SexOptions, Workflow
from cg.models.nf_analysis import NextflowSampleSheetEntry, WorkflowParameters
from cg.models.qc_metrics import QCMetrics

Expand All @@ -12,6 +15,7 @@ class RarediseaseQCMetrics(QCMetrics):
percent_duplicates: float
predicted_sex_sex_check: SexOptions
total_reads: int
type: Literal[Workflow.RAREDISEASE]


class RarediseaseSampleSheetEntry(NextflowSampleSheetEntry):
Expand Down
5 changes: 4 additions & 1 deletion cg/models/rnafusion/rnafusion.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from cg.constants.constants import Strandedness
from typing import Literal

from cg.constants.constants import Strandedness, Workflow
from cg.models.nf_analysis import NextflowSampleSheetEntry, WorkflowParameters
from cg.models.qc_metrics import QCMetrics

Expand All @@ -19,6 +21,7 @@ class RnafusionQCMetrics(QCMetrics):
pct_duplication: float
read_pairs_examined: float
uniquely_mapped_percent: float
type: Literal[Workflow.RNAFUSION]


class RnafusionParameters(WorkflowParameters):
Expand Down
8 changes: 6 additions & 2 deletions cg/models/taxprofiler/taxprofiler.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
from pathlib import Path
from typing import Literal

from pydantic import BaseModel, Field
from pydantic import Field

from cg.constants import Workflow
from cg.constants.sequencing import SequencingPlatform
from cg.models.nf_analysis import NextflowSampleSheetEntry, WorkflowParameters
from cg.models.qc_metrics import QCMetrics


class TaxprofilerQCMetrics(BaseModel):
class TaxprofilerQCMetrics(QCMetrics):
"""Taxprofiler QC metrics."""

after_filtering_gc_content: float
Expand All @@ -17,6 +20,7 @@ class TaxprofilerQCMetrics(BaseModel):
pct_duplication: float
raw_total_sequences: float
reads_mapped: float
type: Literal[Workflow.TAXPROFILER]


class TaxprofilerParameters(WorkflowParameters):
Expand Down
4 changes: 3 additions & 1 deletion cg/models/tomte/tomte.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from enum import StrEnum
from pathlib import Path
from typing import Literal

from pydantic import field_validator

from cg.constants.constants import GenomeVersion, Strandedness
from cg.constants.constants import GenomeVersion, Strandedness, Workflow
from cg.constants.sample_sources import SourceType
from cg.models.nf_analysis import NextflowSampleSheetEntry, WorkflowParameters
from cg.models.qc_metrics import QCMetrics
Expand Down Expand Up @@ -87,3 +88,4 @@ class TomteQCMetrics(QCMetrics):
pct_ribosomal_bases: float
pct_surviving: float
uniquely_mapped_percent: float
type: Literal[Workflow.TOMTE]
1 change: 1 addition & 0 deletions tests/meta/workflow/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,7 @@ def rnafusion_metrics() -> dict[str, float]:
"pct_duplication": 14.8643,
"read_pairs_examined": 72391566.0,
"uniquely_mapped_percent": 91.02,
"type": Workflow.RNAFUSION,
}


Expand Down
5 changes: 4 additions & 1 deletion tests/meta/workflow/test_rnafusion.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Module for Rnafusion analysis API tests."""

from cg.constants import Workflow
from cg.meta.workflow.rnafusion import RnafusionAnalysisAPI
from cg.models.analysis import NextflowAnalysis
from cg.models.cg_config import CGConfig
Expand All @@ -21,7 +22,9 @@ def test_parse_analysis(
qc_metrics: list[MetricsBase] = analysis_api.get_multiqc_json_metrics(case_id=rnafusion_case_id)

# WHEN extracting the analysis model
analysis_model: NextflowAnalysis = analysis_api.parse_analysis(qc_metrics_raw=qc_metrics)
analysis_model: NextflowAnalysis = analysis_api.parse_analysis(
qc_metrics_raw=qc_metrics, workflow=Workflow.RNAFUSION
)

# THEN the analysis model and its content should have been correctly extracted
assert analysis_model.sample_metrics[sample_id].model_dump() == rnafusion_metrics
Expand Down

0 comments on commit 0b657db

Please sign in to comment.