Skip to content

Commit

Permalink
Update pydantic qcmetrics (#3875)
Browse files (browse the repository at this point in the history)
### Update Pydantic model for qc metrics and Balsamic
  • Loading branch information
henrikstranneheim authored Oct 24, 2024
1 parent a8a927b commit 9e134c8
Show file tree
Hide file tree
Showing 7 changed files with 49 additions and 52 deletions.
5 changes: 2 additions & 3 deletions cg/models/analysis.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from pydantic.v1 import BaseModel
from pydantic import BaseModel, ConfigDict

from cg.models.raredisease.raredisease import RarediseaseQCMetrics
from cg.models.rnafusion.rnafusion import RnafusionQCMetrics
Expand All @@ -9,8 +9,7 @@
class AnalysisModel(BaseModel):
"""Metadata analysis model"""

class Config:
arbitrary_types_allowed = True
model_config = ConfigDict(arbitrary_types_allowed=True)


class NextflowAnalysis(AnalysisModel):
Expand Down
36 changes: 19 additions & 17 deletions cg/models/balsamic/config.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from datetime import datetime
from pathlib import Path

from pydantic.v1 import BaseModel, validator
from pydantic import BaseModel, field_validator, ValidationInfo

from cg.constants.constants import SampleType

Expand Down Expand Up @@ -47,16 +47,17 @@ class BalsamicConfigReference(BaseModel):
"""

reference_genome: Path
reference_genome_version: str | None
reference_genome_version: str | None = None

@validator("reference_genome_version", always=True)
def extract_genome_version_from_path(cls, value: str | None, values: dict) -> str:
@field_validator("reference_genome_version")
@classmethod
def extract_genome_version_from_path(cls, _, info: ValidationInfo) -> str:
"""
Return the genome version from the reference path:
/home/proj/stage/cancer/balsamic_cache/X.X.X/hg19/genome/human_g1k_v37.fasta
"""

return str(values["reference_genome"]).split("/")[-3]
return str(info.data.get("reference_genome")).split("/")[-3]


class BalsamicConfigPanel(BaseModel):
Expand All @@ -70,23 +71,24 @@ class BalsamicConfigPanel(BaseModel):
"""

capture_kit: str
capture_kit_version: str | None
capture_kit_version: str | None = None
chrom: list[str]
pon_cnn: str | None = None

@validator("capture_kit", pre=True)
@field_validator("capture_kit", mode="before")
@classmethod
def get_filename_from_path(cls, path: str) -> str:
"""Return the base name of the provided file path."""
return Path(path).name

@validator("capture_kit_version", always=True)
def get_panel_version_from_filename(
cls, capture_kit_version: str | None, values: dict[str, str | None]
) -> str:
@field_validator("capture_kit_version")
@classmethod
def get_panel_version_from_filename(cls, _, info: ValidationInfo) -> str:
"""Return the panel bed version from its filename (e.g. gicfdna_3.1_hg19_design.bed)."""
return values["capture_kit"].split("_")[-3]
return info.data.get("capture_kit").split("_")[-3]

@validator("pon_cnn", pre=True)
@field_validator("pon_cnn", mode="before")
@classmethod
def get_pon_cnn_name_version_from_filename(cls, pon_cnn: str | None) -> str:
"""Return the CNVkit PON name and version from its filename (gmsmyeloid_5.3_hg19_design_CNVkit_PON_reference_v1.cnn)."""
pon_cnn_filename_split: list[str] = Path(pon_cnn).stem.split("_")
Expand All @@ -110,12 +112,12 @@ class BalsamicConfigQC(BaseModel):
"""

picard_rmdup: bool
adapter: str | None
adapter: str | None = None
quality_trim: bool
adapter_trim: bool
umi_trim: bool
min_seq_length: str | None
umi_trim_length: str | None
min_seq_length: str | None = None
umi_trim_length: str | None = None


class BalsamicVarCaller(BaseModel):
Expand Down Expand Up @@ -149,7 +151,7 @@ class BalsamicConfigJSON(BaseModel):
analysis: BalsamicConfigAnalysis
samples: list[BalsamicConfigSample]
reference: BalsamicConfigReference
panel: BalsamicConfigPanel | None
panel: BalsamicConfigPanel | None = None
QC: BalsamicConfigQC
vcf: dict[str, BalsamicVarCaller]
bioinfo_tools_version: dict[str, list[str]]
52 changes: 24 additions & 28 deletions cg/models/balsamic/metrics.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from pydantic.v1 import validator
from pydantic import field_validator

from cg.models.deliverables.metric_deliverables import MetricCondition, MetricsBase
from cg.models.qc_metrics import QCMetrics
Expand All @@ -17,60 +17,56 @@ class BalsamicMetricsBase(MetricsBase):
condition: balsamic metric validation condition
"""

condition: MetricCondition | None
condition: MetricCondition | None = None


class BalsamicQCMetrics(QCMetrics):
"""BALSAMIC common QC metrics"""

fold_80_base_penalty: float | None
mean_insert_size: float | None
percent_duplication: float | None
fold_80_base_penalty: float | None = None
mean_insert_size: float | None = None
percent_duplication: float | None = None

_percent_duplication = validator("percent_duplication", allow_reuse=True)(
percent_value_validation
)
_percent_duplication: float = field_validator("percent_duplication")(percent_value_validation)


class BalsamicTargetedQCMetrics(BalsamicQCMetrics):
"""BALSAMIC targeted QC metrics"""

mean_target_coverage: float | None
median_target_coverage: float | None
pct_target_bases_50x: float | None
pct_target_bases_100x: float | None
pct_target_bases_250x: float | None
pct_target_bases_500x: float | None
pct_target_bases_1000x: float | None
pct_off_bait: float | None
gc_dropout: float | None

_pct_values = validator(
mean_target_coverage: float | None = None
median_target_coverage: float | None = None
pct_target_bases_50x: float | None = None
pct_target_bases_100x: float | None = None
pct_target_bases_250x: float | None = None
pct_target_bases_500x: float | None = None
pct_target_bases_1000x: float | None = None
pct_off_bait: float | None = None
gc_dropout: float | None = None

_pct_values: float = field_validator(
"pct_target_bases_50x",
"pct_target_bases_100x",
"pct_target_bases_250x",
"pct_target_bases_500x",
"pct_target_bases_1000x",
"pct_off_bait",
allow_reuse=True,
)(percent_value_validation)


class BalsamicWGSQCMetrics(BalsamicQCMetrics):
"""BALSAMIC WGS QC metrics"""

median_coverage: float | None
pct_15x: float | None
pct_30x: float | None
pct_60x: float | None
pct_100x: float | None
pct_pf_reads_improper_pairs: float | None
median_coverage: float | None = None
pct_15x: float | None = None
pct_30x: float | None = None
pct_60x: float | None = None
pct_100x: float | None = None
pct_pf_reads_improper_pairs: float | None = None

_pct_values = validator(
_pct_values: float = field_validator(
"pct_15x",
"pct_30x",
"pct_60x",
"pct_100x",
"pct_pf_reads_improper_pairs",
allow_reuse=True,
)(percent_value_validation)
3 changes: 1 addition & 2 deletions cg/models/deliverables/metric_deliverables.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import operator
from typing import Annotated, Any, Callable

from pydantic import BaseModel, Field, field_validator
from pydantic_core.core_schema import ValidationInfo
from pydantic import BaseModel, Field, ValidationInfo, field_validator

from cg.constants import PRECISION
from cg.exc import CgError, MetricsQCError
Expand Down
1 change: 1 addition & 0 deletions cg/models/delivery_report/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,7 @@ class WTSSampleMetadataModel(SequencingSampleMetadataModel):
uniquely_mapped_reads: Annotated[str, BeforeValidator(get_number_as_string)] = NA_FIELD

@field_validator("rin")
@classmethod
def ensure_rin_thresholds(cls, rin: str) -> str:
if rin != NA_FIELD:
rin_number = float(rin)
Expand Down
2 changes: 1 addition & 1 deletion cg/models/qc_metrics.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from pydantic.v1 import BaseModel
from pydantic import BaseModel


class QCMetrics(BaseModel):
Expand Down
2 changes: 1 addition & 1 deletion tests/meta/workflow/test_rnafusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def test_parse_analysis(
analysis_model: NextflowAnalysis = analysis_api.parse_analysis(qc_metrics_raw=qc_metrics)

# THEN the analysis model and its content should have been correctly extracted
assert analysis_model.sample_metrics[sample_id] == rnafusion_metrics
assert analysis_model.sample_metrics[sample_id].model_dump() == rnafusion_metrics


def test_get_latest_metadata(
Expand Down

0 comments on commit 9e134c8

Please sign in to comment.