Skip to content

Commit

Permalink
Add Balsamic validated metrics to delivery report (#2688)
Browse files Browse the repository at this point in the history
### Added:
- Missing Balsamic metrics used for Balsamic QC validation
  • Loading branch information
ivadym authored Nov 21, 2023
1 parent 820aa91 commit 2825552
Show file tree
Hide file tree
Showing 7 changed files with 45 additions and 6 deletions.
2 changes: 2 additions & 0 deletions cg/constants/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,11 +149,13 @@
"median_target_coverage",
"pct_250x",
"pct_500x",
"gc_dropout",
]

REQUIRED_SAMPLE_METADATA_BALSAMIC_TO_WGS_FIELDS = _REQUIRED_SAMPLE_METADATA_BALSAMIC_FIELDS + [
"median_coverage",
"pct_60x",
"pct_reads_improper_pairs",
]

REQUIRED_SAMPLE_METADATA_BALSAMIC_TN_WGS_FIELDS = (
Expand Down
4 changes: 4 additions & 0 deletions cg/meta/report/balsamic.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ def get_panel_metadata(
duplicates=sample_metrics.percent_duplication if sample_metrics else None,
mean_insert_size=sample_metrics.mean_insert_size if sample_metrics else None,
fold_80=sample_metrics.fold_80_base_penalty if sample_metrics else None,
gc_dropout=sample_metrics.gc_dropout if sample_metrics else None,
)

def get_wgs_metadata(
Expand All @@ -104,6 +105,9 @@ def get_wgs_metadata(
duplicates=self.get_wgs_percent_duplication(sample_metrics=sample_metrics),
mean_insert_size=sample_metrics.mean_insert_size if sample_metrics else None,
fold_80=sample_metrics.fold_80_base_penalty if sample_metrics else None,
pct_reads_improper_pairs=sample_metrics.pct_pf_reads_improper_pairs
if sample_metrics
else None,
)

@staticmethod
Expand Down
21 changes: 15 additions & 6 deletions cg/meta/report/templates/balsamic_report.html
Original file line number Diff line number Diff line change
Expand Up @@ -168,15 +168,17 @@ <h4>Kundinformation</h4>
<th>Läspar [M]</th>
<th>Mediantäckning [baser]</th>
{% if "helgenomsekvensering" in case.data_analysis.type %}
{% if "normal" in case.data_analysis.type %}
<th>Täckningsgrad 15x [%]</th>
<th>Täckningsgrad 60x [%]</th>
{% else %}
<th>Täckningsgrad 60x [%]</th>
{% endif %}
{% if "normal" in case.data_analysis.type %}
<th>Täckningsgrad 15x [%]</th>
<th>Täckningsgrad 60x [%]</th>
{% else %}
<th>Täckningsgrad 60x [%]</th>
{% endif %}
<th>Läsningar med felaktiga par [%]</th>
{% else %}
<th>Täckningsgrad 250x [%]</th>
<th>Täckningsgrad 500x [%]</th>
<th>GC Dropout [%]</th>
{% endif %}
<th>Duplikat [%]</th>
<th>Medelfragmentlängd [baspar]</th>
Expand All @@ -196,10 +198,12 @@ <h4>Kundinformation</h4>
{% else %}
<td>{{ sample.metadata.pct_60x }}</td>
{% endif %}
<td>{{ sample.metadata.pct_reads_improper_pairs }}</td>
{% else %}
<td>{{ sample.metadata.median_target_coverage }}</td>
<td>{{ sample.metadata.pct_250x }}</td>
<td>{{ sample.metadata.pct_500x }}</td>
<td>{{ sample.metadata.gc_dropout }}</td>
{% endif %}
<td>{{ sample.metadata.duplicates }}</td>
<td>{{ sample.metadata.mean_insert_size }}</td>
Expand All @@ -215,6 +219,11 @@ <h4>Kundinformation</h4>
<ins>Duplikat</ins>: Sekvenseringsläsningar som är i duplikat och därmed ej unika sekvenser. Hög mängd duplikat kan tyda på dålig komplexitet av sekvenserat bibliotek eller djup sekvensering.<br>
<ins>Medelfragmentlängd</ins>: Medelstorlek av provbiblioteken som laddats på sekvenseringsinstrument. <200bp kan tyda på degraderade provmaterial (t.ex. FFPE), innan biblioteksberedning.<br>
<ins>Fold 80 base penalty</ins>: Jämnhet av täckningsgraden över alla gener i analyspanelen. Ett värde mellan 1.0-1.8 visar god jämnhet.<br>
{% if "helgenomsekvensering" in case.data_analysis.type %}
<ins>Läsningar med felaktiga par</ins>: Andelen (primära) läsningar som inte är korrekt parvis justerade.<br>
{% else %}
<ins>GC dropout</ins>: Ett mått på hur dåligt täckta områden, med >= 50% GC-innehåll, är i jämförelse med medelvärdet. Om värdet är 5%, innebär det att 5% av alla läsningar som borde ha kartlagts till områden med GC>=50%, kartlades någon annanstans.<br>
{% endif %}
<br>
Värdena presenterade för <ins>Täckningsgrad</ins> och <ins>Mediantäckning</ins> är efter borttagning av duplikata läsningar.
</caption>
Expand Down
3 changes: 3 additions & 0 deletions cg/models/balsamic/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ class BalsamicTargetedQCMetrics(BalsamicQCMetrics):
pct_target_bases_500x: Optional[float]
pct_target_bases_1000x: Optional[float]
pct_off_bait: Optional[float]
gc_dropout: Optional[float]

_pct_values = validator(
"percent_duplication",
Expand All @@ -63,11 +64,13 @@ class BalsamicWGSQCMetrics(BalsamicQCMetrics):
pct_30x: Optional[float]
pct_60x: Optional[float]
pct_100x: Optional[float]
pct_pf_reads_improper_pairs: Optional[float]

_pct_values = validator(
"pct_15x",
"pct_30x",
"pct_60x",
"pct_100x",
"pct_pf_reads_improper_pairs",
allow_reuse=True,
)(percent_value_validation)
2 changes: 2 additions & 0 deletions cg/models/report/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ class BalsamicTargetedSampleMetadataModel(BalsamicSampleMetadataModel):
median_target_coverage: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD
pct_250x: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD
pct_500x: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD
gc_dropout: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD


class BalsamicWGSSampleMetadataModel(BalsamicSampleMetadataModel):
Expand All @@ -85,6 +86,7 @@ class BalsamicWGSSampleMetadataModel(BalsamicSampleMetadataModel):
median_coverage: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD
pct_15x: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD
pct_60x: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD
pct_reads_improper_pairs: Annotated[str, BeforeValidator(get_float_as_string)] = NA_FIELD


class RnafusionSampleMetadataModel(SampleMetadataModel):
Expand Down
18 changes: 18 additions & 0 deletions tests/fixtures/apps/balsamic/case/metrics_deliverables.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,21 @@
step: multiqc_picard_dups
value: 0.931044
condition: null
- header: null
id: ACC0000A0
input: ACC0000A0.dedup.hsmetric.txt
name: GC_DROPOUT
step: multiqc_picard_HsMetrics
value: 1.119042
condition:
norm: lt
threshold: 1.0
- header: null
id: ACC0000A1
input: ACC0000A1.dedup.hsmetric.txt
name: GC_DROPOUT
step: multiqc_picard_HsMetrics
value: 1.00692
condition:
norm: lt
threshold: 1.0
1 change: 1 addition & 0 deletions tests/meta/report/test_balsamic_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ def test_get_sample_metadata(
"median_target_coverage": "5323.0",
"pct_250x": "N/A",
"pct_500x": "N/A",
"gc_dropout": "1.01",
}

# WHEN retrieving the sample metadata
Expand Down

0 comments on commit 2825552

Please sign in to comment.