Skip to content

Commit

Permalink
refactor: rename inSilicoPredictors to variantEffect in merged files
Browse files Browse the repository at this point in the history
  • Loading branch information
vivienho committed Feb 10, 2025
1 parent 597cf05 commit 89e1785
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 20 deletions.
2 changes: 1 addition & 1 deletion src/gentropy/assets/schemas/amino_acid_variants.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
},
{
"metadata": {},
"name": "inSilicoPredictors",
"name": "variantEffect",
"nullable": true,
"type": {
"containsNull": true,
Expand Down
14 changes: 7 additions & 7 deletions src/gentropy/dataset/variant_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,15 +284,15 @@ def get_distance_to_gene(
def annotate_with_amino_acid_consequences(
self: VariantIndex, annotation: AminoAcidVariants
) -> VariantIndex:
"""Enriching in silico predictors with amino-acid derived predicted consequences.
"""Enriching variant effect assessments with amino-acid derived predicted consequences.
Args:
annotation (AminoAcidVariants): amino-acid level variant consequences.
Returns:
VariantIndex: where variants causing amino-acid changes are enriched with extra annotation
"""
w = Window.partitionBy("variantId").orderBy(f.size("inSilicoPredictors").desc())
w = Window.partitionBy("variantId").orderBy(f.size("variantEffect").desc())

return VariantIndex(
_df=self.df
Expand All @@ -313,17 +313,17 @@ def annotate_with_amino_acid_consequences(
)
# Joining with amino-acid predictions:
.join(
annotation.df.withColumnRenamed("inSilicoPredictors", "annotations"),
annotation.df.withColumnRenamed("variantEffect", "annotations"),
on=["uniprotAccession", "aminoAcidChange"],
how="left",
)
# Merge predictors:
.withColumn(
"inSilicoPredictors",
"variantEffect",
f.when(
f.col("annotations").isNotNull(),
f.array_union("inSilicoPredictors", "annotations"),
).otherwise(f.col("inSilicoPredictors")),
f.array_union("variantEffect", "annotations"),
).otherwise(f.col("variantEffect")),
)
# Dropping unused columns:
.drop("uniprotAccession", "aminoAcidChange", "annotations")
Expand Down Expand Up @@ -460,7 +460,7 @@ def _rescaleColumnValue(

@classmethod
def _normalise_foldx(
cls: type[InSilicoPredictorNormaliser], score: Column
cls: type[VariantEffectNormaliser], score: Column
) -> Column:
"""Normalise FoldX ddG energies.
Expand Down
24 changes: 12 additions & 12 deletions src/gentropy/datasource/open_targets/foldex_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,26 +8,26 @@

from gentropy.common.spark_helpers import enforce_schema
from gentropy.dataset.amino_acid_variants import AminoAcidVariants
from gentropy.dataset.variant_index import InSilicoPredictorNormaliser
from gentropy.dataset.variant_index import VariantEffectNormaliser


class OpenTargetsFoldX:
"""Class to parse the FoldX dataset generated by the OTAR2081 project."""

INSILICO_SCHEMA = AminoAcidVariants.get_schema()[
"inSilicoPredictors"
VARIANT_EFFECT_SCHEMA = AminoAcidVariants.get_schema()[
"variantEffect"
].dataType.elementType

@staticmethod
@enforce_schema(INSILICO_SCHEMA)
@enforce_schema(VARIANT_EFFECT_SCHEMA)
def get_foldx_prediction(score_column: Column) -> Column:
"""Generate inSilicoPredictor object from ddG column.
"""Generate variantEffect object from ddG column.
Args:
score_column (Column): ddG column from the FoldX dataset.
Returns:
Column: struct with the right shape of the in silico predictors.
Column: struct with the right shape of the variantEffect field.
"""
return f.struct(
f.lit("FoldX").alias("method"),
Expand Down Expand Up @@ -58,21 +58,21 @@ def ingest_foldx_data(
f.col("wild_type"), f.col("position"), f.col("mutated_type")
).alias("aminoAcidChange"),
cls.get_foldx_prediction(f.col("foldx_ddg")).alias(
"inSilicoPredictor"
"foldx_prediction"
),
)
# Collapse all predictors into a single array object to avoid variant explosions:
.groupBy("uniprotAccession", "aminoAcidChange")
.agg(
f.collect_set(f.col("inSilicoPredictor")).alias(
"inSilicoPredictors"
f.collect_set(f.col("fold_prediction")).alias(
"variantEffect"
)
)
# Normalise FoldX free energy changes:
.withColumn(
"inSilicoPredictors",
InSilicoPredictorNormaliser.normalise_in_silico_predictors(
f.col("inSilicoPredictors")
"variantEffect",
VariantEffectNormaliser.normalise_variant_effect(
f.col("variantEffect")
),
)
),
Expand Down

0 comments on commit 89e1785

Please sign in to comment.