From 6ec0d45baf48eeefa04c79632645c893f128b150 Mon Sep 17 00:00:00 2001 From: Yakov Date: Tue, 5 Nov 2024 15:30:56 +0000 Subject: [PATCH] fix: do not impute `isProteinCoding` (#902) * fix: fix col names for imputation * fix: fix v1 * fix: test --- src/gentropy/dataset/l2g_feature_matrix.py | 5 ++++- tests/gentropy/dataset/test_l2g_feature_matrix.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/gentropy/dataset/l2g_feature_matrix.py b/src/gentropy/dataset/l2g_feature_matrix.py index 9caa4b58a..f59e1e725 100644 --- a/src/gentropy/dataset/l2g_feature_matrix.py +++ b/src/gentropy/dataset/l2g_feature_matrix.py @@ -143,7 +143,10 @@ def fill_na( Returns: L2GFeatureMatrix: L2G feature matrix dataset """ - cols_to_impute = ["proteinGeneCount500kb", "geneCount500kb", "isProteinCoding"] + cols_to_impute = [ + "proteinGeneCount500kb", + "geneCount500kb", + ] for col in cols_to_impute: if col not in self._df.columns: continue diff --git a/tests/gentropy/dataset/test_l2g_feature_matrix.py b/tests/gentropy/dataset/test_l2g_feature_matrix.py index f821daaac..4fe338254 100644 --- a/tests/gentropy/dataset/test_l2g_feature_matrix.py +++ b/tests/gentropy/dataset/test_l2g_feature_matrix.py @@ -228,7 +228,7 @@ def test_fill_na(spark: SparkSession) -> None: "geneId": "gene3", "proteinGeneCount500kb": 3.5, "geneCount500kb": 9.0, - "isProteinCoding": 1.0, + "isProteinCoding": 0.0, "anotherFeature": 0.0, }, ],