From 34e719b54b6acbd1fbaa0d7d8e78ab3aba3fadb4 Mon Sep 17 00:00:00 2001 From: gbrunin Date: Tue, 20 Aug 2024 16:19:42 +0200 Subject: [PATCH 1/4] Bug fix in utils. --- matminer/utils/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/matminer/utils/utils.py b/matminer/utils/utils.py index 7f91fd9a4..5853c992a 100644 --- a/matminer/utils/utils.py +++ b/matminer/utils/utils.py @@ -66,7 +66,7 @@ def get_elem_in_data(df, as_pure=False): else: for elem in Element: for compound in df.index.to_list(): - if elem.name in compound and elem.name not in elems_in_df: + if elem in Composition(compound) and elem.name not in elems_in_df: elems_in_df.append(elem.name) # Find the elements not in the data From 7b3d2284946b369ea52f7765b8e01c7840018909 Mon Sep 17 00:00:00 2001 From: gbrunin Date: Tue, 20 Aug 2024 17:07:35 +0200 Subject: [PATCH 2/4] types-all has been deprecated, listing all types. --- .pre-commit-config.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index fe7bb7c7b..4b781cf48 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -45,7 +45,8 @@ repos: rev: v1.5.1 hooks: - id: mypy - additional_dependencies: [types-all] + additional_dependencies: + - types-paramiko # - repo: local # hooks: From af57a949f5c9d13a286bd46388d841bb491ba6b5 Mon Sep 17 00:00:00 2001 From: gbrunin Date: Tue, 20 Aug 2024 17:15:23 +0200 Subject: [PATCH 3/4] Actually listing types. 
--- .pre-commit-config.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4b781cf48..cba6848a8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -46,7 +46,10 @@ repos: hooks: - id: mypy additional_dependencies: - - types-paramiko + - types-setuptools + - types-paramiko + - types-requests + - types-ujson # - repo: local # hooks: From 6c4640680f128cf29f43a400edbbc4b32008e001 Mon Sep 17 00:00:00 2001 From: gbrunin Date: Wed, 21 Aug 2024 08:26:12 +0200 Subject: [PATCH 4/4] Bug fix in pseudo-inverse computation in the case of hydrogen. --- matminer/featurizers/composition/tests/test_composite.py | 4 ++-- matminer/utils/tests/test_data.py | 4 ++-- matminer/utils/utils.py | 6 ++++++ 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/matminer/featurizers/composition/tests/test_composite.py b/matminer/featurizers/composition/tests/test_composite.py index 13e0850ff..63de8c516 100644 --- a/matminer/featurizers/composition/tests/test_composite.py +++ b/matminer/featurizers/composition/tests/test_composite.py @@ -318,7 +318,7 @@ def test_elem_optical(self): self.df_nans, col_id="composition" ) self.assertEqual(df_elem.isna().sum().sum(), 0) - self.assertAlmostEqual(df_elem.drop(columns="composition").sum().sum(), 201.3255, 4) + self.assertAlmostEqual(df_elem.drop(columns="composition").sum().sum(), 204.4712, 4) def test_elem_transport(self): df_elem = ElementProperty.from_preset("mp_transport", impute_nan=False).featurize_dataframe( @@ -343,7 +343,7 @@ def test_elem_transport(self): df_elem = ElementProperty.from_preset("mp_transport", impute_nan=True).featurize_dataframe( self.df_nans, col_id="composition" ) - self.assertAlmostEqual(df_elem.drop(columns="composition").sum().sum(), 9798095.622017656, 4) + self.assertAlmostEqual(df_elem.drop(columns="composition").sum().sum(), 10029874.1567, 4) if __name__ == "__main__": diff --git a/matminer/utils/tests/test_data.py 
b/matminer/utils/tests/test_data.py index 2d174a0c6..8ea530354 100644 --- a/matminer/utils/tests/test_data.py +++ b/matminer/utils/tests/test_data.py @@ -283,7 +283,7 @@ def test_get_data(self): c_k = self.data_source_imputed.get_elemental_property(elem="C", property_name="k_760.0") self.assertAlmostEqual(c_k, 0.7462931865379264) og_r = self.data_source_imputed.get_elemental_property(elem="Og", property_name="R_400.0") - self.assertAlmostEqual(og_r, 0.4624005395190695) + self.assertAlmostEqual(og_r, 0.46962554794905487) class TestTransportData(TestCase): @@ -308,7 +308,7 @@ def test_get_data(self): cu_kappan = self.data_source_imputed.get_elemental_property(elem="Cu", property_name="kappa_n") self.assertAlmostEqual(cu_kappan, 1814544.75663, places=5) og_mn = self.data_source_imputed.get_elemental_property(elem="Og", property_name="m_n") - self.assertAlmostEqual(og_mn, 0.03237036761677134) + self.assertAlmostEqual(og_mn, 0.03293018092682478) if __name__ == "__main__": diff --git a/matminer/utils/utils.py b/matminer/utils/utils.py index 5853c992a..6d78b9a3d 100644 --- a/matminer/utils/utils.py +++ b/matminer/utils/utils.py @@ -137,5 +137,11 @@ def get_pseudo_inverse(df_init, cols=None): res_pi = np.vstack([res_pi, np.nan * np.ones([len(elems_not_in_df), len(df.T) - 1])]) df_pi = pd.DataFrame(res_pi, columns=cols, index=pd.Index(elems_in_df + elems_not_in_df)) + # Handle the case of hydrogen, deuterium, and tritium + # If all are present, their contributions are summed + # and given to hydrogen only. Others are removed. + if all(e in df_pi.index for e in ["H", "D", "T"]): + df_pi.loc["H"] = df_pi.loc["H"] + df_pi.loc["D"] + df_pi.loc["T"] + df_pi.drop(index=["T", "D"], inplace=True) return df_pi