diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index fe7bb7c7..cba6848a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -45,7 +45,11 @@ repos: rev: v1.5.1 hooks: - id: mypy - additional_dependencies: [types-all] + additional_dependencies: + - types-setuptools + - types-paramiko + - types-requests + - types-ujson # - repo: local # hooks: diff --git a/matminer/featurizers/composition/tests/test_composite.py b/matminer/featurizers/composition/tests/test_composite.py index 13e0850f..63de8c51 100644 --- a/matminer/featurizers/composition/tests/test_composite.py +++ b/matminer/featurizers/composition/tests/test_composite.py @@ -318,7 +318,7 @@ def test_elem_optical(self): self.df_nans, col_id="composition" ) self.assertEqual(df_elem.isna().sum().sum(), 0) - self.assertAlmostEqual(df_elem.drop(columns="composition").sum().sum(), 201.3255, 4) + self.assertAlmostEqual(df_elem.drop(columns="composition").sum().sum(), 204.4712, 4) def test_elem_transport(self): df_elem = ElementProperty.from_preset("mp_transport", impute_nan=False).featurize_dataframe( @@ -343,7 +343,7 @@ def test_elem_transport(self): df_elem = ElementProperty.from_preset("mp_transport", impute_nan=True).featurize_dataframe( self.df_nans, col_id="composition" ) - self.assertAlmostEqual(df_elem.drop(columns="composition").sum().sum(), 9798095.622017656, 4) + self.assertAlmostEqual(df_elem.drop(columns="composition").sum().sum(), 10029874.1567, 4) if __name__ == "__main__": diff --git a/matminer/utils/tests/test_data.py b/matminer/utils/tests/test_data.py index 2d174a0c..8ea53035 100644 --- a/matminer/utils/tests/test_data.py +++ b/matminer/utils/tests/test_data.py @@ -283,7 +283,7 @@ def test_get_data(self): c_k = self.data_source_imputed.get_elemental_property(elem="C", property_name="k_760.0") self.assertAlmostEqual(c_k, 0.7462931865379264) og_r = self.data_source_imputed.get_elemental_property(elem="Og", property_name="R_400.0") - self.assertAlmostEqual(og_r, 
0.4624005395190695) + self.assertAlmostEqual(og_r, 0.46962554794905487) class TestTransportData(TestCase): @@ -308,7 +308,7 @@ def test_get_data(self): cu_kappan = self.data_source_imputed.get_elemental_property(elem="Cu", property_name="kappa_n") self.assertAlmostEqual(cu_kappan, 1814544.75663, places=5) og_mn = self.data_source_imputed.get_elemental_property(elem="Og", property_name="m_n") - self.assertAlmostEqual(og_mn, 0.03237036761677134) + self.assertAlmostEqual(og_mn, 0.03293018092682478) if __name__ == "__main__": diff --git a/matminer/utils/utils.py b/matminer/utils/utils.py index 7f91fd9a..6d78b9a3 100644 --- a/matminer/utils/utils.py +++ b/matminer/utils/utils.py @@ -66,7 +66,7 @@ def get_elem_in_data(df, as_pure=False): else: for elem in Element: for compound in df.index.to_list(): - if elem.name in compound and elem.name not in elems_in_df: + if elem in Composition(compound) and elem.name not in elems_in_df: elems_in_df.append(elem.name) # Find the elements not in the data @@ -137,5 +137,11 @@ def get_pseudo_inverse(df_init, cols=None): res_pi = np.vstack([res_pi, np.nan * np.ones([len(elems_not_in_df), len(df.T) - 1])]) df_pi = pd.DataFrame(res_pi, columns=cols, index=pd.Index(elems_in_df + elems_not_in_df)) + # Handle the case of hydrogen, deuterium, and tritium + # If all are present, their contributions are summed + # and given to hydrogen only. Others are removed. + if all(e in df_pi.index for e in ["H", "D", "T"]): + df_pi.loc["H"] = df_pi.loc["H"] + df_pi.loc["D"] + df_pi.loc["T"] + df_pi.drop(index=["T", "D"], inplace=True) return df_pi