From 6601c9cc32c83a31f31488523cfe315a5f906783 Mon Sep 17 00:00:00 2001 From: Ehud-Karavani Date: Wed, 25 Oct 2023 11:55:49 +0300 Subject: [PATCH 1/6] Allow pandas >2 dependency Signed-off-by: Ehud-Karavani --- causallib/estimation/matching.py | 2 +- causallib/simulation/CausalSimulator3.py | 39 +++++++++++++++-------- causallib/tests/test_causal_simulator3.py | 2 +- causallib/tests/test_overlap_weights.py | 3 +- causallib/tests/test_survival.py | 3 ++ requirements.txt | 2 +- 6 files changed, 34 insertions(+), 17 deletions(-) diff --git a/causallib/estimation/matching.py b/causallib/estimation/matching.py index fd1a1453..55366fe3 100644 --- a/causallib/estimation/matching.py +++ b/causallib/estimation/matching.py @@ -679,7 +679,7 @@ def _matches_to_weights_single_matching(self, s, t, match_df): name = {0: "control", 1: "treatment"} weights.name = "{s}_to_{t}".format(s=name[s], t=name[t]) s_to_t_matches = match_df.loc[t][self.treatments_ == s].matches - for source_idx, matches_list in s_to_t_matches.iteritems(): + for source_idx, matches_list in s_to_t_matches.items(): if matches_list: weights.loc[source_idx] += 1 for match in matches_list: diff --git a/causallib/simulation/CausalSimulator3.py b/causallib/simulation/CausalSimulator3.py index 9aeeb701..07673d50 100644 --- a/causallib/simulation/CausalSimulator3.py +++ b/causallib/simulation/CausalSimulator3.py @@ -751,13 +751,17 @@ def generate_outcome_col(self, X_parents, link_type, snr, prob_category, outcome elif outcome_type == SURVIVAL: if survival_distribution == "expon": rnd_state = np.random.randint(low=0, high=999999) - param = survival_baseline * np.exp(x_outcome) + param = survival_baseline * np.exp(x_outcome.astype(float)) x_outcome = pd.Series( stats.expon(loc=0.0, scale=(1.0 / param)).rvs(x_outcome.size, random_state=rnd_state), index=x_outcome.index) cf = {i: pd.Series( - stats.expon(loc=0.0, scale=(1 / (survival_baseline * np.exp(cf[i])))).rvs(x_outcome.size, - random_state=rnd_state), + stats.expon( + 
loc=0.0, + scale=(1 / (survival_baseline * np.exp(cf[i].astype(float))))).rvs( + x_outcome.size, + random_state=rnd_state + ), index=x_outcome.index) if has_treatment_parent else cf[i] for i in list(cf.keys())} # Supplying the random state assures that the resulting outcome and cfs is consistent while sampling rvs @@ -826,7 +830,7 @@ def generate_censor_col(self, X_parents, link_type, snr, prob_category, outcome_ var_name=var_name) if survival_distribution == "expon": # param = survival_baseline * (prob_category.iloc[0]/prob_category.loc[1]) * np.exp(x_signal) # Cox ph - param = survival_baseline * np.exp(x_signal) # Cox ph model + param = survival_baseline * np.exp(x_signal.astype(float)) # Cox ph model survival_distribution = stats.expon(loc=0.0, scale=(1.0 / param)) x_censor = pd.Series(survival_distribution.rvs(size=x_signal.size), index=x_signal.index) # scale values with censoring proportions - 0 is non censored, 1 is censored: @@ -941,7 +945,7 @@ def _treatment_logistic_dichotomous(x_continuous, prob_category, params=None): # compute propensities: t = x_continuous.quantile(prob_category.iloc[1], interpolation="higher") slope = params.get("slope", 1.0) if params is not None else 1.0 - cur_propensity = 1.0 / (1 + np.exp(slope * (x_continuous - np.repeat(t, x_continuous.size)))) + cur_propensity = 1.0 / (1 + np.exp(slope * (x_continuous - np.repeat(t, x_continuous.size)).astype(float))) # assign the propensity values: propensity.loc[:, columns_names[1]] = cur_propensity propensity.loc[:, columns_names[0]] = np.ones(cur_propensity.size) - cur_propensity @@ -968,11 +972,12 @@ def _treatment_odds_ratio(x_continuous, prob_category, snr): - **propensity** (*pd.DataFrame*): The marginal conditional probability of treatment given covariates. A DataFrame shaped (num_samples x num_of_possible_treatment_categories). 
""" + x_continuous = x_continuous.astype(float) index_names = x_continuous.index columns_names = prob_category.index propensity = pd.DataFrame(index=index_names, columns=columns_names) # start with filling up the odds ratio: - for cur_category, p in prob_category.iteritems(): + for cur_category, p in prob_category.items(): t = x_continuous.quantile(p, interpolation="higher") cur_propensity = (1.0 / (1 + np.exp((x_continuous - np.repeat(t, x_continuous.size))))) # type: pd.Series cur_propensity = cur_propensity.div(np.ones_like(cur_propensity) - cur_propensity) @@ -1012,8 +1017,12 @@ def _treatment_quantile_gauss_fit(x_continuous, prob_category, snr): columns_names = prob_category.index propensity = pd.DataFrame(index=index_names, columns=columns_names) # section the signal into bins based on the probabilities (quantiles) - bins = pd.qcut(x=x_continuous, q=np.cumsum(pd.Series(0, index=["null"]).append(prob_category)), - labels=columns_names) + x_continuous = x_continuous.astype(float) + bins = pd.qcut( + x=x_continuous, + q=np.cumsum(pd.concat([pd.Series(0, index=["null"]), prob_category])), + labels=columns_names + ) for cur_category in columns_names: cur_samples_mask = (bins == cur_category) cur_samples = x_continuous[cur_samples_mask] @@ -1103,8 +1112,10 @@ def _discretize_col(x_col, prob_category, method="empiric", retbins=False, bins= res = cutoffs.sum(axis="columns") elif method == "empiric": # discretize according to percentiles from the empirical data itself try: - cumulative_ps = pd.Series(0, index=["null"]).append(prob_category).cumsum() - res, bins = pd.qcut(x=x_col, q=cumulative_ps, + cumulative_ps = pd.concat( + [pd.Series(0, index=["null"]), prob_category] + ).cumsum() + res, bins = pd.qcut(x=x_col.astype(float), q=cumulative_ps, labels=prob_category.index, retbins=True) bins = pd.Series(data=bins, index=cumulative_ps.index) # TODO: maybe noise this a little? 
@@ -1541,7 +1552,7 @@ def generate_random_topology(n_covariates, p, n_treatments=1, n_outcomes=1, n_ce generated_vars = covariates + treatments + outcomes + censoring generated_vars = pd.Series(data=generated_vars, index=generated_vars) - total_vars = given_vars.append(generated_vars) + total_vars = pd.concat([given_vars, generated_vars]) topology = pd.DataFrame(data=0, index=total_vars, columns=total_vars, dtype=bool) # generate between the independent given set to generated set: @@ -1577,10 +1588,12 @@ def generate_random_topology(n_covariates, p, n_treatments=1, n_outcomes=1, n_ce generated_types[treatments] = TREATMENT generated_types[outcomes] = OUTCOME generated_types[censoring] = CENSOR - var_types = pd.Series(data=COVARIATE, index=given_vars).append(generated_types) + var_types = pd.concat( + [pd.Series(data=COVARIATE, index=given_vars), generated_types] + ) # create a hidden variables mask: - hidden_vars = given_vars.append(pd.Series(covariates)).sample(frac=p_hidden) + hidden_vars = pd.concat([given_vars, pd.Series(covariates)]).sample(frac=p_hidden) var_types[hidden_vars] = HIDDEN return topology, var_types diff --git a/causallib/tests/test_causal_simulator3.py b/causallib/tests/test_causal_simulator3.py index c7b5a3da..ccab9461 100644 --- a/causallib/tests/test_causal_simulator3.py +++ b/causallib/tests/test_causal_simulator3.py @@ -350,7 +350,7 @@ def test_linear_linking(self): outcome_types=self.no_X.outcome_types, snr=snr, effect_sizes=self.no_X.effect_sizes) X, prop, cf = sim.generate_data(num_samples=self.NUM_SAMPLES) - singular_values = np.linalg.svd(X.values, compute_uv=False) + singular_values = np.linalg.svd(X.astype(float).values, compute_uv=False) eps = 1e-10 rank = np.sum(singular_values > eps) self.assertEqual(rank, 2, diff --git a/causallib/tests/test_overlap_weights.py b/causallib/tests/test_overlap_weights.py index eebca0e3..0ba881f0 100644 --- a/causallib/tests/test_overlap_weights.py +++ b/causallib/tests/test_overlap_weights.py @@ 
-80,6 +80,7 @@ def test_ow_weights_reversed_to_propensity(self): propensity = pd.DataFrame(propensity) ow_weights = self.estimator.compute_weight_matrix(self.data_r_100["X"], self.data_r_100["a"], clip_min=None, clip_max=None) + propensity.columns = propensity.columns.astype(ow_weights.columns.dtype) # Avoid column dtype assert pd.testing.assert_series_equal(propensity.loc[:, 0], ow_weights.loc[:, 1], check_names=False) pd.testing.assert_series_equal(propensity.loc[:, 1], ow_weights.loc[:, 0], check_names=False) - pd.testing.assert_index_equal(propensity.columns, ow_weights.columns) \ No newline at end of file + pd.testing.assert_index_equal(propensity.columns, ow_weights.columns) diff --git a/causallib/tests/test_survival.py b/causallib/tests/test_survival.py index d55b7585..2fc4d5be 100644 --- a/causallib/tests/test_survival.py +++ b/causallib/tests/test_survival.py @@ -635,6 +635,9 @@ def test_marginal_kaplan_meier_curves(self): 1: lifelines_km_a1.predict(sorted(self.t.unique()))}) marginal_curves_lifelines.columns.name = 'a' marginal_curves_lifelines.index.name = 't' + marginal_curves_lifelines.columns = ( + marginal_curves_lifelines.columns.astype(marginal_curves_causallib.columns.dtype) + ) # Avoid column dtype assert pd.testing.assert_frame_equal(marginal_curves_causallib, marginal_curves_causallib_lifelines) pd.testing.assert_frame_equal(marginal_curves_causallib, marginal_curves_lifelines) diff --git a/requirements.txt b/requirements.txt index 75f72124..218def97 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -pandas>=0.25.2,<2 +pandas>=0.25.2,<3 scipy>=0.19,<2 statsmodels>=0.9,<1 networkx>=1.1,<3 From ccb8b7b18b3e33ecfeae401ffce40c8cdef44a65 Mon Sep 17 00:00:00 2001 From: Ehud-Karavani Date: Wed, 25 Oct 2023 12:02:27 +0300 Subject: [PATCH 2/6] Allow networkx >3 dependency Signed-off-by: Ehud-Karavani --- causallib/simulation/CausalSimulator3.py | 2 +- causallib/tests/test_causal_simulator3.py | 4 ++-- requirements.txt | 2 +- 3 files 
changed, 4 insertions(+), 4 deletions(-) diff --git a/causallib/simulation/CausalSimulator3.py b/causallib/simulation/CausalSimulator3.py index 07673d50..7757c40b 100644 --- a/causallib/simulation/CausalSimulator3.py +++ b/causallib/simulation/CausalSimulator3.py @@ -186,7 +186,7 @@ def __init__(self, topology, var_types, prob_categories, link_types, snr, treatm # Create a graph out of matrix topology: self.topology = topology - self.graph_topology = nx.from_numpy_matrix(topology.transpose(), create_using=nx.DiGraph()) # type:nx.DiGraph + self.graph_topology = nx.from_numpy_array(topology.transpose(), create_using=nx.DiGraph()) # type:nx.DiGraph self.graph_topology = nx.relabel_nodes(self.graph_topology, dict(list(zip(list(range(self.m)), self.var_names)))) diff --git a/causallib/tests/test_causal_simulator3.py b/causallib/tests/test_causal_simulator3.py index ccab9461..c02ec975 100644 --- a/causallib/tests/test_causal_simulator3.py +++ b/causallib/tests/test_causal_simulator3.py @@ -308,7 +308,7 @@ def test_random_topology_generation(self): np.testing.assert_array_equal(T.loc[X.columns, :].sum(axis="columns"), np.zeros(5)) # Test for DAGness: - from networkx import DiGraph, from_numpy_matrix, is_directed_acyclic_graph + from networkx import DiGraph, from_numpy_array, is_directed_acyclic_graph NUM_TESTS = 50 for test in range(NUM_TESTS): n_cov = np.random.randint(low=10, high=100) @@ -317,7 +317,7 @@ def test_random_topology_generation(self): n_cen = np.random.randint(low=0, high=n_tre_out) T, _ = CS3m.generate_random_topology(n_covariates=n_cov, p=p, n_treatments=n_tre_out, n_outcomes=n_tre_out, n_censoring=n_cen, given_vars=[], p_hidden=0) - G = from_numpy_matrix(T.values.transpose(), create_using=DiGraph()) + G = from_numpy_array(T.values.transpose(), create_using=DiGraph()) res = is_directed_acyclic_graph(G) self.assertTrue(res) diff --git a/requirements.txt b/requirements.txt index 218def97..6ca6aa9b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 
+1,4 @@ -pandas>=0.25.2,<3 +pandas>=0.25.2,<4 scipy>=0.19,<2 statsmodels>=0.9,<1 networkx>=1.1,<3 From d40b615cde0e6f32cefdd110f08dda0a3edc4448 Mon Sep 17 00:00:00 2001 From: Ehud-Karavani Date: Wed, 25 Oct 2023 12:13:12 +0300 Subject: [PATCH 3/6] Github actions add tests for Python 3.10 and 3.11 Signed-off-by: Ehud-Karavani --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index cb0c64bc..95e8eb33 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -8,7 +8,7 @@ jobs: strategy: fail-fast: false # Don't cancel entire run if one python-version fails matrix: - python-version: ["3.7", "3.8", "3.9"] + python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"] name: Build and test on Python ${{ matrix.python-version }} steps: - uses: actions/checkout@v3 From 598b8fb9753c358da46564ddf9107c0d05100901 Mon Sep 17 00:00:00 2001 From: Ehud-Karavani Date: Wed, 25 Oct 2023 12:26:25 +0300 Subject: [PATCH 4/6] Change `faiss` dependency from GPU to CPU GPU not yet supported for Python 3.11 https://github.com/facebookresearch/faiss/issues/2861#issuecomment-1558716885 Signed-off-by: Ehud-Karavani --- causallib/contrib/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/causallib/contrib/requirements.txt b/causallib/contrib/requirements.txt index 9a7015cc..a9ef6d7c 100644 --- a/causallib/contrib/requirements.txt +++ b/causallib/contrib/requirements.txt @@ -1,3 +1,3 @@ -f https://download.pytorch.org/whl/cpu/ # To support cpu torch installation torch>=1.2.0 -faiss-gpu~=1.7.0 \ No newline at end of file +faiss-cpu~=1.7.0 # Can also use gpu for some Python versions \ No newline at end of file From 72565dbd0c60c9cf9c74c477d00dcb033dfffff4 Mon Sep 17 00:00:00 2001 From: Ehud-Karavani Date: Wed, 25 Oct 2023 12:34:14 +0300 Subject: [PATCH 5/6] Bump version: v0.9.6 Signed-off-by: Ehud-Karavani --- causallib/__init__.py | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/causallib/__init__.py b/causallib/__init__.py index f8c6ac7f..50533e30 100644 --- a/causallib/__init__.py +++ b/causallib/__init__.py @@ -1 +1 @@ -__version__ = "0.9.5" +__version__ = "0.9.6" From c5a305374dc4664a05d256fb5c96082b36b372df Mon Sep 17 00:00:00 2001 From: Ehud-Karavani Date: Wed, 25 Oct 2023 12:42:44 +0300 Subject: [PATCH 6/6] Fix requirements bump twice for pandas none for nx Bumped pandas twice instead of pandas and networkx Signed-off-by: Ehud-Karavani --- requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 6ca6aa9b..16b9e887 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ -pandas>=0.25.2,<4 +pandas>=0.25.2,<3 scipy>=0.19,<2 statsmodels>=0.9,<1 -networkx>=1.1,<3 +networkx>=1.1,<4 numpy>=1.13,<2 scikit-learn>=0.20,<1.2 matplotlib>=2.2,<4