From d16b71a43b731fcf0c0e7e1c50dfcc80d997b7d7 Mon Sep 17 00:00:00 2001
From: Florian Finkernagel <finkernagel@imt.uni-marburg.de>
Date: Tue, 25 Jun 2024 11:26:05 +0200
Subject: [PATCH] pandas2, numpy2 compatibility

---
 setup.cfg                  |  2 +-
 src/dppd/__init__.py       |  2 +-
 src/dppd/single_verbs.py   | 29 +++++++++++++++--------------
 tests/test_single_verbs.py |  9 +++++----
 4 files changed, 22 insertions(+), 20 deletions(-)

diff --git a/setup.cfg b/setup.cfg
index 2cc8d84..687946d 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -5,7 +5,7 @@
 [metadata]
 name = dppd
 description = A pythonic dplyr clone
-version=0.27
+version=0.28
 author = Florian Finkernagel
 author_email = finkernagel@imt.uni-marburg.de
 license = mit
diff --git a/src/dppd/__init__.py b/src/dppd/__init__.py
index 0071f3c..8311cc4 100644
--- a/src/dppd/__init__.py
+++ b/src/dppd/__init__.py
@@ -4,6 +4,6 @@
 from . import single_verbs  # noqa:F401
 from . import non_df_verbs  # noqa:F401
 
-__version__ = "0.27"
+__version__ = "0.28"
 __all_ = [dppd, register_verb, register_type_methods_as_verbs, __version__]
 
diff --git a/src/dppd/single_verbs.py b/src/dppd/single_verbs.py
index fb61bbb..a0bda84 100644
--- a/src/dppd/single_verbs.py
+++ b/src/dppd/single_verbs.py
@@ -56,7 +56,10 @@ def _print(obj):
 @register_verb(name="debug", types=None)
 def _debug(obj, k=5):
     d = obj.iloc[np.r_[0:k, -k:0]]
-    print(d)
+    try:
+        display(d)  # noqa: F821 - Jupyter only, needs to import.
+    except NameError:
+        print(d)
     return obj
 
 
@@ -79,14 +82,6 @@ def _display(obj):  # pragma: no cover
     display(obj)  # noqa: F821 - Jupyter only, needs to import.
     return obj
 
-@register_verb(name="debug", types=[pd.DataFrame, pd.Series])
-def _debug(obj):  # pragma: no cover
-    """Verb: display head and tail of a DataFrame or Series"""
-    display(ends())  # noqa: F821 - Jupyter only, needs to import.
-    return obj
-
-
-
 
 @register_verb("ungroup", types=[DataFrameGroupBy])
 def ungroup_DataFrameGroupBy(grp):
@@ -362,9 +357,12 @@ def mutate_DataFrameGroupBy(grp, **kwargs):
         try:
             r = v[group_key]
         except KeyError:
-            raise KeyError(
-                f"Grouped mutate results did not contain data for {group_key}"
-            )
+            try:
+                r = v[group_key,]
+            except KeyError:
+                raise KeyError(
+                    f"Grouped mutate results did not contain data for {group_key}. Keys where {v.keys()}"
+                )
         r = pd.Series(r, index=sub_index)
         parts.append(r)
     parts = pd.concat(parts)
@@ -459,7 +457,10 @@ def filter_by(obj, filter_arg):
         for idx, sub_df in df.groupby(groups):
             # if not idx in filter_arg and not isinstance(tuple(idx)):
             #    idx = (idx,)
-            keep = filter_arg[idx]
+            try:
+                keep = filter_arg[idx]
+            except KeyError:
+                keep = filter_arg[idx[0]]
             parts.append(sub_df[keep])
         result = pd.concat(parts, axis=0)
     elif isinstance(filter_arg, str):
@@ -1012,7 +1013,7 @@ def pca_dataframe(df, whiten=False, random_state=None, n_components=2):
     df_fit = pd.DataFrame(p.fit_transform(df))
     cols = ["1st", "2nd"]
     if n_components > 2:
-        cols.append('3rd')
+        cols.append("3rd")
     for ii in range(3, n_components):
         cols.append(f"{ii+1}th")
     df_fit.columns = cols
diff --git a/tests/test_single_verbs.py b/tests/test_single_verbs.py
index 4e5a360..b4d5ac7 100644
--- a/tests/test_single_verbs.py
+++ b/tests/test_single_verbs.py
@@ -526,6 +526,7 @@ def test_groupby_sort_changes_order_but_not_result():
         .ungroup()
         .pd
     )
+    print(a, b)
     assert_frame_equal(a, b.loc[a.index])
 
 #
@@ -780,14 +781,14 @@ def test_groupby_select():
 
 
 def test_groupby_within_chain():
-    actual = dp(mtcars).groupby("cyl").mean().select("hp").pd
-    should = mtcars.groupby("cyl").mean()[["hp"]]
+    actual = dp(mtcars).groupby("cyl").select("hp").mean().pd
+    should = mtcars.groupby("cyl")[["hp"]].mean()
     assert_frame_equal(should, actual)
 
 
 def test_groupby_within_chain_select_on_group():
     actual = dp(mtcars).groupby("cyl").select("hp").mean().pd
-    should = mtcars.groupby("cyl").mean()[["hp"]]
+    should = mtcars.groupby("cyl")[["hp"]].mean()
     assert_frame_equal(should, actual)
 
 
@@ -911,7 +912,7 @@ def test_iter_tuples_in_group_by():
     actual = {k: list(v) for (k, v) in dp(mtcars).groupby("cyl").itertuples()}
     should = {}
     for key, sub_df in mtcars.groupby("cyl"):
-        should[key, ] = list(sub_df.itertuples())
+        should[key,] = list(sub_df.itertuples())
     assert actual == should
 
 