From e032ba9e9b7813f3b41c518e7b64ef6df03dfd65 Mon Sep 17 00:00:00 2001 From: Bruno Conde Kind Date: Mon, 19 Aug 2024 10:56:06 -0300 Subject: [PATCH 1/2] Bump python-polars to >=1.3.0 --- minimal_plugin/__init__.py | 107 +++++++++++++++++++------------------ minimal_plugin/utils.py | 90 ------------------------------- pyproject.toml | 2 +- requirements.txt | 2 +- run.py | 19 ++++--- 5 files changed, 66 insertions(+), 154 deletions(-) delete mode 100644 minimal_plugin/utils.py diff --git a/minimal_plugin/__init__.py b/minimal_plugin/__init__.py index d70e2ea..3ffe252 100644 --- a/minimal_plugin/__init__.py +++ b/minimal_plugin/__init__.py @@ -3,154 +3,157 @@ import polars as pl from pathlib import Path -from minimal_plugin.utils import register_plugin, parse_version -if parse_version(pl.__version__) < parse_version("0.20.16"): - from polars.utils.udfs import _get_shared_lib_location # type: ignore[missing-import] +from polars.plugins import register_plugin_function - lib: str | Path = _get_shared_lib_location(__file__) -else: - lib = Path(__file__).parent + +LIB = Path(__file__).parent if TYPE_CHECKING: from minimal_plugin.typing import IntoExpr def noop(expr: IntoExpr) -> pl.Expr: - return register_plugin( + return register_plugin_function( args=[expr], - lib=lib, - symbol="noop", + plugin_path=LIB, + function_name="noop", is_elementwise=True, ) def abs_i64(expr: IntoExpr) -> pl.Expr: - return register_plugin( + return register_plugin_function( args=[expr], - lib=lib, - symbol="abs_i64", + plugin_path=LIB, + function_name="abs_i64", is_elementwise=True, ) def abs_numeric(expr: IntoExpr) -> pl.Expr: - return register_plugin( + return register_plugin_function( args=[expr], - lib=lib, - symbol="abs_numeric", + plugin_path=LIB, + function_name="abs_numeric", is_elementwise=True, ) def sum_i64(expr: IntoExpr, other: IntoExpr) -> pl.Expr: - return register_plugin( + return register_plugin_function( args=[expr, other], - lib=lib, - symbol="sum_i64", + plugin_path=LIB, + function_name="sum_i64", is_elementwise=True, ) def cum_sum(expr: IntoExpr) -> pl.Expr: - return register_plugin( + return register_plugin_function( args=[expr], - lib=lib, - symbol="cum_sum", + plugin_path=LIB, + function_name="cum_sum", is_elementwise=False, ) def pig_latinnify(expr: IntoExpr) -> pl.Expr: - return register_plugin( + return register_plugin_function( args=[expr], - lib=lib, - symbol="pig_latinnify", + plugin_path=LIB, + function_name="pig_latinnify", is_elementwise=True, ) def abs_i64_fast(expr: IntoExpr) -> pl.Expr: - return register_plugin( + return register_plugin_function( args=[expr], - lib=lib, - symbol="abs_i64_fast", + plugin_path=LIB, + function_name="abs_i64_fast", is_elementwise=True, ) def add_suffix(expr: IntoExpr, *, suffix: str) -> pl.Expr: - return register_plugin( + return register_plugin_function( args=[expr], - lib=lib, - symbol="add_suffix", + plugin_path=LIB, + function_name="add_suffix", is_elementwise=True, kwargs={"suffix": suffix}, ) def snowball_stem(expr: IntoExpr) -> pl.Expr: - return register_plugin( + return register_plugin_function( args=[expr], - lib=lib, - symbol="snowball_stem", + plugin_path=LIB, + function_name="snowball_stem", is_elementwise=True, ) def weighted_mean(expr: IntoExpr, weights: IntoExpr) -> pl.Expr: - return register_plugin( + return register_plugin_function( args=[expr, weights], - lib=lib, - symbol="weighted_mean", + plugin_path=LIB, + function_name="weighted_mean", is_elementwise=True, ) def shift_struct(expr: IntoExpr) -> pl.Expr: - return register_plugin( + return register_plugin_function( args=[expr], - lib=lib, - symbol="shift_struct", + plugin_path=LIB, + function_name="shift_struct", is_elementwise=True, ) def reverse_geocode(lat: IntoExpr, long: IntoExpr) -> pl.Expr: - return register_plugin( - args=[lat, long], lib=lib, symbol="reverse_geocode", is_elementwise=True + return register_plugin_function( + args=[lat, long], + plugin_path=LIB, + function_name="reverse_geocode", + is_elementwise=True, ) def non_zero_indices(expr: IntoExpr) -> pl.Expr: - return register_plugin( - args=[expr], lib=lib, symbol="non_zero_indices", is_elementwise=True + return register_plugin_function( + args=[expr], + plugin_path=LIB, + function_name="non_zero_indices", + is_elementwise=True, ) def vertical_weighted_mean(values: IntoExpr, weights: IntoExpr) -> pl.Expr: - return register_plugin( + return register_plugin_function( args=[values, weights], - lib=lib, - symbol="vertical_weighted_mean", + plugin_path=LIB, + function_name="vertical_weighted_mean", is_elementwise=False, returns_scalar=True, ) def interpolate(expr: IntoExpr) -> pl.Expr: - return register_plugin( + return register_plugin_function( args=[expr], - lib=lib, - symbol="interpolate", + plugin_path=LIB, + function_name="interpolate", is_elementwise=False, ) def life_step(left: IntoExpr, mid: IntoExpr, right: IntoExpr) -> pl.Expr: - return register_plugin( + return register_plugin_function( args=[left, mid, right], - lib=lib, - symbol="life_step", + plugin_path=LIB, + function_name="life_step", is_elementwise=False, ) diff --git a/minimal_plugin/utils.py b/minimal_plugin/utils.py deleted file mode 100644 index 0998a8a..0000000 --- a/minimal_plugin/utils.py +++ /dev/null @@ -1,90 +0,0 @@ -from __future__ import annotations - -import re -from typing import TYPE_CHECKING, Sequence, Any - -import polars as pl - -if TYPE_CHECKING: - from my_plugin.typing import IntoExpr, PolarsDataType - from pathlib import Path - - -def parse_into_expr( - expr: IntoExpr, - *, - str_as_lit: bool = False, - list_as_lit: bool = True, - dtype: PolarsDataType | None = None, -) -> pl.Expr: - """ - Parse a single input into an expression. - - Parameters - ---------- - expr - The input to be parsed as an expression. - str_as_lit - Interpret string input as a string literal. If set to `False` (default), - strings are parsed as column names. - list_as_lit - Interpret list input as a lit literal, If set to `False`, - lists are parsed as `Series` literals. - dtype - If the input is expected to resolve to a literal with a known dtype, pass - this to the `lit` constructor. - - Returns - ------- - polars.Expr - """ - if isinstance(expr, pl.Expr): - pass - elif isinstance(expr, str) and not str_as_lit: - expr = pl.col(expr) - elif isinstance(expr, list) and not list_as_lit: - expr = pl.lit(pl.Series(expr), dtype=dtype) - else: - expr = pl.lit(expr, dtype=dtype) - - return expr - - -def register_plugin( - *, - symbol: str, - is_elementwise: bool, - kwargs: dict[str, Any] | None = None, - args: list[IntoExpr], - lib: str | Path, - returns_scalar: bool = False, -) -> pl.Expr: - if parse_version(pl.__version__) < parse_version("0.20.16"): - expr = parse_into_expr(args[0]) - assert isinstance(lib, str) - return expr.register_plugin( - lib=lib, - symbol=symbol, - args=args[1:], # type: ignore[arg-type] - kwargs=kwargs, - is_elementwise=is_elementwise, - returns_scalar=returns_scalar, - ) - from polars.plugins import register_plugin_function - - return register_plugin_function( - args=args, - plugin_path=lib, - function_name=symbol, - kwargs=kwargs, - is_elementwise=is_elementwise, - returns_scalar=returns_scalar, - ) - - -def parse_version(version: Sequence[str | int]) -> tuple[int, ...]: - # Simple version parser; split into a tuple of ints for comparison. - # vendored from Polars - if isinstance(version, str): - version = version.split(".") - return tuple(int(re.sub(r"\D", "", str(v))) for v in version) diff --git a/pyproject.toml b/pyproject.toml index 97b0b13..e31b496 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [build-system] -requires = ["maturin>=1.0,<2.0", "polars>=0.26.0"] +requires = ["maturin>=1.0,<2.0", "polars>=1.3.0"] build-backend = "maturin" [project] diff --git a/requirements.txt b/requirements.txt index 88d0afb..dd6659f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ maturin>=1.4.0 -polars>=0.20.6 +polars>=1.3.0 diff --git a/run.py b/run.py index 70e1762..bb3c4e0 100644 --- a/run.py +++ b/run.py @@ -1,10 +1,8 @@ -import polars as pl -import minimal_plugin as mp - import polars as pl import minimal_plugin as mp + df = pl.DataFrame( {"values": [[1, 3, 2], [5, 7], []], "weights": [[0.5, 0.3, 0.2], [0.1, 0.9], []]} ) @@ -30,10 +28,11 @@ ) ) -df = pl.DataFrame( - { - "a": [None, None, 3, None, None, 9, 11, None], - } -) -result = df.with_columns(interpolate=mp.interpolate("a")) -print(result) +#df = pl.DataFrame( +# { +# "a": [None, None, 3, None, None, 9, 11, None], +# } +#) +#result = df.with_columns(interpolate=mp.interpolate("a")) +#print(result) + From cba8d80b5630730d93d0985b5a46bf9a72bf760b Mon Sep 17 00:00:00 2001 From: Bruno Conde Kind Date: Mon, 19 Aug 2024 11:44:21 -0300 Subject: [PATCH 2/2] Removed comments preventing a panic --- run.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/run.py b/run.py index bb3c4e0..a235817 100644 --- a/run.py +++ b/run.py @@ -28,11 +28,11 @@ ) ) -#df = pl.DataFrame( -# { -# "a": [None, None, 3, None, None, 9, 11, None], -# } -#) -#result = df.with_columns(interpolate=mp.interpolate("a")) -#print(result) +df = pl.DataFrame( + { + "a": [None, None, 3, None, None, 9, 11, None], + } +) +result = df.with_columns(interpolate=mp.interpolate("a")) +print(result)