From ed7428220a000f56a57a565d0174aa4856479e0d Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 26 Jan 2024 15:40:26 +0000 Subject: [PATCH] update for new versions --- Cargo.lock | 64 ++++++++++++++++---------------------- Cargo.toml | 6 ++-- minimal_plugin/__init__.py | 2 ++ minimal_plugin/utils.py | 49 +++++++++++++++++++++++++++++ pyproject.toml | 2 +- src/expressions.rs | 20 ++++++------ 6 files changed, 92 insertions(+), 51 deletions(-) create mode 100644 minimal_plugin/utils.py diff --git a/Cargo.lock b/Cargo.lock index c5a45a2..7403012 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -350,7 +350,6 @@ dependencies = [ "polars", "pyo3", "pyo3-polars", - "rust-stemmers", "serde", ] @@ -444,9 +443,9 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] name = "polars" -version = "0.36.2" +version = "0.37.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "938048fcda6a8e2ace6eb168bee1b415a92423ce51e418b853bf08fc40349b6b" +checksum = "e43795c49010cb851d45227caa17769e83760e21d260ba6285c563b754e1652f" dependencies = [ "getrandom", "polars-core", @@ -455,9 +454,9 @@ dependencies = [ [[package]] name = "polars-arrow" -version = "0.36.2" +version = "0.37.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce68a02f698ff7787c261aea1b4c040a8fe183a8fb200e2436d7f35d95a1b86f" +checksum = "faacd21a2548fa6d50c72d6b8d4649a8e029a0f3c6c5545b7f436f0610e49b0f" dependencies = [ "ahash", "atoi_simd", @@ -484,22 +483,23 @@ dependencies = [ [[package]] name = "polars-compute" -version = "0.36.2" +version = "0.37.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b14fbc5f141b29b656a4cec4802632e5bff10bf801c6809c6bbfbd4078a044dd" +checksum = "32d9dc87f8003ae0edeef5ad9ac92b2a345480bbe17adad64496113ae84706dd" dependencies = [ "bytemuck", "num-traits", "polars-arrow", + "polars-error", "polars-utils", "version_check", ] [[package]] name = "polars-core" -version = "0.36.2" +version = "0.37.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0f5efe734b6cbe5f97ea769be8360df5324fade396f1f3f5ad7fe9360ca4a23" +checksum = "befd4d280a82219a01035c4f901319ceba65998c594d0c64f9a439cdee1d7777" dependencies = [ "ahash", "bitflags 2.4.1", @@ -525,9 +525,9 @@ dependencies = [ [[package]] name = "polars-error" -version = "0.36.2" +version = "0.37.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6396de788f99ebfc9968e7b6f523e23000506cde4ba6dfc62ae4ce949002a886" +checksum = "50f2435b02d1ba36d8c1f6a722cad04e4c0b2705a3112c5706e6960d405d7798" dependencies = [ "simdutf8", "thiserror", @@ -535,9 +535,9 @@ dependencies = [ [[package]] name = "polars-ffi" -version = "0.36.2" +version = "0.37.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "060c7321917fea97908b38a10b7468f8e99964bc4d22c95a6e9f657e7e485398" +checksum = "f02331289626c8894e7b68467cd2de84d010f90def9bdd479e00e4fbafced7a9" dependencies = [ "polars-arrow", "polars-core", @@ -545,9 +545,9 @@ dependencies = [ [[package]] name = "polars-io" -version = "0.36.2" +version = "0.37.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d0458efe8946f4718fd352f230c0db5a37926bd0d2bd25af79dc24746abaaea" +checksum = "b51fba2cf014cb39c2b38353d601540fb9db643be65abb9ca8ff44b9c4c4a88e" dependencies = [ "ahash", "bytes", @@ -568,9 +568,9 @@ dependencies = [ [[package]] name = "polars-ops" -version = "0.36.2" +version = "0.37.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e09afc456ab11e75e5dcb43e00a01c71f3a46a2781e450054acb6bb096ca78e" +checksum = "6395f5fd5e1adf016fd6403c0a493181c1a349a7a145b2687cdf50a0d630310a" dependencies = [ "ahash", "argminmax", @@ -593,9 +593,9 @@ dependencies = [ [[package]] name = "polars-plan" -version = "0.36.2" +version = "0.37.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "384a175624d050c31c473ee11df9d7af5d729ae626375e522158cfb3d150acd0" +checksum = "7fb7d7527be2aa33baace9000f6772eb9df7cd57ec010a4b273435d2dc1349e8" dependencies = [ "ahash", "bytemuck", @@ -614,9 +614,9 @@ dependencies = [ [[package]] name = "polars-row" -version = "0.36.2" +version = "0.37.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32322f7acbb83db3e9c7697dc821be73d06238da89c817dcc8bc1549a5e9c72f" +checksum = "f4984d97aad3d0db92afe76ebcab10b5e37a1216618b5703ae0d2917ccd6168c" dependencies = [ "polars-arrow", "polars-error", @@ -625,9 +625,9 @@ dependencies = [ [[package]] name = "polars-utils" -version = "0.36.2" +version = "0.37.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b174ca4a77ad47d7b91a0460aaae65bbf874c8bfbaaa5308675dadef3976bbda" +checksum = "38f9c955bb1e9b55d835aeb7fe4e4e8826e01abe5f0ada979ceb7d2b9af7b569" dependencies = [ "ahash", "bytemuck", @@ -719,9 +719,9 @@ dependencies = [ [[package]] name = "pyo3-polars" -version = "0.10.0" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1e983cb07cf665ea6e645ae9263c358062580f23a9aee41618a5706d4a7cc21" +checksum = "02a0967fbc39cd3353b9b1c4e2b7b92ec5237e117b91b2ffa9d069830dd6e71b" dependencies = [ "polars", "polars-core", @@ -736,9 +736,9 @@ dependencies = [ [[package]] name = "pyo3-polars-derive" -version = "0.4.0" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15a1b2ff4deb2959da411b14e602222e0c7002796df0b5d77486775dc28e25a6" +checksum = "e654e869ba1b0b440dde8e11890fb3578fccd75a4b007672e44eed70071517b4" dependencies = [ "polars-core", "polars-ffi", @@ -855,16 +855,6 @@ version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" -[[package]] -name = "rust-stemmers" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e46a2036019fdb888131db7a4c847a1063a7493f971ed94ea82c67eada63ca54" -dependencies = [ - "serde", - "serde_derive", -] - [[package]] name = "rustversion" version = "1.0.14" diff --git a/Cargo.toml b/Cargo.toml index 0949c50..369ce64 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,10 +13,10 @@ crate-type= ["cdylib"] [dependencies] pyo3 = { version = "0.20.0", features = ["extension-module"] } -pyo3-polars = { version = "0.10.0", features = ["derive"] } +pyo3-polars = { version = "0.11.1", features = ["derive"] } serde = { version = "1", features = ["derive"] } -polars = { version = "0.36.2", default-features = false } -rust-stemmers = "1.2.0" +polars = { version = "0.37.0", default-features = false } +# rust-stemmers = "1.2.0" [target.'cfg(target_os = "linux")'.dependencies] jemallocator = { version = "0.5", features = ["disable_initial_exec_tls"] } diff --git a/minimal_plugin/__init__.py b/minimal_plugin/__init__.py index 41667ea..6a744b7 100644 --- a/minimal_plugin/__init__.py +++ b/minimal_plugin/__init__.py @@ -1,10 +1,12 @@ import polars as pl from polars.utils.udfs import _get_shared_lib_location from polars.type_aliases import IntoExpr +from minimal_plugin.utils import parse_into_expr lib = _get_shared_lib_location(__file__) + def noop(expr: IntoExpr) -> pl.Expr: expr = parse_into_expr(expr) return expr.register_plugin( diff --git a/minimal_plugin/utils.py b/minimal_plugin/utils.py new file mode 100644 index 0000000..437e748 --- /dev/null +++ b/minimal_plugin/utils.py @@ -0,0 +1,49 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +import polars as pl + +if TYPE_CHECKING: + from polars.type_aliases import IntoExpr, PolarsDataType + + +def parse_into_expr( + expr: IntoExpr, + *, + str_as_lit: bool = False, + list_as_lit: bool = True, + dtype: PolarsDataType | None = None, +) -> pl.Expr: + """ + Parse a single input into an expression. + + Parameters + ---------- + expr + The input to be parsed as an expression. + str_as_lit + Interpret string input as a string literal. If set to `False` (default), + strings are parsed as column names. + list_as_lit + Interpret list input as a lit literal, If set to `False`, + lists are parsed as `Series` literals. + dtype + If the input is expected to resolve to a literal with a known dtype, pass + this to the `lit` constructor. + + Returns + ------- + polars.Expr + """ + if isinstance(expr, pl.Expr): + pass + elif isinstance(expr, str) and not str_as_lit: + expr = pl.col(expr) + elif isinstance(expr, list) and not list_as_lit: + expr = pl.lit(pl.Series(expr), dtype=dtype) + else: + expr = pl.lit(expr, dtype=dtype) + + return expr + diff --git a/pyproject.toml b/pyproject.toml index 2677fce..97b0b13 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [build-system] -requires = ["maturin>=1.0,<2.0"] +requires = ["maturin>=1.0,<2.0", "polars>=0.26.0"] build-backend = "maturin" [project] diff --git a/src/expressions.rs b/src/expressions.rs index 6e2bc34..65c1dfb 100644 --- a/src/expressions.rs +++ b/src/expressions.rs @@ -180,17 +180,17 @@ fn add_suffix(inputs: &[Series], kwargs: AddSuffixKwargs) -> PolarsResult PolarsResult { - let ca: &StringChunked = inputs[0].str()?; - let en_stemmer = Stemmer::create(Algorithm::English); - let out: StringChunked = ca.apply_to_buffer(|value: &str, output: &mut String| { - write!(output, "{}", en_stemmer.stem(value)).unwrap() - }); - Ok(out.into_series()) -} +// #[polars_expr(output_type=String)] +// fn snowball_stem(inputs: &[Series]) -> PolarsResult { +// let ca: &StringChunked = inputs[0].str()?; +// let en_stemmer = Stemmer::create(Algorithm::English); +// let out: StringChunked = ca.apply_to_buffer(|value: &str, output: &mut String| { +// write!(output, "{}", en_stemmer.stem(value)).unwrap() +// }); +// Ok(out.into_series()) +// } #[polars_expr(output_type=Float64)] fn weighted_mean(inputs: &[Series]) -> PolarsResult {