From 23be92b783cf6454ec412ca0ee2c936be4f81917 Mon Sep 17 00:00:00 2001
From: Tim Saucer <timsaucer@gmail.com>
Date: Fri, 21 Feb 2025 13:03:26 -0500
Subject: [PATCH 1/9] Updating DataFusion to prepare for use of scalar FFI

---
 Cargo.lock                 | 301 ++++++++++++++++++-------------------
 Cargo.toml                 |   9 +-
 src/expr.rs                |  16 +-
 src/expr/aggregate.rs      |   9 +-
 src/expr/aggregate_expr.rs |  11 +-
 src/expr/window.rs         |  24 ++-
 src/functions.rs           |  34 ++---
 7 files changed, 216 insertions(+), 188 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index f1b1ed50a..3708db4f2 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -179,9 +179,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
 
 [[package]]
 name = "arrow"
-version = "54.1.0"
+version = "54.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6422e12ac345a0678d7a17e316238e3a40547ae7f92052b77bd86d5e0239f3fc"
+checksum = "755b6da235ac356a869393c23668c663720b8749dd6f15e52b6c214b4b964cc7"
 dependencies = [
  "arrow-arith",
  "arrow-array",
@@ -201,9 +201,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-arith"
-version = "54.1.0"
+version = "54.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "23cf34bb1f48c41d3475927bcc7be498665b8e80b379b88f62a840337f8b8248"
+checksum = "64656a1e0b13ca766f8440752e9a93e11014eec7b67909986f83ed0ab1fe37b8"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -215,9 +215,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-array"
-version = "54.1.0"
+version = "54.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fb4a06d507f54b70a277be22a127c8ffe0cec6cd98c0ad8a48e77779bbda8223"
+checksum = "57a4a6d2896083cfbdf84a71a863b22460d0708f8206a8373c52e326cc72ea1a"
 dependencies = [
  "ahash",
  "arrow-buffer",
@@ -232,9 +232,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-buffer"
-version = "54.1.0"
+version = "54.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d69d326d5ad1cb82dcefa9ede3fee8fdca98f9982756b16f9cb142f4aa6edc89"
+checksum = "cef870583ce5e4f3b123c181706f2002fb134960f9a911900f64ba4830c7a43a"
 dependencies = [
  "bytes",
  "half",
@@ -243,9 +243,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-cast"
-version = "54.1.0"
+version = "54.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "626e65bd42636a84a238bed49d09c8777e3d825bf81f5087a70111c2831d9870"
+checksum = "1ac7eba5a987f8b4a7d9629206ba48e19a1991762795bbe5d08497b7736017ee"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -264,9 +264,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-csv"
-version = "54.1.0"
+version = "54.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "71c8f959f7a1389b1dbd883cdcd37c3ed12475329c111912f7f69dad8195d8c6"
+checksum = "90f12542b8164398fc9ec595ff783c4cf6044daa89622c5a7201be920e4c0d4c"
 dependencies = [
  "arrow-array",
  "arrow-cast",
@@ -280,9 +280,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-data"
-version = "54.1.0"
+version = "54.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1858e7c7d01c44cf71c21a85534fd1a54501e8d60d1195d0d6fbcc00f4b10754"
+checksum = "b095e8a4f3c309544935d53e04c3bfe4eea4e71c3de6fe0416d1f08bb4441a83"
 dependencies = [
  "arrow-buffer",
  "arrow-schema",
@@ -292,9 +292,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-ipc"
-version = "54.1.0"
+version = "54.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a6bb3f727f049884c7603f0364bc9315363f356b59e9f605ea76541847e06a1e"
+checksum = "65c63da4afedde2b25ef69825cd4663ca76f78f79ffe2d057695742099130ff6"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -306,9 +306,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-json"
-version = "54.1.0"
+version = "54.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "35de94f165ed8830aede72c35f238763794f0d49c69d30c44d49c9834267ff8c"
+checksum = "9551d9400532f23a370cabbea1dc5a53c49230397d41f96c4c8eedf306199305"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -326,9 +326,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-ord"
-version = "54.1.0"
+version = "54.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8aa06e5f267dc53efbacb933485c79b6fc1685d3ffbe870a16ce4e696fb429da"
+checksum = "6c07223476f8219d1ace8cd8d85fa18c4ebd8d945013f25ef5c72e85085ca4ee"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -339,9 +339,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-row"
-version = "54.1.0"
+version = "54.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "66f1144bb456a2f9d82677bd3abcea019217e572fc8f07de5a7bac4b2c56eb2c"
+checksum = "91b194b38bfd89feabc23e798238989c6648b2506ad639be42ec8eb1658d82c4"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -352,18 +352,18 @@ dependencies = [
 
 [[package]]
 name = "arrow-schema"
-version = "54.1.0"
+version = "54.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "105f01ec0090259e9a33a9263ec18ff223ab91a0ea9fbc18042f7e38005142f6"
+checksum = "0f40f6be8f78af1ab610db7d9b236e21d587b7168e368a36275d2e5670096735"
 dependencies = [
  "bitflags 2.8.0",
 ]
 
 [[package]]
 name = "arrow-select"
-version = "54.1.0"
+version = "54.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f690752fdbd2dee278b5f1636fefad8f2f7134c85e20fd59c4199e15a39a6807"
+checksum = "ac265273864a820c4a179fc67182ccc41ea9151b97024e1be956f0f2369c2539"
 dependencies = [
  "ahash",
  "arrow-array",
@@ -375,9 +375,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-string"
-version = "54.1.0"
+version = "54.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d0fff9cd745a7039b66c47ecaf5954460f9fa12eed628f65170117ea93e64ee0"
+checksum = "d44c8eed43be4ead49128370f7131f054839d3d6003e52aebf64322470b8fbd0"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -535,15 +535,16 @@ dependencies = [
 
 [[package]]
 name = "blake3"
-version = "1.5.5"
+version = "1.6.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b8ee0c1824c4dea5b5f81736aff91bae041d2c07ee1192bec91054e10e3e601e"
+checksum = "1230237285e3e10cde447185e8975408ae24deaa67205ce684805c25bc0c7937"
 dependencies = [
  "arrayref",
  "arrayvec",
  "cc",
  "cfg-if",
  "constant_time_eq",
+ "memmap2",
 ]
 
 [[package]]
@@ -606,19 +607,18 @@ dependencies = [
 
 [[package]]
 name = "bzip2"
-version = "0.5.0"
+version = "0.5.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bafdbf26611df8c14810e268ddceda071c297570a5fb360ceddf617fe417ef58"
+checksum = "75b89e7c29231c673a61a46e722602bcd138298f6b9e81e71119693534585f5c"
 dependencies = [
  "bzip2-sys",
- "libc",
 ]
 
 [[package]]
 name = "bzip2-sys"
-version = "0.1.11+1.0.8"
+version = "0.1.12+1.0.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc"
+checksum = "72ebc2f1a417f01e1da30ef264ee86ae31d2dcd2d603ea283d3c244a883ca2a9"
 dependencies = [
  "cc",
  "libc",
@@ -867,29 +867,30 @@ dependencies = [
 [[package]]
 name = "datafusion"
 version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "eae420e7a5b0b7f1c39364cc76cbcd0f5fdc416b2514ae3847c2676bbd60702a"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
 dependencies = [
  "apache-avro",
  "arrow",
- "arrow-array",
  "arrow-ipc",
  "arrow-schema",
- "async-compression",
  "async-trait",
  "bytes",
- "bzip2 0.5.0",
+ "bzip2 0.5.1",
  "chrono",
  "datafusion-catalog",
+ "datafusion-catalog-listing",
  "datafusion-common",
  "datafusion-common-runtime",
+ "datafusion-datasource",
  "datafusion-execution",
  "datafusion-expr",
+ "datafusion-expr-common",
  "datafusion-functions",
  "datafusion-functions-aggregate",
  "datafusion-functions-nested",
  "datafusion-functions-table",
  "datafusion-functions-window",
+ "datafusion-macros",
  "datafusion-optimizer",
  "datafusion-physical-expr",
  "datafusion-physical-expr-common",
@@ -898,7 +899,6 @@ dependencies = [
  "datafusion-sql",
  "flate2",
  "futures",
- "glob",
  "itertools 0.14.0",
  "log",
  "num-traits",
@@ -910,7 +910,6 @@ dependencies = [
  "sqlparser",
  "tempfile",
  "tokio",
- "tokio-util",
  "url",
  "uuid",
  "xz2",
@@ -920,8 +919,7 @@ dependencies = [
 [[package]]
 name = "datafusion-catalog"
 version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6f27987bc22b810939e8dfecc55571e9d50355d6ea8ec1c47af8383a76a6d0e1"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
 dependencies = [
  "arrow",
  "async-trait",
@@ -935,22 +933,38 @@ dependencies = [
  "itertools 0.14.0",
  "log",
  "parking_lot",
- "sqlparser",
+]
+
+[[package]]
+name = "datafusion-catalog-listing"
+version = "45.0.0"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
+dependencies = [
+ "arrow",
+ "async-trait",
+ "datafusion-catalog",
+ "datafusion-common",
+ "datafusion-datasource",
+ "datafusion-execution",
+ "datafusion-expr",
+ "datafusion-physical-expr",
+ "datafusion-physical-expr-common",
+ "datafusion-physical-plan",
+ "futures",
+ "log",
+ "object_store",
+ "tokio",
 ]
 
 [[package]]
 name = "datafusion-common"
 version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e3f6d5b8c9408cc692f7c194b8aa0c0f9b253e065a8d960ad9cdc2a13e697602"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
 dependencies = [
  "ahash",
  "apache-avro",
  "arrow",
- "arrow-array",
- "arrow-buffer",
  "arrow-ipc",
- "arrow-schema",
  "base64 0.22.1",
  "half",
  "hashbrown 0.14.5",
@@ -969,24 +983,52 @@ dependencies = [
 [[package]]
 name = "datafusion-common-runtime"
 version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0d4603c8e8a4baf77660ab7074cc66fc15cc8a18f2ce9dfadb755fc6ee294e48"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
 dependencies = [
  "log",
  "tokio",
 ]
 
+[[package]]
+name = "datafusion-datasource"
+version = "45.0.0"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
+dependencies = [
+ "arrow",
+ "async-compression",
+ "async-trait",
+ "bytes",
+ "bzip2 0.5.1",
+ "chrono",
+ "datafusion-catalog",
+ "datafusion-common",
+ "datafusion-common-runtime",
+ "datafusion-execution",
+ "datafusion-expr",
+ "datafusion-physical-plan",
+ "flate2",
+ "futures",
+ "glob",
+ "itertools 0.14.0",
+ "log",
+ "object_store",
+ "rand",
+ "tokio",
+ "tokio-util",
+ "url",
+ "xz2",
+ "zstd",
+]
+
 [[package]]
 name = "datafusion-doc"
 version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e5bf4bc68623a5cf231eed601ed6eb41f46a37c4d15d11a0bff24cbc8396cd66"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
 
 [[package]]
 name = "datafusion-execution"
 version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "88b491c012cdf8e051053426013429a76f74ee3c2db68496c79c323ca1084d27"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
 dependencies = [
  "arrow",
  "dashmap",
@@ -1004,8 +1046,7 @@ dependencies = [
 [[package]]
 name = "datafusion-expr"
 version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e5a181408d4fc5dc22f9252781a8f39f2d0e5d1b33ec9bde242844980a2689c1"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
 dependencies = [
  "arrow",
  "chrono",
@@ -1025,11 +1066,11 @@ dependencies = [
 [[package]]
 name = "datafusion-expr-common"
 version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d1129b48e8534d8c03c6543bcdccef0b55c8ac0c1272a15a56c67068b6eb1885"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
 dependencies = [
  "arrow",
  "datafusion-common",
+ "indexmap",
  "itertools 0.14.0",
  "paste",
 ]
@@ -1037,13 +1078,10 @@ dependencies = [
 [[package]]
 name = "datafusion-ffi"
 version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ff47a79d442207c168c6e3e1d970c248589c148e4800e5b285ac1b2cb1a230f8"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
 dependencies = [
  "abi_stable",
  "arrow",
- "arrow-array",
- "arrow-schema",
  "async-ffi",
  "async-trait",
  "datafusion",
@@ -1058,8 +1096,7 @@ dependencies = [
 [[package]]
 name = "datafusion-functions"
 version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6125874e4856dfb09b59886784fcb74cde5cfc5930b3a80a1a728ef7a010df6b"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
 dependencies = [
  "arrow",
  "arrow-buffer",
@@ -1073,7 +1110,6 @@ dependencies = [
  "datafusion-expr",
  "datafusion-expr-common",
  "datafusion-macros",
- "hashbrown 0.14.5",
  "hex",
  "itertools 0.14.0",
  "log",
@@ -1088,13 +1124,10 @@ dependencies = [
 [[package]]
 name = "datafusion-functions-aggregate"
 version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f3add7b1d3888e05e7c95f2b281af900ca69ebdcb21069ba679b33bde8b3b9d6"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
 dependencies = [
  "ahash",
  "arrow",
- "arrow-buffer",
- "arrow-schema",
  "datafusion-common",
  "datafusion-doc",
  "datafusion-execution",
@@ -1111,8 +1144,7 @@ dependencies = [
 [[package]]
 name = "datafusion-functions-aggregate-common"
 version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6e18baa4cfc3d2f144f74148ed68a1f92337f5072b6dde204a0dbbdf3324989c"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
 dependencies = [
  "ahash",
  "arrow",
@@ -1124,14 +1156,10 @@ dependencies = [
 [[package]]
 name = "datafusion-functions-nested"
 version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3ec5ee8cecb0dc370291279673097ddabec03a011f73f30d7f1096457127e03e"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
 dependencies = [
  "arrow",
- "arrow-array",
- "arrow-buffer",
  "arrow-ord",
- "arrow-schema",
  "datafusion-common",
  "datafusion-doc",
  "datafusion-execution",
@@ -1148,8 +1176,7 @@ dependencies = [
 [[package]]
 name = "datafusion-functions-table"
 version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2c403ddd473bbb0952ba880008428b3c7febf0ed3ce1eec35a205db20efb2a36"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
 dependencies = [
  "arrow",
  "async-trait",
@@ -1164,8 +1191,7 @@ dependencies = [
 [[package]]
 name = "datafusion-functions-window"
 version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1ab18c2fb835614d06a75f24a9e09136d3a8c12a92d97c95a6af316a1787a9c5"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
 dependencies = [
  "datafusion-common",
  "datafusion-doc",
@@ -1181,8 +1207,7 @@ dependencies = [
 [[package]]
 name = "datafusion-functions-window-common"
 version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a77b73bc15e7d1967121fdc7a55d819bfb9d6c03766a6c322247dce9094a53a4"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
 dependencies = [
  "datafusion-common",
  "datafusion-physical-expr-common",
@@ -1191,8 +1216,7 @@ dependencies = [
 [[package]]
 name = "datafusion-macros"
 version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "09369b8d962291e808977cf94d495fd8b5b38647232d7ef562c27ac0f495b0af"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
 dependencies = [
  "datafusion-expr",
  "quote",
@@ -1202,8 +1226,7 @@ dependencies = [
 [[package]]
 name = "datafusion-optimizer"
 version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2403a7e4a84637f3de7d8d4d7a9ccc0cc4be92d89b0161ba3ee5be82f0531c54"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
 dependencies = [
  "arrow",
  "chrono",
@@ -1221,14 +1244,10 @@ dependencies = [
 [[package]]
 name = "datafusion-physical-expr"
 version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "86ff72ac702b62dbf2650c4e1d715ebd3e4aab14e3885e72e8549e250307347c"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
 dependencies = [
  "ahash",
  "arrow",
- "arrow-array",
- "arrow-buffer",
- "arrow-schema",
  "datafusion-common",
  "datafusion-expr",
  "datafusion-expr-common",
@@ -1240,18 +1259,16 @@ dependencies = [
  "itertools 0.14.0",
  "log",
  "paste",
- "petgraph 0.7.1",
+ "petgraph",
 ]
 
 [[package]]
 name = "datafusion-physical-expr-common"
 version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "60982b7d684e25579ee29754b4333057ed62e2cc925383c5f0bd8cab7962f435"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
 dependencies = [
  "ahash",
  "arrow",
- "arrow-buffer",
  "datafusion-common",
  "datafusion-expr-common",
  "hashbrown 0.14.5",
@@ -1261,11 +1278,9 @@ dependencies = [
 [[package]]
 name = "datafusion-physical-optimizer"
 version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ac5e85c189d5238a5cf181a624e450c4cd4c66ac77ca551d6f3ff9080bac90bb"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
 dependencies = [
  "arrow",
- "arrow-schema",
  "datafusion-common",
  "datafusion-execution",
  "datafusion-expr",
@@ -1273,23 +1288,18 @@ dependencies = [
  "datafusion-physical-expr",
  "datafusion-physical-expr-common",
  "datafusion-physical-plan",
- "futures",
  "itertools 0.14.0",
  "log",
  "recursive",
- "url",
 ]
 
 [[package]]
 name = "datafusion-physical-plan"
 version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c36bf163956d7e2542657c78b3383fdc78f791317ef358a359feffcdb968106f"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
 dependencies = [
  "ahash",
  "arrow",
- "arrow-array",
- "arrow-buffer",
  "arrow-ord",
  "arrow-schema",
  "async-trait",
@@ -1315,8 +1325,7 @@ dependencies = [
 [[package]]
 name = "datafusion-proto"
 version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2db5d79f0c974041787b899d24dc91bdab2ff112d1942dd71356a4ce3b407e6c"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
 dependencies = [
  "arrow",
  "chrono",
@@ -1331,8 +1340,7 @@ dependencies = [
 [[package]]
 name = "datafusion-proto-common"
 version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "de21bde1603aac0ff32cf478e47081be6e3583c6861fe8f57034da911efe7578"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
 dependencies = [
  "arrow",
  "datafusion-common",
@@ -1365,12 +1373,9 @@ dependencies = [
 [[package]]
 name = "datafusion-sql"
 version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e13caa4daede211ecec53c78b13c503b592794d125f9a3cc3afe992edf9e7f43"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
 dependencies = [
  "arrow",
- "arrow-array",
- "arrow-schema",
  "bigdecimal",
  "datafusion-common",
  "datafusion-expr",
@@ -1384,10 +1389,8 @@ dependencies = [
 [[package]]
 name = "datafusion-substrait"
 version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1634405abd8bd3c64c352f2da2f2aec6d80a815930257e0db0ce4ff5daf00944"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
 dependencies = [
- "arrow-buffer",
  "async-recursion",
  "async-trait",
  "chrono",
@@ -1456,12 +1459,6 @@ version = "2.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
 
-[[package]]
-name = "fixedbitset"
-version = "0.4.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80"
-
 [[package]]
 name = "fixedbitset"
 version = "0.5.7"
@@ -2243,6 +2240,15 @@ version = "2.7.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
 
+[[package]]
+name = "memmap2"
+version = "0.9.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fd3f7eed9d3848f8b98834af67102b720745c4ec028fcd0aa0239277e7de374f"
+dependencies = [
+ "libc",
+]
+
 [[package]]
 name = "memoffset"
 version = "0.9.1"
@@ -2455,9 +2461,9 @@ dependencies = [
 
 [[package]]
 name = "parquet"
-version = "54.1.0"
+version = "54.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8a01a0efa30bbd601ae85b375c728efdb211ade54390281628a7b16708beb235"
+checksum = "761c44d824fe83106e0600d2510c07bf4159a4985bf0569b513ea4288dc1b4fb"
 dependencies = [
  "ahash",
  "arrow-array",
@@ -2548,23 +2554,13 @@ version = "2.3.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
 
-[[package]]
-name = "petgraph"
-version = "0.6.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db"
-dependencies = [
- "fixedbitset 0.4.2",
- "indexmap",
-]
-
 [[package]]
 name = "petgraph"
 version = "0.7.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772"
 dependencies = [
- "fixedbitset 0.5.7",
+ "fixedbitset",
  "indexmap",
 ]
 
@@ -2660,9 +2656,9 @@ dependencies = [
 
 [[package]]
 name = "prost"
-version = "0.13.4"
+version = "0.13.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2c0fef6c4230e4ccf618a35c59d7ede15dea37de8427500f50aff708806e42ec"
+checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5"
 dependencies = [
  "bytes",
  "prost-derive",
@@ -2670,16 +2666,16 @@ dependencies = [
 
 [[package]]
 name = "prost-build"
-version = "0.13.4"
+version = "0.13.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d0f3e5beed80eb580c68e2c600937ac2c4eedabdfd5ef1e5b7ea4f3fba84497b"
+checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf"
 dependencies = [
  "heck",
- "itertools 0.13.0",
+ "itertools 0.14.0",
  "log",
  "multimap",
  "once_cell",
- "petgraph 0.6.5",
+ "petgraph",
  "prettyplease",
  "prost",
  "prost-types",
@@ -2690,12 +2686,12 @@ dependencies = [
 
 [[package]]
 name = "prost-derive"
-version = "0.13.4"
+version = "0.13.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "157c5a9d7ea5c2ed2d9fb8f495b64759f7816c7eaea54ba3978f0d63000162e3"
+checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d"
 dependencies = [
  "anyhow",
- "itertools 0.13.0",
+ "itertools 0.14.0",
  "proc-macro2",
  "quote",
  "syn 2.0.98",
@@ -2703,9 +2699,9 @@ dependencies = [
 
 [[package]]
 name = "prost-types"
-version = "0.13.4"
+version = "0.13.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cc2f1e56baa61e93533aebc21af4d2134b70f66275e0fcdf3cbe43d77ff7e8fc"
+checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16"
 dependencies = [
  "prost",
 ]
@@ -3426,11 +3422,12 @@ checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
 
 [[package]]
 name = "sqlparser"
-version = "0.53.0"
+version = "0.54.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "05a528114c392209b3264855ad491fcce534b94a38771b0a0b97a79379275ce8"
+checksum = "c66e3b7374ad4a6af849b08b3e7a6eda0edbd82f0fd59b57e22671bf16979899"
 dependencies = [
  "log",
+ "recursive",
  "sqlparser_derive",
 ]
 
@@ -3491,9 +3488,9 @@ dependencies = [
 
 [[package]]
 name = "substrait"
-version = "0.52.3"
+version = "0.53.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5db15789cecbfdf6b1fcf2db807e767c92273bdc407ac057c2194b070c597756"
+checksum = "6fac3d70185423235f37b889764e184b81a5af4bb7c95833396ee9bd92577e1b"
 dependencies = [
  "heck",
  "pbjson",
@@ -3952,7 +3949,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ced87ca4be083373936a67f8de945faa23b6b42384bd5b64434850802c6dccd0"
 dependencies = [
  "getrandom 0.3.1",
+ "js-sys",
  "serde",
+ "wasm-bindgen",
 ]
 
 [[package]]
diff --git a/Cargo.toml b/Cargo.toml
index d18e0e8f0..a9e167a61 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -37,7 +37,7 @@ substrait = ["dep:datafusion-substrait"]
 tokio = { version = "1.42", features = ["macros", "rt", "rt-multi-thread", "sync"] }
 pyo3 = { version = "0.23", features = ["extension-module", "abi3", "abi3-py38"] }
 pyo3-async-runtimes = { version = "0.23", features = ["tokio-runtime"]}
-arrow = { version = "54", features = ["pyarrow"] }
+arrow = { version = "54.2.0", features = ["pyarrow"] }
 datafusion = { version = "45.0.0", features = ["avro", "unicode_expressions"] }
 datafusion-substrait = { version = "45.0.0", optional = true }
 datafusion-proto = { version = "45.0.0" }
@@ -61,3 +61,10 @@ crate-type = ["cdylib", "rlib"]
 [profile.release]
 lto = true
 codegen-units = 1
+
+# TODO: remove once datafusion is upgraded to 46. Feature/optional flags stay in [dependencies]; Cargo ignores them in [patch].
+[patch.crates-io]
+datafusion = { git = "https://github.com/apache/datafusion.git", rev = "8ab0661a39bd69783b31b949e7a768fb518629e7" }
+datafusion-substrait = { git = "https://github.com/apache/datafusion.git", rev = "8ab0661a39bd69783b31b949e7a768fb518629e7" }
+datafusion-proto = { git = "https://github.com/apache/datafusion.git", rev = "8ab0661a39bd69783b31b949e7a768fb518629e7" }
+datafusion-ffi = { git = "https://github.com/apache/datafusion.git", rev = "8ab0661a39bd69783b31b949e7a768fb518629e7" }
diff --git a/src/expr.rs b/src/expr.rs
index e750be6a4..6f1f5a26c 100644
--- a/src/expr.rs
+++ b/src/expr.rs
@@ -15,6 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
+use datafusion::logical_expr::expr::{AggregateFunctionParams, WindowFunctionParams};
 use datafusion::logical_expr::utils::exprlist_to_fields;
 use datafusion::logical_expr::{
     ExprFuncBuilder, ExprFunctionExt, LogicalPlan, WindowFunctionDefinition,
@@ -393,12 +394,15 @@ impl PyExpr {
             | Expr::TryCast(TryCast { expr, .. })
             | Expr::InSubquery(InSubquery { expr, .. }) => Ok(vec![PyExpr::from(*expr.clone())]),
 
-            // Expr variants containing a collection of Expr(s) for operands
-            Expr::AggregateFunction(AggregateFunction { args, .. })
+            Expr::AggregateFunction(AggregateFunction {
+                params: AggregateFunctionParams { args, .. },
+                ..
+            })
             | Expr::ScalarFunction(ScalarFunction { args, .. })
-            | Expr::WindowFunction(WindowFunction { args, .. }) => {
-                Ok(args.iter().map(|arg| PyExpr::from(arg.clone())).collect())
-            }
+            | Expr::WindowFunction(WindowFunction {
+                params: WindowFunctionParams { args, .. },
+                ..
+            }) => Ok(args.iter().map(|arg| PyExpr::from(arg.clone())).collect()),
 
             // Expr(s) that require more specific processing
             Expr::Case(Case {
@@ -575,7 +579,7 @@ impl PyExpr {
             Expr::AggregateFunction(agg_fn) => {
                 let window_fn = Expr::WindowFunction(WindowFunction::new(
                     WindowFunctionDefinition::AggregateUDF(agg_fn.func.clone()),
-                    agg_fn.args.clone(),
+                    agg_fn.params.args.clone(),
                 ));
 
                 add_builder_fns_to_window(
diff --git a/src/expr/aggregate.rs b/src/expr/aggregate.rs
index 8fc9da5b0..f4283eb4e 100644
--- a/src/expr/aggregate.rs
+++ b/src/expr/aggregate.rs
@@ -16,7 +16,7 @@
 // under the License.
 
 use datafusion::common::DataFusionError;
-use datafusion::logical_expr::expr::{AggregateFunction, Alias};
+use datafusion::logical_expr::expr::{AggregateFunction, AggregateFunctionParams, Alias};
 use datafusion::logical_expr::logical_plan::Aggregate;
 use datafusion::logical_expr::Expr;
 use pyo3::{prelude::*, IntoPyObjectExt};
@@ -126,9 +126,10 @@ impl PyAggregate {
         match expr {
             // TODO: This Alias logic seems to be returning some strange results that we should investigate
             Expr::Alias(Alias { expr, .. }) => self._aggregation_arguments(expr.as_ref()),
-            Expr::AggregateFunction(AggregateFunction { func: _, args, .. }) => {
-                Ok(args.iter().map(|e| PyExpr::from(e.clone())).collect())
-            }
+            Expr::AggregateFunction(AggregateFunction {
+                params: AggregateFunctionParams { args, .. },
+                ..
+            }) => Ok(args.iter().map(|e| PyExpr::from(e.clone())).collect()),
             _ => Err(py_type_err(
                 "Encountered a non Aggregate type in aggregation_arguments",
             )),
diff --git a/src/expr/aggregate_expr.rs b/src/expr/aggregate_expr.rs
index 09471097f..c09f116e3 100644
--- a/src/expr/aggregate_expr.rs
+++ b/src/expr/aggregate_expr.rs
@@ -40,7 +40,13 @@ impl From<AggregateFunction> for PyAggregateFunction {
 
 impl Display for PyAggregateFunction {
     fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
-        let args: Vec<String> = self.aggr.args.iter().map(|expr| expr.to_string()).collect();
+        let args: Vec<String> = self
+            .aggr
+            .params
+            .args
+            .iter()
+            .map(|expr| expr.to_string())
+            .collect();
         write!(f, "{}({})", self.aggr.func.name(), args.join(", "))
     }
 }
@@ -54,12 +60,13 @@ impl PyAggregateFunction {
 
     /// is this a distinct aggregate such as `COUNT(DISTINCT expr)`
     fn is_distinct(&self) -> bool {
-        self.aggr.distinct
+        self.aggr.params.distinct
     }
 
     /// Get the arguments to the aggregate function
     fn args(&self) -> Vec<PyExpr> {
         self.aggr
+            .params
             .args
             .iter()
             .map(|expr| PyExpr::from(expr.clone()))
diff --git a/src/expr/window.rs b/src/expr/window.rs
index 13deaec25..c5467bf94 100644
--- a/src/expr/window.rs
+++ b/src/expr/window.rs
@@ -16,7 +16,7 @@
 // under the License.
 
 use datafusion::common::{DataFusionError, ScalarValue};
-use datafusion::logical_expr::expr::WindowFunction;
+use datafusion::logical_expr::expr::{WindowFunction, WindowFunctionParams};
 use datafusion::logical_expr::{Expr, Window, WindowFrame, WindowFrameBound, WindowFrameUnits};
 use pyo3::{prelude::*, IntoPyObjectExt};
 use std::fmt::{self, Display, Formatter};
@@ -118,7 +118,10 @@ impl PyWindowExpr {
     /// Returns order by columns in a window function expression
     pub fn get_sort_exprs(&self, expr: PyExpr) -> PyResult<Vec<PySortExpr>> {
         match expr.expr.unalias() {
-            Expr::WindowFunction(WindowFunction { order_by, .. }) => py_sort_expr_list(&order_by),
+            Expr::WindowFunction(WindowFunction {
+                params: WindowFunctionParams { order_by, .. },
+                ..
+            }) => py_sort_expr_list(&order_by),
             other => Err(not_window_function_err(other)),
         }
     }
@@ -126,9 +129,10 @@ impl PyWindowExpr {
     /// Return partition by columns in a window function expression
     pub fn get_partition_exprs(&self, expr: PyExpr) -> PyResult<Vec<PyExpr>> {
         match expr.expr.unalias() {
-            Expr::WindowFunction(WindowFunction { partition_by, .. }) => {
-                py_expr_list(&partition_by)
-            }
+            Expr::WindowFunction(WindowFunction {
+                params: WindowFunctionParams { partition_by, .. },
+                ..
+            }) => py_expr_list(&partition_by),
             other => Err(not_window_function_err(other)),
         }
     }
@@ -136,7 +140,10 @@ impl PyWindowExpr {
     /// Return input args for window function
     pub fn get_args(&self, expr: PyExpr) -> PyResult<Vec<PyExpr>> {
         match expr.expr.unalias() {
-            Expr::WindowFunction(WindowFunction { args, .. }) => py_expr_list(&args),
+            Expr::WindowFunction(WindowFunction {
+                params: WindowFunctionParams { args, .. },
+                ..
+            }) => py_expr_list(&args),
             other => Err(not_window_function_err(other)),
         }
     }
@@ -152,7 +159,10 @@ impl PyWindowExpr {
     /// Returns a Pywindow frame for a given window function expression
     pub fn get_frame(&self, expr: PyExpr) -> Option<PyWindowFrame> {
         match expr.expr.unalias() {
-            Expr::WindowFunction(WindowFunction { window_frame, .. }) => Some(window_frame.into()),
+            Expr::WindowFunction(WindowFunction {
+                params: WindowFunctionParams { window_frame, .. },
+                ..
+            }) => Some(window_frame.into()),
             _ => None,
         }
     }
diff --git a/src/functions.rs b/src/functions.rs
index 6a8abb18d..cede3250a 100644
--- a/src/functions.rs
+++ b/src/functions.rs
@@ -17,6 +17,7 @@
 
 use datafusion::functions_aggregate::all_default_aggregate_functions;
 use datafusion::functions_window::all_default_window_functions;
+use datafusion::logical_expr::expr::WindowFunctionParams;
 use datafusion::logical_expr::ExprFunctionExt;
 use datafusion::logical_expr::WindowFrame;
 use pyo3::{prelude::*, wrap_pyfunction};
@@ -196,10 +197,7 @@ fn alias(expr: PyExpr, name: &str) -> PyResult<PyExpr> {
 #[pyfunction]
 fn col(name: &str) -> PyResult<PyExpr> {
     Ok(PyExpr {
-        expr: datafusion::logical_expr::Expr::Column(Column {
-            relation: None,
-            name: name.to_string(),
-        }),
+        expr: datafusion::logical_expr::Expr::Column(Column::new(None::<TableReference>, name)),
     })
 }
 
@@ -314,19 +312,21 @@ fn window(
     Ok(PyExpr {
         expr: datafusion::logical_expr::Expr::WindowFunction(WindowFunction {
             fun,
-            args: args.into_iter().map(|x| x.expr).collect::<Vec<_>>(),
-            partition_by: partition_by
-                .unwrap_or_default()
-                .into_iter()
-                .map(|x| x.expr)
-                .collect::<Vec<_>>(),
-            order_by: order_by
-                .unwrap_or_default()
-                .into_iter()
-                .map(|x| x.into())
-                .collect::<Vec<_>>(),
-            window_frame,
-            null_treatment: None,
+            params: WindowFunctionParams {
+                args: args.into_iter().map(|x| x.expr).collect::<Vec<_>>(),
+                partition_by: partition_by
+                    .unwrap_or_default()
+                    .into_iter()
+                    .map(|x| x.expr)
+                    .collect::<Vec<_>>(),
+                order_by: order_by
+                    .unwrap_or_default()
+                    .into_iter()
+                    .map(|x| x.into())
+                    .collect::<Vec<_>>(),
+                window_frame,
+                null_treatment: None,
+            },
         }),
     })
 }

From e20884dd5715f4c939d8b9310fc1c52ee1ad1ea7 Mon Sep 17 00:00:00 2001
From: Tim Saucer <timsaucer@gmail.com>
Date: Fri, 21 Feb 2025 13:04:40 -0500
Subject: [PATCH 2/9] Initial commit for scalar udf pycapsule

---
 src/udf.rs | 26 ++++++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/src/udf.rs b/src/udf.rs
index 574c9d7b5..3102b418b 100644
--- a/src/udf.rs
+++ b/src/udf.rs
@@ -17,6 +17,8 @@
 
 use std::sync::Arc;
 
+use datafusion_ffi::udf::{FFI_ScalarUDF, ForeignScalarUDF};
+use pyo3::types::PyCapsule;
 use pyo3::{prelude::*, types::PyTuple};
 
 use datafusion::arrow::array::{make_array, Array, ArrayData, ArrayRef};
@@ -28,9 +30,9 @@ use datafusion::logical_expr::function::ScalarFunctionImplementation;
 use datafusion::logical_expr::ScalarUDF;
 use datafusion::logical_expr::{create_udf, ColumnarValue};
 
-use crate::errors::to_datafusion_err;
+use crate::errors::{py_datafusion_err, to_datafusion_err};
 use crate::expr::PyExpr;
-use crate::utils::parse_volatility;
+use crate::utils::{parse_volatility, validate_pycapsule};
 
 /// Create a Rust callable function from a python function that expects pyarrow arrays
 fn pyarrow_function_to_rust(
@@ -105,6 +107,26 @@ impl PyScalarUDF {
         Ok(Self { function })
     }
 
+    #[staticmethod]
+    fn from_ffi(func: Bound<PyAny>) -> PyResult<Self> {
+        if func.hasattr("__datafusion_scalar_udf__")? {
+            let capsule = func.getattr("__datafusion_scalar_udf__")?.call0()?;
+            let capsule = capsule.downcast::<PyCapsule>()?;
+            validate_pycapsule(capsule, "datafusion_scalar_udf")?;
+            // NOTE(review): assumes a capsule with this name holds an FFI_ScalarUDF.
+            let func = unsafe { capsule.reference::<FFI_ScalarUDF>() };
+            let func: ForeignScalarUDF = func.try_into().map_err(py_datafusion_err)?;
+
+            Ok(Self {
+                function: ScalarUDF::from(func),
+            })
+        } else {
+            Err(py_datafusion_err(
+                "__datafusion_scalar_udf__ does not exist on ScalarUDF object.",
+            ))
+        }
+    }
+
     /// creates a new PyExpr with the call of the udf
     #[pyo3(signature = (*args))]
     fn __call__(&self, args: Vec<PyExpr>) -> PyResult<PyExpr> {

From c6415f8af6f70fa82629a5d7d3846890a0234b7a Mon Sep 17 00:00:00 2001
From: Tim Saucer <timsaucer@gmail.com>
Date: Fri, 21 Feb 2025 13:40:38 -0500
Subject: [PATCH 3/9] Adding python wrapper classes for FFI scalar udf

---
 python/datafusion/udf.py | 38 ++++++++++++++++++++++++++++----------
 1 file changed, 28 insertions(+), 10 deletions(-)

diff --git a/python/datafusion/udf.py b/python/datafusion/udf.py
index c97f453d0..f87c4f369 100644
--- a/python/datafusion/udf.py
+++ b/python/datafusion/udf.py
@@ -21,7 +21,7 @@
 
 from abc import ABCMeta, abstractmethod
 from enum import Enum
-from typing import TYPE_CHECKING, Callable, List, Optional, TypeVar
+from typing import TYPE_CHECKING, Callable, List, Optional, Protocol, TypeVar
 
 import pyarrow
 
@@ -76,6 +76,15 @@ def __str__(self):
         return self.name.lower()
 
 
+class ScalarUDFExportable(Protocol):
+    """Type hint for object that has __datafusion_scalar_udf__ PyCapsule.
+
+    https://datafusion.apache.org/python/user-guide/common-operations/udf-and-udfa.html
+    """
+
+    def __datafusion_scalar_udf__(self) -> object: ...  # noqa: D105
+
+
 class ScalarUDF:
     """Class for performing scalar user-defined functions (UDF).
 
@@ -86,20 +95,23 @@ class ScalarUDF:
     def __init__(
         self,
         name: Optional[str],
-        func: Callable[..., _R],
-        input_types: pyarrow.DataType | list[pyarrow.DataType],
-        return_type: _R,
-        volatility: Volatility | str,
+        func: Callable[..., _R] | df_internal.ScalarUDF,
+        input_types: pyarrow.DataType | list[pyarrow.DataType] | None,
+        return_type: Optional[_R],
+        volatility: Volatility | str | None,
     ) -> None:
         """Instantiate a scalar user-defined function (UDF).
 
         See helper method :py:func:`udf` for argument details.
         """
-        if isinstance(input_types, pyarrow.DataType):
-            input_types = [input_types]
-        self._udf = df_internal.ScalarUDF(
-            name, func, input_types, return_type, str(volatility)
-        )
+        if isinstance(func, df_internal.ScalarUDF):
+            self._udf = func
+        else:
+            if isinstance(input_types, pyarrow.DataType):
+                input_types = [input_types]
+            self._udf = df_internal.ScalarUDF(
+                name, func, input_types, return_type, str(volatility)
+            )
 
     def __call__(self, *args: Expr) -> Expr:
         """Execute the UDF.
@@ -110,6 +122,12 @@ def __call__(self, *args: Expr) -> Expr:
         args_raw = [arg.expr for arg in args]
         return Expr(self._udf.__call__(*args_raw))
 
+    @staticmethod
+    def from_ffi(func: ScalarUDFExportable) -> ScalarUDF:
+        """Create a ScalarUDF from an object exposing ``__datafusion_scalar_udf__``."""
+        udf = df_internal.ScalarUDF.from_ffi(func)
+        return ScalarUDF(None, udf, None, None, None)
+
     @staticmethod
     def udf(
         func: Callable[..., _R],

From 4496639c9137a0eb29fc46edc0105513b364c7cd Mon Sep 17 00:00:00 2001
From: Tim Saucer <timsaucer@gmail.com>
Date: Fri, 21 Feb 2025 13:41:35 -0500
Subject: [PATCH 4/9] Renaming folder for ffi example since we will be using it
 for more than table provider

---
 examples/{ffi-table-provider => ffi-library}/.cargo/config.toml   | 0
 examples/{ffi-table-provider => ffi-library}/Cargo.lock           | 0
 examples/{ffi-table-provider => ffi-library}/Cargo.toml           | 0
 examples/{ffi-table-provider => ffi-library}/build.rs             | 0
 examples/{ffi-table-provider => ffi-library}/pyproject.toml       | 0
 .../python/tests/_test_table_provider.py                          | 0
 examples/{ffi-table-provider => ffi-library}/src/lib.rs           | 0
 7 files changed, 0 insertions(+), 0 deletions(-)
 rename examples/{ffi-table-provider => ffi-library}/.cargo/config.toml (100%)
 rename examples/{ffi-table-provider => ffi-library}/Cargo.lock (100%)
 rename examples/{ffi-table-provider => ffi-library}/Cargo.toml (100%)
 rename examples/{ffi-table-provider => ffi-library}/build.rs (100%)
 rename examples/{ffi-table-provider => ffi-library}/pyproject.toml (100%)
 rename examples/{ffi-table-provider => ffi-library}/python/tests/_test_table_provider.py (100%)
 rename examples/{ffi-table-provider => ffi-library}/src/lib.rs (100%)

diff --git a/examples/ffi-table-provider/.cargo/config.toml b/examples/ffi-library/.cargo/config.toml
similarity index 100%
rename from examples/ffi-table-provider/.cargo/config.toml
rename to examples/ffi-library/.cargo/config.toml
diff --git a/examples/ffi-table-provider/Cargo.lock b/examples/ffi-library/Cargo.lock
similarity index 100%
rename from examples/ffi-table-provider/Cargo.lock
rename to examples/ffi-library/Cargo.lock
diff --git a/examples/ffi-table-provider/Cargo.toml b/examples/ffi-library/Cargo.toml
similarity index 100%
rename from examples/ffi-table-provider/Cargo.toml
rename to examples/ffi-library/Cargo.toml
diff --git a/examples/ffi-table-provider/build.rs b/examples/ffi-library/build.rs
similarity index 100%
rename from examples/ffi-table-provider/build.rs
rename to examples/ffi-library/build.rs
diff --git a/examples/ffi-table-provider/pyproject.toml b/examples/ffi-library/pyproject.toml
similarity index 100%
rename from examples/ffi-table-provider/pyproject.toml
rename to examples/ffi-library/pyproject.toml
diff --git a/examples/ffi-table-provider/python/tests/_test_table_provider.py b/examples/ffi-library/python/tests/_test_table_provider.py
similarity index 100%
rename from examples/ffi-table-provider/python/tests/_test_table_provider.py
rename to examples/ffi-library/python/tests/_test_table_provider.py
diff --git a/examples/ffi-table-provider/src/lib.rs b/examples/ffi-library/src/lib.rs
similarity index 100%
rename from examples/ffi-table-provider/src/lib.rs
rename to examples/ffi-library/src/lib.rs

From 0db0bf655e2e0976d4d49c1109501ffe4b83fb37 Mon Sep 17 00:00:00 2001
From: Tim Saucer <timsaucer@gmail.com>
Date: Fri, 21 Feb 2025 13:52:57 -0500
Subject: [PATCH 5/9] Move table provider out of lib so we can add in scalar
 udf without confusion

---
 examples/ffi-library/Cargo.lock               | 101 ++++++++++------
 examples/ffi-library/Cargo.toml               |   4 +-
 examples/ffi-library/pyproject.toml           |   2 +-
 .../python/tests/_test_table_provider.py      |   2 +-
 examples/ffi-library/src/lib.rs               |  97 +---------------
 examples/ffi-library/src/table_provider.rs    | 109 ++++++++++++++++++
 6 files changed, 180 insertions(+), 135 deletions(-)
 create mode 100644 examples/ffi-library/src/table_provider.rs

diff --git a/examples/ffi-library/Cargo.lock b/examples/ffi-library/Cargo.lock
index 32af85180..58b7ca362 100644
--- a/examples/ffi-library/Cargo.lock
+++ b/examples/ffi-library/Cargo.lock
@@ -766,7 +766,8 @@ dependencies = [
 [[package]]
 name = "datafusion"
 version = "45.0.0"
-source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eae420e7a5b0b7f1c39364cc76cbcd0f5fdc416b2514ae3847c2676bbd60702a"
 dependencies = [
  "arrow",
  "arrow-array",
@@ -816,7 +817,8 @@ dependencies = [
 [[package]]
 name = "datafusion-catalog"
 version = "45.0.0"
-source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6f27987bc22b810939e8dfecc55571e9d50355d6ea8ec1c47af8383a76a6d0e1"
 dependencies = [
  "arrow",
  "async-trait",
@@ -836,7 +838,8 @@ dependencies = [
 [[package]]
 name = "datafusion-common"
 version = "45.0.0"
-source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e3f6d5b8c9408cc692f7c194b8aa0c0f9b253e065a8d960ad9cdc2a13e697602"
 dependencies = [
  "ahash",
  "arrow",
@@ -862,7 +865,8 @@ dependencies = [
 [[package]]
 name = "datafusion-common-runtime"
 version = "45.0.0"
-source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0d4603c8e8a4baf77660ab7074cc66fc15cc8a18f2ce9dfadb755fc6ee294e48"
 dependencies = [
  "log",
  "tokio",
@@ -871,12 +875,14 @@ dependencies = [
 [[package]]
 name = "datafusion-doc"
 version = "45.0.0"
-source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e5bf4bc68623a5cf231eed601ed6eb41f46a37c4d15d11a0bff24cbc8396cd66"
 
 [[package]]
 name = "datafusion-execution"
 version = "45.0.0"
-source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "88b491c012cdf8e051053426013429a76f74ee3c2db68496c79c323ca1084d27"
 dependencies = [
  "arrow",
  "dashmap",
@@ -894,7 +900,8 @@ dependencies = [
 [[package]]
 name = "datafusion-expr"
 version = "45.0.0"
-source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e5a181408d4fc5dc22f9252781a8f39f2d0e5d1b33ec9bde242844980a2689c1"
 dependencies = [
  "arrow",
  "chrono",
@@ -914,7 +921,8 @@ dependencies = [
 [[package]]
 name = "datafusion-expr-common"
 version = "45.0.0"
-source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d1129b48e8534d8c03c6543bcdccef0b55c8ac0c1272a15a56c67068b6eb1885"
 dependencies = [
  "arrow",
  "datafusion-common",
@@ -925,7 +933,8 @@ dependencies = [
 [[package]]
 name = "datafusion-ffi"
 version = "45.0.0"
-source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ff47a79d442207c168c6e3e1d970c248589c148e4800e5b285ac1b2cb1a230f8"
 dependencies = [
  "abi_stable",
  "arrow",
@@ -942,10 +951,24 @@ dependencies = [
  "tokio",
 ]
 
+[[package]]
+name = "datafusion-ffi-library"
+version = "0.1.0"
+dependencies = [
+ "arrow",
+ "arrow-array",
+ "arrow-schema",
+ "datafusion",
+ "datafusion-ffi",
+ "pyo3",
+ "pyo3-build-config",
+]
+
 [[package]]
 name = "datafusion-functions"
 version = "45.0.0"
-source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6125874e4856dfb09b59886784fcb74cde5cfc5930b3a80a1a728ef7a010df6b"
 dependencies = [
  "arrow",
  "arrow-buffer",
@@ -974,7 +997,8 @@ dependencies = [
 [[package]]
 name = "datafusion-functions-aggregate"
 version = "45.0.0"
-source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f3add7b1d3888e05e7c95f2b281af900ca69ebdcb21069ba679b33bde8b3b9d6"
 dependencies = [
  "ahash",
  "arrow",
@@ -996,7 +1020,8 @@ dependencies = [
 [[package]]
 name = "datafusion-functions-aggregate-common"
 version = "45.0.0"
-source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6e18baa4cfc3d2f144f74148ed68a1f92337f5072b6dde204a0dbbdf3324989c"
 dependencies = [
  "ahash",
  "arrow",
@@ -1008,7 +1033,8 @@ dependencies = [
 [[package]]
 name = "datafusion-functions-nested"
 version = "45.0.0"
-source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3ec5ee8cecb0dc370291279673097ddabec03a011f73f30d7f1096457127e03e"
 dependencies = [
  "arrow",
  "arrow-array",
@@ -1031,7 +1057,8 @@ dependencies = [
 [[package]]
 name = "datafusion-functions-table"
 version = "45.0.0"
-source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2c403ddd473bbb0952ba880008428b3c7febf0ed3ce1eec35a205db20efb2a36"
 dependencies = [
  "arrow",
  "async-trait",
@@ -1046,7 +1073,8 @@ dependencies = [
 [[package]]
 name = "datafusion-functions-window"
 version = "45.0.0"
-source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1ab18c2fb835614d06a75f24a9e09136d3a8c12a92d97c95a6af316a1787a9c5"
 dependencies = [
  "datafusion-common",
  "datafusion-doc",
@@ -1062,7 +1090,8 @@ dependencies = [
 [[package]]
 name = "datafusion-functions-window-common"
 version = "45.0.0"
-source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a77b73bc15e7d1967121fdc7a55d819bfb9d6c03766a6c322247dce9094a53a4"
 dependencies = [
  "datafusion-common",
  "datafusion-physical-expr-common",
@@ -1071,7 +1100,8 @@ dependencies = [
 [[package]]
 name = "datafusion-macros"
 version = "45.0.0"
-source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "09369b8d962291e808977cf94d495fd8b5b38647232d7ef562c27ac0f495b0af"
 dependencies = [
  "datafusion-expr",
  "quote",
@@ -1081,7 +1111,8 @@ dependencies = [
 [[package]]
 name = "datafusion-optimizer"
 version = "45.0.0"
-source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2403a7e4a84637f3de7d8d4d7a9ccc0cc4be92d89b0161ba3ee5be82f0531c54"
 dependencies = [
  "arrow",
  "chrono",
@@ -1099,7 +1130,8 @@ dependencies = [
 [[package]]
 name = "datafusion-physical-expr"
 version = "45.0.0"
-source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "86ff72ac702b62dbf2650c4e1d715ebd3e4aab14e3885e72e8549e250307347c"
 dependencies = [
  "ahash",
  "arrow",
@@ -1123,7 +1155,8 @@ dependencies = [
 [[package]]
 name = "datafusion-physical-expr-common"
 version = "45.0.0"
-source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "60982b7d684e25579ee29754b4333057ed62e2cc925383c5f0bd8cab7962f435"
 dependencies = [
  "ahash",
  "arrow",
@@ -1137,7 +1170,8 @@ dependencies = [
 [[package]]
 name = "datafusion-physical-optimizer"
 version = "45.0.0"
-source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ac5e85c189d5238a5cf181a624e450c4cd4c66ac77ca551d6f3ff9080bac90bb"
 dependencies = [
  "arrow",
  "arrow-schema",
@@ -1158,7 +1192,8 @@ dependencies = [
 [[package]]
 name = "datafusion-physical-plan"
 version = "45.0.0"
-source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c36bf163956d7e2542657c78b3383fdc78f791317ef358a359feffcdb968106f"
 dependencies = [
  "ahash",
  "arrow",
@@ -1189,7 +1224,8 @@ dependencies = [
 [[package]]
 name = "datafusion-proto"
 version = "45.0.0"
-source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2db5d79f0c974041787b899d24dc91bdab2ff112d1942dd71356a4ce3b407e6c"
 dependencies = [
  "arrow",
  "chrono",
@@ -1204,7 +1240,8 @@ dependencies = [
 [[package]]
 name = "datafusion-proto-common"
 version = "45.0.0"
-source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "de21bde1603aac0ff32cf478e47081be6e3583c6861fe8f57034da911efe7578"
 dependencies = [
  "arrow",
  "datafusion-common",
@@ -1214,7 +1251,8 @@ dependencies = [
 [[package]]
 name = "datafusion-sql"
 version = "45.0.0"
-source = "git+https://github.com/apache/datafusion.git?rev=1a29bd3#1a29bd3b62f1759c557aca9eed937ac38f5a5602"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e13caa4daede211ecec53c78b13c503b592794d125f9a3cc3afe992edf9e7f43"
 dependencies = [
  "arrow",
  "arrow-array",
@@ -1279,19 +1317,6 @@ version = "2.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "486f806e73c5707928240ddc295403b1b93c96a02038563881c4a2fd84b81ac4"
 
-[[package]]
-name = "ffi-table-provider"
-version = "0.1.0"
-dependencies = [
- "arrow",
- "arrow-array",
- "arrow-schema",
- "datafusion",
- "datafusion-ffi",
- "pyo3",
- "pyo3-build-config",
-]
-
 [[package]]
 name = "fixedbitset"
 version = "0.5.7"
diff --git a/examples/ffi-library/Cargo.toml b/examples/ffi-library/Cargo.toml
index 0e558fdd0..4ee076973 100644
--- a/examples/ffi-library/Cargo.toml
+++ b/examples/ffi-library/Cargo.toml
@@ -16,7 +16,7 @@
 # under the License.
 
 [package]
-name = "ffi-table-provider"
+name = "datafusion-ffi-library"
 version = "0.1.0"
 edition = "2021"
 
@@ -32,5 +32,5 @@ arrow-schema = { version = "54" }
 pyo3-build-config = "0.23"
 
 [lib]
-name = "ffi_table_provider"
+name = "datafusion_ffi_library"
 crate-type = ["cdylib", "rlib"]
diff --git a/examples/ffi-library/pyproject.toml b/examples/ffi-library/pyproject.toml
index 116efae9c..ca31d709f 100644
--- a/examples/ffi-library/pyproject.toml
+++ b/examples/ffi-library/pyproject.toml
@@ -20,7 +20,7 @@ requires = ["maturin>=1.6,<2.0"]
 build-backend = "maturin"
 
 [project]
-name = "ffi_table_provider"
+name = "datafusion_ffi_library"
 requires-python = ">=3.8"
 classifiers = [
     "Programming Language :: Rust",
diff --git a/examples/ffi-library/python/tests/_test_table_provider.py b/examples/ffi-library/python/tests/_test_table_provider.py
index 0db3ec561..6687a0c01 100644
--- a/examples/ffi-library/python/tests/_test_table_provider.py
+++ b/examples/ffi-library/python/tests/_test_table_provider.py
@@ -17,7 +17,7 @@
 
 import pyarrow as pa
 from datafusion import SessionContext
-from ffi_table_provider import MyTableProvider
+from datafusion_ffi_library import MyTableProvider
 
 
 def test_table_loading():
diff --git a/examples/ffi-library/src/lib.rs b/examples/ffi-library/src/lib.rs
index 88deeece2..b1809f73d 100644
--- a/examples/ffi-library/src/lib.rs
+++ b/examples/ffi-library/src/lib.rs
@@ -15,101 +15,12 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use std::{ffi::CString, sync::Arc};
-
-use arrow_array::ArrayRef;
-use datafusion::{
-    arrow::{
-        array::RecordBatch,
-        datatypes::{DataType, Field, Schema},
-    },
-    datasource::MemTable,
-    error::{DataFusionError, Result},
-};
-use datafusion_ffi::table_provider::FFI_TableProvider;
-use pyo3::{exceptions::PyRuntimeError, prelude::*, types::PyCapsule};
-
-/// In order to provide a test that demonstrates different sized record batches,
-/// the first batch will have num_rows, the second batch num_rows+1, and so on.
-#[pyclass(name = "MyTableProvider", module = "ffi_table_provider", subclass)]
-#[derive(Clone)]
-struct MyTableProvider {
-    num_cols: usize,
-    num_rows: usize,
-    num_batches: usize,
-}
-
-fn create_record_batch(
-    schema: &Arc<Schema>,
-    num_cols: usize,
-    start_value: i32,
-    num_values: usize,
-) -> Result<RecordBatch> {
-    let end_value = start_value + num_values as i32;
-    let row_values: Vec<i32> = (start_value..end_value).collect();
-
-    let columns: Vec<_> = (0..num_cols)
-        .map(|_| {
-            std::sync::Arc::new(arrow::array::Int32Array::from(row_values.clone())) as ArrayRef
-        })
-        .collect();
-
-    RecordBatch::try_new(Arc::clone(schema), columns).map_err(DataFusionError::from)
-}
-
-impl MyTableProvider {
-    fn create_table(&self) -> Result<MemTable> {
-        let fields: Vec<_> = (0..self.num_cols)
-            .map(|idx| (b'A' + idx as u8) as char)
-            .map(|col_name| Field::new(col_name, DataType::Int32, true))
-            .collect();
-
-        let schema = Arc::new(Schema::new(fields));
-
-        let batches: Result<Vec<_>> = (0..self.num_batches)
-            .map(|batch_idx| {
-                let start_value = batch_idx * self.num_rows;
-                create_record_batch(
-                    &schema,
-                    self.num_cols,
-                    start_value as i32,
-                    self.num_rows + batch_idx,
-                )
-            })
-            .collect();
-
-        MemTable::try_new(schema, vec![batches?])
-    }
-}
-
-#[pymethods]
-impl MyTableProvider {
-    #[new]
-    fn new(num_cols: usize, num_rows: usize, num_batches: usize) -> Self {
-        Self {
-            num_cols,
-            num_rows,
-            num_batches,
-        }
-    }
-
-    fn __datafusion_table_provider__<'py>(
-        &self,
-        py: Python<'py>,
-    ) -> PyResult<Bound<'py, PyCapsule>> {
-        let name = CString::new("datafusion_table_provider").unwrap();
-
-        let provider = self
-            .create_table()
-            .map_err(|e| PyRuntimeError::new_err(e.to_string()))?;
-        let provider = FFI_TableProvider::new(Arc::new(provider), false, None);
-
-        PyCapsule::new_bound(py, provider, Some(name.clone()))
-    }
-}
+use pyo3::prelude::*;
+use table_provider::MyTableProvider;
+mod table_provider;
 
 #[pymodule]
-fn ffi_table_provider(m: &Bound<'_, PyModule>) -> PyResult<()> {
+fn datafusion_ffi_library(m: &Bound<'_, PyModule>) -> PyResult<()> {
     m.add_class::<MyTableProvider>()?;
     Ok(())
 }
diff --git a/examples/ffi-library/src/table_provider.rs b/examples/ffi-library/src/table_provider.rs
new file mode 100644
index 000000000..ed4b4a16b
--- /dev/null
+++ b/examples/ffi-library/src/table_provider.rs
@@ -0,0 +1,109 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::{ffi::CString, sync::Arc};
+
+use arrow_array::ArrayRef;
+use datafusion::{
+    arrow::{
+        array::RecordBatch,
+        datatypes::{DataType, Field, Schema},
+    },
+    datasource::MemTable,
+    error::{DataFusionError, Result},
+};
+use datafusion_ffi::table_provider::FFI_TableProvider;
+use pyo3::{exceptions::PyRuntimeError, prelude::*, types::PyCapsule};
+
+/// In order to provide a test that demonstrates different sized record batches,
+/// the first batch will have num_rows, the second batch num_rows+1, and so on.
+#[pyclass(name = "MyTableProvider", module = "ffi_table_provider", subclass)]
+#[derive(Clone)]
+pub struct MyTableProvider {
+    num_cols: usize,
+    num_rows: usize,
+    num_batches: usize,
+}
+
+/// Build one batch whose `num_cols` Int32 columns each hold the `num_values`
+/// consecutive integers starting at `start_value`.
+fn create_record_batch(
+    schema: &Arc<Schema>,
+    num_cols: usize,
+    start_value: i32,
+    num_values: usize,
+) -> Result<RecordBatch> {
+    let values: Vec<i32> = (start_value..start_value + num_values as i32).collect();
+
+    // Each column gets its own copy of the same run of values.
+    let mut columns = Vec::with_capacity(num_cols);
+    for _ in 0..num_cols {
+        columns.push(Arc::new(arrow::array::Int32Array::from(values.clone())) as ArrayRef);
+    }
+    RecordBatch::try_new(Arc::clone(schema), columns).map_err(DataFusionError::from)
+}
+
+impl MyTableProvider {
+    /// Assemble the in-memory table handed out through the FFI capsule.
+    ///
+    /// Columns are nullable Int32 fields named 'A', 'B', ... in order. Batch
+    /// `i` starts at value `i * num_rows` and is asked for `num_rows + i`
+    /// rows; all batches are placed in a single partition.
+    fn create_table(&self) -> Result<MemTable> {
+        let mut fields = Vec::with_capacity(self.num_cols);
+        for idx in 0..self.num_cols {
+            let col_name = (b'A' + idx as u8) as char;
+            fields.push(Field::new(col_name, DataType::Int32, true));
+        }
+        let schema = Arc::new(Schema::new(fields));
+
+        let mut batches = Vec::new();
+        for batch_idx in 0..self.num_batches {
+            let first = (batch_idx * self.num_rows) as i32;
+            let rows = self.num_rows + batch_idx;
+            batches.push(create_record_batch(&schema, self.num_cols, first, rows)?);
+        }
+
+        MemTable::try_new(schema, vec![batches])
+    }
+}
+
+#[pymethods]
+impl MyTableProvider {
+    /// Python constructor: stores the table dimensions only; no data is built here.
+    #[new]
+    fn new(num_cols: usize, num_rows: usize, num_batches: usize) -> Self {
+        Self {
+            num_cols,
+            num_rows,
+            num_batches,
+        }
+    }
+
+    /// Export a freshly built table as an `FFI_TableProvider` capsule.
+    fn __datafusion_table_provider__<'py>(
+        &self,
+        py: Python<'py>,
+    ) -> PyResult<Bound<'py, PyCapsule>> {
+        let table = match self.create_table() {
+            Ok(table) => Arc::new(table),
+            Err(e) => return Err(PyRuntimeError::new_err(e.to_string())),
+        };
+        let capsule_name = CString::new("datafusion_table_provider").unwrap();
+        PyCapsule::new(py, FFI_TableProvider::new(table, false, None), Some(capsule_name))
+    }
+}

From 345897cf161c598f5fc97bc468375e4aa7fb47b8 Mon Sep 17 00:00:00 2001
From: Tim Saucer <timsaucer@gmail.com>
Date: Fri, 21 Feb 2025 14:36:02 -0500
Subject: [PATCH 6/9] Add integration test with scalar udf

---
 examples/ffi-library/Cargo.lock               | 307 ++++++++++--------
 examples/ffi-library/Cargo.toml               |  11 +-
 .../python/tests/_test_scalar_udf.py          |  41 +++
 examples/ffi-library/src/lib.rs               |   6 +-
 examples/ffi-library/src/scalar_udf.rs        |  70 ++++
 examples/ffi-library/src/table_provider.rs    |   2 +-
 python/datafusion/__init__.py                 |   1 +
 python/datafusion/udf.py                      |   4 +-
 src/udf.rs                                    |   4 +-
 9 files changed, 306 insertions(+), 140 deletions(-)
 create mode 100644 examples/ffi-library/python/tests/_test_scalar_udf.py
 create mode 100644 examples/ffi-library/src/scalar_udf.rs

diff --git a/examples/ffi-library/Cargo.lock b/examples/ffi-library/Cargo.lock
index 58b7ca362..cf9dbf411 100644
--- a/examples/ffi-library/Cargo.lock
+++ b/examples/ffi-library/Cargo.lock
@@ -73,7 +73,7 @@ checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011"
 dependencies = [
  "cfg-if",
  "const-random",
- "getrandom",
+ "getrandom 0.2.15",
  "once_cell",
  "version_check",
  "zerocopy",
@@ -144,9 +144,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
 
 [[package]]
 name = "arrow"
-version = "54.1.0"
+version = "54.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6422e12ac345a0678d7a17e316238e3a40547ae7f92052b77bd86d5e0239f3fc"
+checksum = "755b6da235ac356a869393c23668c663720b8749dd6f15e52b6c214b4b964cc7"
 dependencies = [
  "arrow-arith",
  "arrow-array",
@@ -165,9 +165,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-arith"
-version = "54.1.0"
+version = "54.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "23cf34bb1f48c41d3475927bcc7be498665b8e80b379b88f62a840337f8b8248"
+checksum = "64656a1e0b13ca766f8440752e9a93e11014eec7b67909986f83ed0ab1fe37b8"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -179,9 +179,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-array"
-version = "54.1.0"
+version = "54.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fb4a06d507f54b70a277be22a127c8ffe0cec6cd98c0ad8a48e77779bbda8223"
+checksum = "57a4a6d2896083cfbdf84a71a863b22460d0708f8206a8373c52e326cc72ea1a"
 dependencies = [
  "ahash",
  "arrow-buffer",
@@ -196,9 +196,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-buffer"
-version = "54.1.0"
+version = "54.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d69d326d5ad1cb82dcefa9ede3fee8fdca98f9982756b16f9cb142f4aa6edc89"
+checksum = "cef870583ce5e4f3b123c181706f2002fb134960f9a911900f64ba4830c7a43a"
 dependencies = [
  "bytes",
  "half",
@@ -207,9 +207,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-cast"
-version = "54.1.0"
+version = "54.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "626e65bd42636a84a238bed49d09c8777e3d825bf81f5087a70111c2831d9870"
+checksum = "1ac7eba5a987f8b4a7d9629206ba48e19a1991762795bbe5d08497b7736017ee"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -228,9 +228,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-csv"
-version = "54.1.0"
+version = "54.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "71c8f959f7a1389b1dbd883cdcd37c3ed12475329c111912f7f69dad8195d8c6"
+checksum = "90f12542b8164398fc9ec595ff783c4cf6044daa89622c5a7201be920e4c0d4c"
 dependencies = [
  "arrow-array",
  "arrow-cast",
@@ -244,9 +244,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-data"
-version = "54.1.0"
+version = "54.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1858e7c7d01c44cf71c21a85534fd1a54501e8d60d1195d0d6fbcc00f4b10754"
+checksum = "b095e8a4f3c309544935d53e04c3bfe4eea4e71c3de6fe0416d1f08bb4441a83"
 dependencies = [
  "arrow-buffer",
  "arrow-schema",
@@ -256,9 +256,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-ipc"
-version = "54.1.0"
+version = "54.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a6bb3f727f049884c7603f0364bc9315363f356b59e9f605ea76541847e06a1e"
+checksum = "65c63da4afedde2b25ef69825cd4663ca76f78f79ffe2d057695742099130ff6"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -270,9 +270,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-json"
-version = "54.1.0"
+version = "54.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "35de94f165ed8830aede72c35f238763794f0d49c69d30c44d49c9834267ff8c"
+checksum = "9551d9400532f23a370cabbea1dc5a53c49230397d41f96c4c8eedf306199305"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -290,9 +290,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-ord"
-version = "54.1.0"
+version = "54.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8aa06e5f267dc53efbacb933485c79b6fc1685d3ffbe870a16ce4e696fb429da"
+checksum = "6c07223476f8219d1ace8cd8d85fa18c4ebd8d945013f25ef5c72e85085ca4ee"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -303,9 +303,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-row"
-version = "54.1.0"
+version = "54.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "66f1144bb456a2f9d82677bd3abcea019217e572fc8f07de5a7bac4b2c56eb2c"
+checksum = "91b194b38bfd89feabc23e798238989c6648b2506ad639be42ec8eb1658d82c4"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -316,18 +316,18 @@ dependencies = [
 
 [[package]]
 name = "arrow-schema"
-version = "54.1.0"
+version = "54.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "105f01ec0090259e9a33a9263ec18ff223ab91a0ea9fbc18042f7e38005142f6"
+checksum = "0f40f6be8f78af1ab610db7d9b236e21d587b7168e368a36275d2e5670096735"
 dependencies = [
  "bitflags 2.6.0",
 ]
 
 [[package]]
 name = "arrow-select"
-version = "54.1.0"
+version = "54.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f690752fdbd2dee278b5f1636fefad8f2f7134c85e20fd59c4199e15a39a6807"
+checksum = "ac265273864a820c4a179fc67182ccc41ea9151b97024e1be956f0f2369c2539"
 dependencies = [
  "ahash",
  "arrow-array",
@@ -339,9 +339,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-string"
-version = "54.1.0"
+version = "54.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d0fff9cd745a7039b66c47ecaf5954460f9fa12eed628f65170117ea93e64ee0"
+checksum = "d44c8eed43be4ead49128370f7131f054839d3d6003e52aebf64322470b8fbd0"
 dependencies = [
  "arrow-array",
  "arrow-buffer",
@@ -475,15 +475,16 @@ dependencies = [
 
 [[package]]
 name = "blake3"
-version = "1.5.4"
+version = "1.6.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d82033247fd8e890df8f740e407ad4d038debb9eb1f40533fffb32e7d17dc6f7"
+checksum = "1230237285e3e10cde447185e8975408ae24deaa67205ce684805c25bc0c7937"
 dependencies = [
  "arrayref",
  "arrayvec",
  "cc",
  "cfg-if",
  "constant_time_eq",
+ "memmap2",
 ]
 
 [[package]]
@@ -530,9 +531,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
 
 [[package]]
 name = "bytes"
-version = "1.8.0"
+version = "1.10.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9ac0150caa2ae65ca5bd83f25c7de183dea78d4d366469f148435e2acfbad0da"
+checksum = "f61dac84819c6588b558454b194026eb1f09c293b9036ae9b159e74e73ab6cf9"
 
 [[package]]
 name = "bzip2"
@@ -546,19 +547,18 @@ dependencies = [
 
 [[package]]
 name = "bzip2"
-version = "0.5.0"
+version = "0.5.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bafdbf26611df8c14810e268ddceda071c297570a5fb360ceddf617fe417ef58"
+checksum = "75b89e7c29231c673a61a46e722602bcd138298f6b9e81e71119693534585f5c"
 dependencies = [
  "bzip2-sys",
- "libc",
 ]
 
 [[package]]
 name = "bzip2-sys"
-version = "0.1.11+1.0.8"
+version = "0.1.12+1.0.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc"
+checksum = "72ebc2f1a417f01e1da30ef264ee86ae31d2dcd2d603ea283d3c244a883ca2a9"
 dependencies = [
  "cc",
  "libc",
@@ -641,7 +641,7 @@ version = "0.1.16"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e"
 dependencies = [
- "getrandom",
+ "getrandom 0.2.15",
  "once_cell",
  "tiny-keccak",
 ]
@@ -766,28 +766,29 @@ dependencies = [
 [[package]]
 name = "datafusion"
 version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "eae420e7a5b0b7f1c39364cc76cbcd0f5fdc416b2514ae3847c2676bbd60702a"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
 dependencies = [
  "arrow",
- "arrow-array",
  "arrow-ipc",
  "arrow-schema",
- "async-compression",
  "async-trait",
  "bytes",
- "bzip2 0.5.0",
+ "bzip2 0.5.1",
  "chrono",
  "datafusion-catalog",
+ "datafusion-catalog-listing",
  "datafusion-common",
  "datafusion-common-runtime",
+ "datafusion-datasource",
  "datafusion-execution",
  "datafusion-expr",
+ "datafusion-expr-common",
  "datafusion-functions",
  "datafusion-functions-aggregate",
  "datafusion-functions-nested",
  "datafusion-functions-table",
  "datafusion-functions-window",
+ "datafusion-macros",
  "datafusion-optimizer",
  "datafusion-physical-expr",
  "datafusion-physical-expr-common",
@@ -796,7 +797,6 @@ dependencies = [
  "datafusion-sql",
  "flate2",
  "futures",
- "glob",
  "itertools 0.14.0",
  "log",
  "object_store",
@@ -807,7 +807,6 @@ dependencies = [
  "sqlparser",
  "tempfile",
  "tokio",
- "tokio-util",
  "url",
  "uuid",
  "xz2",
@@ -817,8 +816,7 @@ dependencies = [
 [[package]]
 name = "datafusion-catalog"
 version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6f27987bc22b810939e8dfecc55571e9d50355d6ea8ec1c47af8383a76a6d0e1"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
 dependencies = [
  "arrow",
  "async-trait",
@@ -832,21 +830,37 @@ dependencies = [
  "itertools 0.14.0",
  "log",
  "parking_lot",
- "sqlparser",
+]
+
+[[package]]
+name = "datafusion-catalog-listing"
+version = "45.0.0"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
+dependencies = [
+ "arrow",
+ "async-trait",
+ "datafusion-catalog",
+ "datafusion-common",
+ "datafusion-datasource",
+ "datafusion-execution",
+ "datafusion-expr",
+ "datafusion-physical-expr",
+ "datafusion-physical-expr-common",
+ "datafusion-physical-plan",
+ "futures",
+ "log",
+ "object_store",
+ "tokio",
 ]
 
 [[package]]
 name = "datafusion-common"
 version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e3f6d5b8c9408cc692f7c194b8aa0c0f9b253e065a8d960ad9cdc2a13e697602"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
 dependencies = [
  "ahash",
  "arrow",
- "arrow-array",
- "arrow-buffer",
  "arrow-ipc",
- "arrow-schema",
  "base64",
  "half",
  "hashbrown 0.14.5",
@@ -865,24 +879,52 @@ dependencies = [
 [[package]]
 name = "datafusion-common-runtime"
 version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0d4603c8e8a4baf77660ab7074cc66fc15cc8a18f2ce9dfadb755fc6ee294e48"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
 dependencies = [
  "log",
  "tokio",
 ]
 
+[[package]]
+name = "datafusion-datasource"
+version = "45.0.0"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
+dependencies = [
+ "arrow",
+ "async-compression",
+ "async-trait",
+ "bytes",
+ "bzip2 0.5.1",
+ "chrono",
+ "datafusion-catalog",
+ "datafusion-common",
+ "datafusion-common-runtime",
+ "datafusion-execution",
+ "datafusion-expr",
+ "datafusion-physical-plan",
+ "flate2",
+ "futures",
+ "glob",
+ "itertools 0.14.0",
+ "log",
+ "object_store",
+ "rand",
+ "tokio",
+ "tokio-util",
+ "url",
+ "xz2",
+ "zstd",
+]
+
 [[package]]
 name = "datafusion-doc"
 version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e5bf4bc68623a5cf231eed601ed6eb41f46a37c4d15d11a0bff24cbc8396cd66"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
 
 [[package]]
 name = "datafusion-execution"
 version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "88b491c012cdf8e051053426013429a76f74ee3c2db68496c79c323ca1084d27"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
 dependencies = [
  "arrow",
  "dashmap",
@@ -900,8 +942,7 @@ dependencies = [
 [[package]]
 name = "datafusion-expr"
 version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e5a181408d4fc5dc22f9252781a8f39f2d0e5d1b33ec9bde242844980a2689c1"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
 dependencies = [
  "arrow",
  "chrono",
@@ -921,11 +962,11 @@ dependencies = [
 [[package]]
 name = "datafusion-expr-common"
 version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d1129b48e8534d8c03c6543bcdccef0b55c8ac0c1272a15a56c67068b6eb1885"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
 dependencies = [
  "arrow",
  "datafusion-common",
+ "indexmap",
  "itertools 0.14.0",
  "paste",
 ]
@@ -933,13 +974,10 @@ dependencies = [
 [[package]]
 name = "datafusion-ffi"
 version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ff47a79d442207c168c6e3e1d970c248589c148e4800e5b285ac1b2cb1a230f8"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
 dependencies = [
  "abi_stable",
  "arrow",
- "arrow-array",
- "arrow-schema",
  "async-ffi",
  "async-trait",
  "datafusion",
@@ -967,8 +1005,7 @@ dependencies = [
 [[package]]
 name = "datafusion-functions"
 version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6125874e4856dfb09b59886784fcb74cde5cfc5930b3a80a1a728ef7a010df6b"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
 dependencies = [
  "arrow",
  "arrow-buffer",
@@ -982,7 +1019,6 @@ dependencies = [
  "datafusion-expr",
  "datafusion-expr-common",
  "datafusion-macros",
- "hashbrown 0.14.5",
  "hex",
  "itertools 0.14.0",
  "log",
@@ -997,13 +1033,10 @@ dependencies = [
 [[package]]
 name = "datafusion-functions-aggregate"
 version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f3add7b1d3888e05e7c95f2b281af900ca69ebdcb21069ba679b33bde8b3b9d6"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
 dependencies = [
  "ahash",
  "arrow",
- "arrow-buffer",
- "arrow-schema",
  "datafusion-common",
  "datafusion-doc",
  "datafusion-execution",
@@ -1020,8 +1053,7 @@ dependencies = [
 [[package]]
 name = "datafusion-functions-aggregate-common"
 version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6e18baa4cfc3d2f144f74148ed68a1f92337f5072b6dde204a0dbbdf3324989c"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
 dependencies = [
  "ahash",
  "arrow",
@@ -1033,14 +1065,10 @@ dependencies = [
 [[package]]
 name = "datafusion-functions-nested"
 version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3ec5ee8cecb0dc370291279673097ddabec03a011f73f30d7f1096457127e03e"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
 dependencies = [
  "arrow",
- "arrow-array",
- "arrow-buffer",
  "arrow-ord",
- "arrow-schema",
  "datafusion-common",
  "datafusion-doc",
  "datafusion-execution",
@@ -1057,8 +1085,7 @@ dependencies = [
 [[package]]
 name = "datafusion-functions-table"
 version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2c403ddd473bbb0952ba880008428b3c7febf0ed3ce1eec35a205db20efb2a36"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
 dependencies = [
  "arrow",
  "async-trait",
@@ -1073,8 +1100,7 @@ dependencies = [
 [[package]]
 name = "datafusion-functions-window"
 version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1ab18c2fb835614d06a75f24a9e09136d3a8c12a92d97c95a6af316a1787a9c5"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
 dependencies = [
  "datafusion-common",
  "datafusion-doc",
@@ -1090,8 +1116,7 @@ dependencies = [
 [[package]]
 name = "datafusion-functions-window-common"
 version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a77b73bc15e7d1967121fdc7a55d819bfb9d6c03766a6c322247dce9094a53a4"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
 dependencies = [
  "datafusion-common",
  "datafusion-physical-expr-common",
@@ -1100,8 +1125,7 @@ dependencies = [
 [[package]]
 name = "datafusion-macros"
 version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "09369b8d962291e808977cf94d495fd8b5b38647232d7ef562c27ac0f495b0af"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
 dependencies = [
  "datafusion-expr",
  "quote",
@@ -1111,8 +1135,7 @@ dependencies = [
 [[package]]
 name = "datafusion-optimizer"
 version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2403a7e4a84637f3de7d8d4d7a9ccc0cc4be92d89b0161ba3ee5be82f0531c54"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
 dependencies = [
  "arrow",
  "chrono",
@@ -1130,14 +1153,10 @@ dependencies = [
 [[package]]
 name = "datafusion-physical-expr"
 version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "86ff72ac702b62dbf2650c4e1d715ebd3e4aab14e3885e72e8549e250307347c"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
 dependencies = [
  "ahash",
  "arrow",
- "arrow-array",
- "arrow-buffer",
- "arrow-schema",
  "datafusion-common",
  "datafusion-expr",
  "datafusion-expr-common",
@@ -1155,12 +1174,10 @@ dependencies = [
 [[package]]
 name = "datafusion-physical-expr-common"
 version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "60982b7d684e25579ee29754b4333057ed62e2cc925383c5f0bd8cab7962f435"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
 dependencies = [
  "ahash",
  "arrow",
- "arrow-buffer",
  "datafusion-common",
  "datafusion-expr-common",
  "hashbrown 0.14.5",
@@ -1170,11 +1187,9 @@ dependencies = [
 [[package]]
 name = "datafusion-physical-optimizer"
 version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ac5e85c189d5238a5cf181a624e450c4cd4c66ac77ca551d6f3ff9080bac90bb"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
 dependencies = [
  "arrow",
- "arrow-schema",
  "datafusion-common",
  "datafusion-execution",
  "datafusion-expr",
@@ -1182,23 +1197,18 @@ dependencies = [
  "datafusion-physical-expr",
  "datafusion-physical-expr-common",
  "datafusion-physical-plan",
- "futures",
  "itertools 0.14.0",
  "log",
  "recursive",
- "url",
 ]
 
 [[package]]
 name = "datafusion-physical-plan"
 version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c36bf163956d7e2542657c78b3383fdc78f791317ef358a359feffcdb968106f"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
 dependencies = [
  "ahash",
  "arrow",
- "arrow-array",
- "arrow-buffer",
  "arrow-ord",
  "arrow-schema",
  "async-trait",
@@ -1224,8 +1234,7 @@ dependencies = [
 [[package]]
 name = "datafusion-proto"
 version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2db5d79f0c974041787b899d24dc91bdab2ff112d1942dd71356a4ce3b407e6c"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
 dependencies = [
  "arrow",
  "chrono",
@@ -1240,8 +1249,7 @@ dependencies = [
 [[package]]
 name = "datafusion-proto-common"
 version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "de21bde1603aac0ff32cf478e47081be6e3583c6861fe8f57034da911efe7578"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
 dependencies = [
  "arrow",
  "datafusion-common",
@@ -1251,12 +1259,9 @@ dependencies = [
 [[package]]
 name = "datafusion-sql"
 version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e13caa4daede211ecec53c78b13c503b592794d125f9a3cc3afe992edf9e7f43"
+source = "git+https://github.com/apache/datafusion.git?rev=8ab0661a39bd69783b31b949e7a768fb518629e7#8ab0661a39bd69783b31b949e7a768fb518629e7"
 dependencies = [
  "arrow",
- "arrow-array",
- "arrow-schema",
  "bigdecimal",
  "datafusion-common",
  "datafusion-expr",
@@ -1468,7 +1473,19 @@ checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
 dependencies = [
  "cfg-if",
  "libc",
- "wasi",
+ "wasi 0.11.0+wasi-snapshot-preview1",
+]
+
+[[package]]
+name = "getrandom"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "43a49c392881ce6d5c3b8cb70f98717b7c07aabbdff06687b9030dbfbe2725f8"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "wasi 0.13.3+wasi-0.2.2",
+ "windows-targets",
 ]
 
 [[package]]
@@ -1910,6 +1927,15 @@ version = "2.7.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
 
+[[package]]
+name = "memmap2"
+version = "0.9.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fd3f7eed9d3848f8b98834af67102b720745c4ec028fcd0aa0239277e7de374f"
+dependencies = [
+ "libc",
+]
+
 [[package]]
 name = "memoffset"
 version = "0.9.1"
@@ -2072,9 +2098,9 @@ dependencies = [
 
 [[package]]
 name = "parquet"
-version = "54.1.0"
+version = "54.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8a01a0efa30bbd601ae85b375c728efdb211ade54390281628a7b16708beb235"
+checksum = "761c44d824fe83106e0600d2510c07bf4159a4985bf0569b513ea4288dc1b4fb"
 dependencies = [
  "ahash",
  "arrow-array",
@@ -2349,7 +2375,7 @@ version = "0.6.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
 dependencies = [
- "getrandom",
+ "getrandom 0.2.15",
 ]
 
 [[package]]
@@ -2591,11 +2617,12 @@ checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b"
 
 [[package]]
 name = "sqlparser"
-version = "0.53.0"
+version = "0.54.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "05a528114c392209b3264855ad491fcce534b94a38771b0a0b97a79379275ce8"
+checksum = "c66e3b7374ad4a6af849b08b3e7a6eda0edbd82f0fd59b57e22671bf16979899"
 dependencies = [
  "log",
+ "recursive",
  "sqlparser_derive",
 ]
 
@@ -2744,9 +2771,9 @@ dependencies = [
 
 [[package]]
 name = "tokio"
-version = "1.41.1"
+version = "1.43.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "22cfb5bee7a6a52939ca9224d6ac897bb669134078daa8735560897f69de4d33"
+checksum = "3d61fa4ffa3de412bfea335c6ecff681de2b609ba3c77ef3e00e521813a9ed9e"
 dependencies = [
  "backtrace",
  "bytes",
@@ -2756,9 +2783,9 @@ dependencies = [
 
 [[package]]
 name = "tokio-macros"
-version = "2.4.0"
+version = "2.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752"
+checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -2895,11 +2922,13 @@ checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be"
 
 [[package]]
 name = "uuid"
-version = "1.11.0"
+version = "1.14.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f8c5f0a0af699448548ad1a2fbf920fb4bee257eae39953ba95cb84891a0446a"
+checksum = "93d59ca99a559661b96bf898d8fce28ed87935fd2bea9f05983c1464dd6c71b1"
 dependencies = [
- "getrandom",
+ "getrandom 0.3.1",
+ "js-sys",
+ "wasm-bindgen",
 ]
 
 [[package]]
@@ -2924,6 +2953,15 @@ version = "0.11.0+wasi-snapshot-preview1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
 
+[[package]]
+name = "wasi"
+version = "0.13.3+wasi-0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "26816d2e1a4a36a2940b96c5296ce403917633dff8f3440e9b236ed6f6bacad2"
+dependencies = [
+ "wit-bindgen-rt",
+]
+
 [[package]]
 name = "wasm-bindgen"
 version = "0.2.95"
@@ -3111,6 +3149,15 @@ version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
 
+[[package]]
+name = "wit-bindgen-rt"
+version = "0.33.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c"
+dependencies = [
+ "bitflags 2.6.0",
+]
+
 [[package]]
 name = "write16"
 version = "1.0.0"
diff --git a/examples/ffi-library/Cargo.toml b/examples/ffi-library/Cargo.toml
index 4ee076973..edeb54f7b 100644
--- a/examples/ffi-library/Cargo.toml
+++ b/examples/ffi-library/Cargo.toml
@@ -24,9 +24,9 @@ edition = "2021"
 datafusion = { version = "45.0.0" }
 datafusion-ffi = { version = "45.0.0" }
 pyo3 = { version = "0.23", features = ["extension-module", "abi3", "abi3-py38"] }
-arrow = { version = "54" }
-arrow-array = { version = "54" }
-arrow-schema = { version = "54" }
+arrow = { version = "54.2.0" }
+arrow-array = { version = "54.2.0" }
+arrow-schema = { version = "54.2.0" }
 
 [build-dependencies]
 pyo3-build-config = "0.23"
@@ -34,3 +34,8 @@ pyo3-build-config = "0.23"
 [lib]
 name = "datafusion_ffi_library"
 crate-type = ["cdylib", "rlib"]
+
+# TODO remove once we update datafusion versions to 46 (cargo ignores `features` on patch entries)
+[patch.crates-io]
+datafusion = { git = "https://github.com/apache/datafusion.git", rev = "8ab0661a39bd69783b31b949e7a768fb518629e7" }
+datafusion-ffi = { git = "https://github.com/apache/datafusion.git", rev = "8ab0661a39bd69783b31b949e7a768fb518629e7" }
diff --git a/examples/ffi-library/python/tests/_test_scalar_udf.py b/examples/ffi-library/python/tests/_test_scalar_udf.py
new file mode 100644
index 000000000..9ebcc076b
--- /dev/null
+++ b/examples/ffi-library/python/tests/_test_scalar_udf.py
@@ -0,0 +1,41 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import pyarrow as pa
+from datafusion import SessionContext, col, ffi_udf
+from datafusion_ffi_library import IsEvenFunction
+
+
+def test_table_loading():
+    """Evaluate the FFI-exported ``IsEvenFunction`` through ``ffi_udf``."""
+    ctx = SessionContext()
+    df = ctx.from_pydict({"a": [-3, -2, None, 0, 1, 2]})
+    is_even = ffi_udf(IsEvenFunction())
+
+    batches = df.select(is_even(col("a"))).collect()
+    assert len(batches) == 1
+    assert batches[0].num_columns == 1
+    # Inputs that are null or zero are expected to come back as null.
+    columns = [batch.column(0) for batch in batches]
+    expected = [
+        pa.array([False, True, None, None, False, True], type=pa.bool_()),
+    ]
+    assert columns == expected
+
+    # Exercise the with_column path too, asserting on it instead of printing.
+    appended = df.with_column("is_even", is_even(col("a"))).collect()
+    assert appended[0].num_columns == 2
diff --git a/examples/ffi-library/src/lib.rs b/examples/ffi-library/src/lib.rs
index b1809f73d..904bdf29d 100644
--- a/examples/ffi-library/src/lib.rs
+++ b/examples/ffi-library/src/lib.rs
@@ -16,11 +16,13 @@
 // under the License.
 
 use pyo3::prelude::*;
-use table_provider::MyTableProvider;
+mod scalar_udf;
 mod table_provider;
 
 #[pymodule]
 fn datafusion_ffi_library(m: &Bound<'_, PyModule>) -> PyResult<()> {
-    m.add_class::<MyTableProvider>()?;
+    m.add_class::<table_provider::MyTableProvider>()?;
+    m.add_class::<scalar_udf::IsEvenFunction>()?;
+
     Ok(())
 }
diff --git a/examples/ffi-library/src/scalar_udf.rs b/examples/ffi-library/src/scalar_udf.rs
new file mode 100644
index 000000000..6ac75df43
--- /dev/null
+++ b/examples/ffi-library/src/scalar_udf.rs
@@ -0,0 +1,70 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::{ffi::CString, sync::Arc};
+
+use arrow::array::BooleanArray;
+use arrow_array::ArrayRef;
+use datafusion::common::cast::as_int64_array;
+use datafusion::logical_expr::create_udf;
+use datafusion::logical_expr::Volatility;
+use datafusion::physical_plan::ColumnarValue;
+use datafusion::{arrow::datatypes::DataType, error::Result};
+use datafusion_ffi::udf::FFI_ScalarUDF;
+use pyo3::{prelude::*, types::PyCapsule};
+
+#[pyclass(name = "IsEvenFunction", module = "datafusion_ffi_library", subclass)]
+#[derive(Clone)]
+pub struct IsEvenFunction {} // Field-less Python class that exposes the `is_even` UDF.
+
+/// Scalar implementation of `is_even` over a single Int64 argument.
+fn is_even(args: &[ColumnarValue]) -> Result<ColumnarValue> {
+    assert_eq!(args.len(), 1);
+    let args = ColumnarValue::values_to_arrays(args)?;
+    // Report a non-Int64 argument as an error rather than panicking inside the UDF.
+    let values = as_int64_array(&args[0])?;
+    // Null and zero inputs map to null; every other value maps to `v % 2 == 0`.
+    let array = values
+        .iter()
+        .map(|value| value.and_then(|v| if v == 0 { None } else { Some(v % 2 == 0) }))
+        .collect::<BooleanArray>();
+    Ok(ColumnarValue::from(Arc::new(array) as ArrayRef))
+}
+
+#[pymethods]
+impl IsEvenFunction {
+    #[new]
+    fn new() -> Self {
+        Self {}
+    }
+
+    fn __datafusion_scalar_udf__<'py>(&self, py: Python<'py>) -> PyResult<Bound<'py, PyCapsule>> {
+        let name = CString::new("datafusion_scalar_udf").unwrap();
+        // Build an immutable Int64 -> Boolean scalar UDF named "is_even".
+        let func = create_udf(
+            "is_even",
+            vec![DataType::Int64],
+            DataType::Boolean,
+            Volatility::Immutable,
+            Arc::new(is_even),
+        );
+        // Convert to the FFI struct; `?` turns a conversion failure into the returned PyErr.
+        let ffi_func: FFI_ScalarUDF = (Arc::new(func)).try_into()?;
+        // Hand the FFI struct to Python inside a capsule tagged with `name`.
+        PyCapsule::new(py, ffi_func, Some(name))
+    }
+}
diff --git a/examples/ffi-library/src/table_provider.rs b/examples/ffi-library/src/table_provider.rs
index ed4b4a16b..2dfc645e9 100644
--- a/examples/ffi-library/src/table_provider.rs
+++ b/examples/ffi-library/src/table_provider.rs
@@ -31,7 +31,7 @@ use pyo3::{exceptions::PyRuntimeError, prelude::*, types::PyCapsule};
 
 /// In order to provide a test that demonstrates different sized record batches,
 /// the first batch will have num_rows, the second batch num_rows+1, and so on.
-#[pyclass(name = "MyTableProvider", module = "ffi_table_provider", subclass)]
+#[pyclass(name = "MyTableProvider", module = "datafusion_ffi_library", subclass)]
 #[derive(Clone)]
 pub struct MyTableProvider {
     num_cols: usize,
diff --git a/python/datafusion/__init__.py b/python/datafusion/__init__.py
index 85aefcce7..330ac7a6f 100644
--- a/python/datafusion/__init__.py
+++ b/python/datafusion/__init__.py
@@ -118,6 +118,7 @@ def lit(value):
 
 
 udf = ScalarUDF.udf
+ffi_udf = ScalarUDF.ffi_udf
 
 udaf = AggregateUDF.udaf
 
diff --git a/python/datafusion/udf.py b/python/datafusion/udf.py
index f87c4f369..99d468883 100644
--- a/python/datafusion/udf.py
+++ b/python/datafusion/udf.py
@@ -123,9 +123,9 @@ def __call__(self, *args: Expr) -> Expr:
         return Expr(self._udf.__call__(*args_raw))
 
     @staticmethod
-    def from_ffi(func: ScalarUDFExportable) -> ScalarUDF:
+    def ffi_udf(func: ScalarUDFExportable) -> ScalarUDF:
         """Create a User-Defined Function from a provided PyCapsule."""
-        udf = df_internal.ScalarUDF.from_ffi(func)
+        udf = df_internal.ScalarUDF.ffi_udf(func)
         return ScalarUDF(None, udf, None, None, None)
 
     @staticmethod
diff --git a/src/udf.rs b/src/udf.rs
index 3102b418b..6c6c1afb4 100644
--- a/src/udf.rs
+++ b/src/udf.rs
@@ -108,7 +108,7 @@ impl PyScalarUDF {
     }
 
     #[staticmethod]
-    fn from_ffi(func: Bound<PyAny>) -> PyResult<Self> {
+    fn ffi_udf(func: Bound<PyAny>) -> PyResult<Self> {
         if func.hasattr("__datafusion_scalar_udf__")? {
             let capsule = func.getattr("__datafusion_scalar_udf__")?.call0()?;
             let capsule = capsule.downcast::<PyCapsule>()?;
@@ -122,7 +122,7 @@ impl PyScalarUDF {
             })
         } else {
             Err(py_datafusion_err(
-                "__datafusion_table_provider__ does not exist on Table Provider object.",
+                "__datafusion_scalar_udf__ does not exist on Scalar UDF object.",
             ))
         }
     }

From 80fd30153f104ccd2f06824c757ee4776ef27b10 Mon Sep 17 00:00:00 2001
From: Tim Saucer <timsaucer@gmail.com>
Date: Mon, 17 Feb 2025 13:13:27 -0500
Subject: [PATCH 7/9] There was no guarantee that the record batches would be
 returned in a single partition, so update the unit test to check all
 partitions.

---
 python/tests/test_dataframe.py | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py
index 5bc3fb094..c636e896a 100644
--- a/python/tests/test_dataframe.py
+++ b/python/tests/test_dataframe.py
@@ -755,13 +755,20 @@ def test_execution_plan(aggregate_df):
     assert "CsvExec:" in indent
 
     ctx = SessionContext()
-    stream = ctx.execute(plan, 0)
-    # get the one and only batch
-    batch = stream.next()
-    assert batch is not None
-    # there should be no more batches
-    with pytest.raises(StopIteration):
-        stream.next()
+    rows_returned = 0
+    for idx in range(0, plan.partition_count):
+        stream = ctx.execute(plan, idx)
+        try:
+            batch = stream.next()
+            assert batch is not None
+            rows_returned += len(batch.to_pyarrow()[0])
+        except StopIteration:
+            # This is one of the partitions with no values
+            pass
+        with pytest.raises(StopIteration):
+            stream.next()
+
+    assert rows_returned == 5
 
 
 def test_repartition(df):

From 5439d1981b81ef967f92f1526bcb31ba79f6fc63 Mon Sep 17 00:00:00 2001
From: Tim Saucer <timsaucer@gmail.com>
Date: Fri, 21 Feb 2025 14:39:06 -0500
Subject: [PATCH 8/9] Update unit test due to change in name of the upstream
 exec function

---
 python/tests/test_dataframe.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/tests/test_dataframe.py b/python/tests/test_dataframe.py
index c636e896a..6ae1115e5 100644
--- a/python/tests/test_dataframe.py
+++ b/python/tests/test_dataframe.py
@@ -752,7 +752,7 @@ def test_execution_plan(aggregate_df):
     assert "AggregateExec:" in indent
     assert "CoalesceBatchesExec:" in indent
     assert "RepartitionExec:" in indent
-    assert "CsvExec:" in indent
+    assert "DataSourceExec:" in indent
 
     ctx = SessionContext()
     rows_returned = 0

From 5538b7eb351465f6225a401088f039170e77ffcd Mon Sep 17 00:00:00 2001
From: Tim Saucer <timsaucer@gmail.com>
Date: Fri, 21 Feb 2025 14:52:49 -0500
Subject: [PATCH 9/9] Update path in CI for integration tests

---
 .github/workflows/test.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index c1d9ac838..713420042 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -89,9 +89,9 @@ jobs:
 
       - name: FFI unit tests
         run: |
-          cd examples/ffi-table-provider
+          cd examples/ffi-library
           uv run --no-project maturin develop --uv
-          uv run --no-project pytest python/tests/_test_table_provider.py
+          uv run --no-project pytest python/tests/_*.py
 
       - name: Cache the generated dataset
         id: cache-tpch-dataset