pandas-dev · arthurlw · May 23, 2025 · May 23, 2025 · May 23, 2025 · May 24, 2025
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -30,6 +30,7 @@ Other enhancements
 ^^^^^^^^^^^^^^^^^^
 - :class:`pandas.api.typing.FrozenList` is available for typing the outputs of :attr:`MultiIndex.names`, :attr:`MultiIndex.codes` and :attr:`MultiIndex.levels` (:issue:`58237`)
 - :class:`pandas.api.typing.SASReader` is available for typing the output of :func:`read_sas` (:issue:`55689`)
+- :meth:`DataFrame.apply` accepts Numba as an engine by passing the JIT decorator directly, e.g. ``df.apply(func, engine=numba.jit)`` (:issue:`61458`)
 - Added :meth:`.Styler.to_typst` to write Styler objects to file, buffer or string in Typst format (:issue:`57617`)
 - Added missing :meth:`pandas.Series.info` to API reference (:issue:`60926`)
 - :class:`pandas.api.typing.NoDefault` is available for typing ``no_default``

diff --git a/pandas/core/apply.py b/pandas/core/apply.py
@@ -178,6 +178,84 @@ def apply(
         """
 
 
+class NumbaExecutionEngine(BaseExecutionEngine):
+    """
+    Numba-based execution engine for pandas apply and map operations.
+    """
+
+    @staticmethod
+    def map(
+        data: np.ndarray | Series | DataFrame,
+        func,
+        args: tuple,
+        kwargs: dict,
+        decorator: Callable | None,
+        skip_na: bool,
+    ):
+        """
+        Elementwise map for the Numba engine. Currently not supported.
+        """
+        raise NotImplementedError("Numba map is not implemented yet.")
+
+    @staticmethod
+    def apply(
+        data: np.ndarray | Series | DataFrame,
+        func,
+        args: tuple,
+        kwargs: dict,
+        decorator: Callable | dict[str, bool] | None,
+        axis: int | str,
+    ):
+        """
+        Apply `func` along the given axis using Numba.
+
+        A dict ``decorator`` supplies the JIT options; any other value passes
+        ``None`` to ``get_jit_arguments``. List-like, string and ufunc ``func``
+        raise ``NotImplementedError``. ndarray ``data`` runs through the compiled
+        looper; an all-empty frame is copied and other frames use ``frame_apply``.
+        """
+        if is_list_like(func):
+            raise NotImplementedError(
+                "the 'numba' engine doesn't support lists of callables yet"
+            )
+        if isinstance(func, str):
+            raise NotImplementedError(
+                "the 'numba' engine doesn't support using "
+                "a string as the callable function"
+            )
+        if isinstance(func, np.ufunc):
+            raise NotImplementedError(
+                "the 'numba' engine doesn't support "
+                "using a numpy ufunc as the callable function"
+            )
+
+        engine_kwargs = decorator if isinstance(decorator, dict) else None
+
+        if not isinstance(data, np.ndarray):
+            # NOTE(review): this branch reads ``.columns``, so it assumes a DataFrame
+            if len(data.columns) == 0 and len(data.index) == 0:
+                return data.copy()  # mimic apply_empty_result()
+            # apply_standard is an instance method, so build the FrameApply
+            # object through frame_apply instead of calling it on the class.
+            return frame_apply(
+                data,
+                func,
+                axis=axis,
+                engine="numba",
+                engine_kwargs=engine_kwargs,
+                args=args,
+                kwargs=kwargs,
+            ).apply_standard()
+
+        looper_args, _ = prepare_function_arguments(
+            func, args, kwargs, num_required_args=1
+        )
+        nb_looper = generate_apply_looper(func, **get_jit_arguments(engine_kwargs))
+        result = nb_looper(data, axis, *looper_args)
+        # If we made the result 2-D, squeeze it back to 1-D
+        return np.squeeze(result)
+
+
 def frame_apply(
     obj: DataFrame,
     func: AggFuncType,
@@ -957,10 +1035,6 @@ def apply(self) -> DataFrame | Series:
 
         # dispatch to handle list-like or dict-like
         if is_list_like(self.func):
-            if self.engine == "numba":
-                raise NotImplementedError(
-                    "the 'numba' engine doesn't support lists of callables yet"
-                )
             return self.apply_list_or_dict_like()
 
         # all empty
@@ -969,31 +1043,17 @@ def apply(self) -> DataFrame | Series:
 
         # string dispatch
         if isinstance(self.func, str):
-            if self.engine == "numba":
-                raise NotImplementedError(
-                    "the 'numba' engine doesn't support using "
-                    "a string as the callable function"
-                )
             return self.apply_str()
 
         # ufunc
         elif isinstance(self.func, np.ufunc):
-            if self.engine == "numba":
-                raise NotImplementedError(
-                    "the 'numba' engine doesn't support "
-                    "using a numpy ufunc as the callable function"
-                )
             with np.errstate(all="ignore"):
                 results = self.obj._mgr.apply("apply", func=self.func)
             # _constructor will retain self.index and self.columns
             return self.obj._constructor_from_mgr(results, axes=results.axes)
 
         # broadcasting
         if self.result_type == "broadcast":
-            if self.engine == "numba":
-                raise NotImplementedError(
-                    "the 'numba' engine doesn't support result_type='broadcast'"
-                )
             return self.apply_broadcast(self.obj)
 
         # one axis empty
@@ -1094,23 +1154,19 @@ def wrapper(*args, **kwargs):
             return wrapper
 
         if engine == "numba":
-            args, kwargs = prepare_function_arguments(
-                self.func,  # type: ignore[arg-type]
+            numba = import_optional_dependency("numba")
+
+            if not hasattr(numba.jit, "__pandas_udf__"):
+                numba.jit.__pandas_udf__ = NumbaExecutionEngine
+
+            result = numba.jit.__pandas_udf__.apply(
+                self.values,
+                self.func,
                 self.args,
                 self.kwargs,
-                num_required_args=1,
-            )
-            # error: Argument 1 to "__call__" of "_lru_cache_wrapper" has
-            # incompatible type "Callable[..., Any] | str | list[Callable
-            # [..., Any] | str] | dict[Hashable,Callable[..., Any] | str |
-            # list[Callable[..., Any] | str]]"; expected "Hashable"
-            nb_looper = generate_apply_looper(
-                self.func,  # type: ignore[arg-type]
-                **get_jit_arguments(engine_kwargs),
+                engine_kwargs,
+                self.axis,
             )
-            result = nb_looper(self.values, self.axis, *args)
-            # If we made the result 2-D, squeeze it back to 1-D
-            result = np.squeeze(result)
         else:
             result = np.apply_along_axis(
                 wrap_function(self.func),

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -129,7 +129,7 @@
     roperator,
 )
 from pandas.core.accessor import Accessor
-from pandas.core.apply import reconstruct_and_relabel_result
+from pandas.core.apply import NumbaExecutionEngine, reconstruct_and_relabel_result
 from pandas.core.array_algos.take import take_2d_multi
 from pandas.core.arraylike import OpsMixin
 from pandas.core.arrays import (
@@ -10616,14 +10616,17 @@ def apply(
         significant amount of time to run. Fast functions are unlikely to run faster
         with JIT compilation.
         """
-        if engine is None or isinstance(engine, str):
-            from pandas.core.apply import frame_apply
-
-            if engine is None:
-                engine = "python"
+        if engine == "numba":
+            numba = import_optional_dependency("numba")
+            if engine_kwargs is not None:
+                numba_jit = numba.jit(**engine_kwargs)
+            else:
+                numba_jit = numba.jit()
+            numba_jit.__pandas_udf__ = NumbaExecutionEngine
+            engine = numba_jit
 
-            if engine not in ["python", "numba"]:
-                raise ValueError(f"Unknown engine '{engine}'")
+        if engine is None or engine == "python":
+            from pandas.core.apply import frame_apply
 
             op = frame_apply(
                 self,
@@ -10632,7 +10635,7 @@ def apply(
                 raw=raw,
                 result_type=result_type,
                 by_row=by_row,
-                engine=engine,
+                engine="python",
                 engine_kwargs=engine_kwargs,
                 args=args,
                 kwargs=kwargs,