Refactor to use thrust::reduce on any. #685

Open · wants to merge 6 commits into base: main
Changes from 1 commit
15 changes: 13 additions & 2 deletions include/matx/operators/any.h
@@ -32,6 +32,8 @@

#pragma once

+ #include <thrust/reduce.h>
+ #include <thrust/device_ptr.h>

#include "matx/core/type_utils.h"
#include "matx/operators/base_operator.h"
@@ -71,8 +73,17 @@ namespace detail {
};

template <typename Out, typename Executor>
- void Exec(Out &&out, Executor &&ex) const {
-   any_impl(cuda::std::get<0>(out), a_, ex);
+ void Exec(Out &&out, Executor) const {
+   auto output_tensor = cuda::std::get<0>(out);
+   using out_tensor_t = decltype(output_tensor);
+   auto inp_ptr = thrust::device_pointer_cast(a_.Data());
+   auto result_ptr = output_tensor.Data();
+   auto op = detail::reduceOpAny<typename out_tensor_t::value_type>();
+   auto result = thrust::reduce(inp_ptr,
+                                inp_ptr + a_.TotalSize(),
+                                op.Init(),
+                                op);
+   *result_ptr = result;

@ZelboK (author) commented on the new Exec body:

So I'm not really that familiar with MatX. On line 79 I am doing a device_pointer_cast; IIUC the Executor is what determines whether it runs on the host or the device. I changed Executor ex to just Executor to get past the warning-as-error that the parameter wasn't used.
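
As a side note on that warning, here is a minimal sketch of the C++17 [[maybe_unused]] alternative, which keeps the parameter name while still satisfying -Werror. The types and the Exec signature below are hypothetical stand-ins, not MatX's actual code.

#include <cstdio>

// Hypothetical stand-in for an executor type; illustration only.
struct CudaExecutor {};

// [[maybe_unused]] keeps the parameter name available for later use while
// silencing -Werror=unused-parameter, instead of dropping the name entirely.
template <typename Executor>
void Exec(int &out, [[maybe_unused]] Executor &&ex) {
  out = 1;  // placeholder body that does not touch ex yet
}

int main() {
  int out = 0;
  Exec(out, CudaExecutor{});
  std::printf("out = %d\n", out);
  return 0;
}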

@cliffburdick (collaborator) replied:

Hi @ZelboK, the Exec functions take any generic operator and call the transform function. In this case there's a separate transform overload for a CUDA executor and a host executor. Since the input can be any type of operator and not just a tensor, there may not be a Data() method, because the operator doesn't have to have memory backing it. For example, a user could do:

(a = any(ones<int>({4,4}))).run();

ones has no Data function since it has no memory backing it. So for this to work properly it would have to use thrust's iterator interface and wrap the operator in it, as we do in other transforms. This is not a trivial change and can be a bit difficult if you're not familiar with the library.
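
To make the memoryless-operator point concrete, here is a minimal standalone sketch using Thrust's fancy iterators rather than MatX's own iterator classes from iterator.h. The OnesGenerator and AnyOp functors are hypothetical stand-ins, not MatX code.

#include <thrust/execution_policy.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/reduce.h>
#include <cstdio>

// Stand-in for a memoryless "ones" operator: maps any flat index to 1.
struct OnesGenerator {
  __host__ __device__ int operator()(long /*idx*/) const { return 1; }
};

// Binary op with "any" semantics; note the const call operator.
struct AnyOp {
  __host__ __device__ int operator()(int a, int b) const {
    return (a != 0) || (b != 0);
  }
};

int main() {
  const long total_size = 4 * 4;  // a 4x4 operator, flattened

  // Wrap the generator in Thrust's iterator interface: counting_iterator
  // produces flat indices and transform_iterator maps them through the
  // generator, so no device memory ever backs the input.
  auto first = thrust::make_transform_iterator(
      thrust::counting_iterator<long>(0), OnesGenerator{});

  int any = thrust::reduce(thrust::device, first, first + total_size,
                           /*init=*/0, AnyOp{});

  std::printf("any = %d\n", any);  // prints 1
  return 0;
}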

@ZelboK (author) replied on Jul 28, 2024:

@cliffburdick I've contributed a bit to CCCL actually, so I think I should be fine in that regard. That makes sense to me so far; I'll have to actually implement it to see how it goes. I presume you are referring to make_reduce_iterator, for example. Curious to know why this isn't trivial, though; is there something I'm missing?

Tangentially, is there a way to drastically reduce compile times? The feedback loop right now takes quite a long time.

I run with these options

cmake -DMATX_BUILD_TESTS=ON -DMATX_BUILD_BENCHMARKS=OFF -DMATX_BUILD_EXAMPLES=OFF -DMATX_BUILD_DOCS=OFF ..

and have tried commenting out tests, but it still takes a long time before I actually get to see errors from the compiler. I have a pretty decent CPU (an i9-12900K), too.

@cliffburdick (collaborator) replied:

Hi @ZelboK, it might not be too hard then if you're familiar with CCCL and their iterators. We have our own iterator classes in iterator.h. Typically the way we use them is like this:

https://github.com/NVIDIA/MatX/blob/main/include/matx/transforms/cub.h#L720

We write a lambda to perform the function (CUB in this case), and ReduceInput wraps it in iterators and collapses it. The tricky part might be that we have not done this with Thrust, though, so I don't know whether our iterators are missing something needed to make it work there.

To reduce compile times, you shouldn't build everything each time. What I do is take an existing example like fft_conv.cu (or make a new one), put the code I'm testing in there, and compile just that target with something like make fft_conv. That should compile in about 10 seconds on most machines, whereas compiling everything can approach an hour on weaker ones.
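
For reference, here is a standalone sketch of the CUB path described above, using raw device pointers and CUB's usual two-phase call rather than MatX's ReduceInput wrapper; the AnyOp functor is a hypothetical stand-in.

#include <cub/device/device_reduce.cuh>
#include <thrust/device_vector.h>
#include <cstdio>

// Binary "any" op for CUB; as with Thrust, the call operator is const.
struct AnyOp {
  __host__ __device__ int operator()(int a, int b) const {
    return (a != 0) || (b != 0);
  }
};

int main() {
  thrust::device_vector<int> in(16, 0);
  in[5] = 3;  // a single non-zero element
  thrust::device_vector<int> out(1);

  // CUB's two-phase pattern: the first call only computes the temporary
  // storage size, the second call performs the reduction.
  void  *d_temp = nullptr;
  size_t temp_bytes = 0;
  cub::DeviceReduce::Reduce(d_temp, temp_bytes, in.data().get(),
                            out.data().get(), static_cast<int>(in.size()),
                            AnyOp{}, /*init=*/0);

  thrust::device_vector<unsigned char> temp(temp_bytes);
  d_temp = temp.data().get();
  cub::DeviceReduce::Reduce(d_temp, temp_bytes, in.data().get(),
                            out.data().get(), static_cast<int>(in.size()),
                            AnyOp{}, /*init=*/0);

  std::printf("any = %d\n", static_cast<int>(out[0]));  // prints 1
  return 0;
}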

@ZelboK (author) replied:

Thank you @cliffburdick, I'm testing my code in an example now and the feedback loop is much better. Much appreciated.

}

static __MATX_INLINE__ constexpr __MATX_HOST__ __MATX_DEVICE__ int32_t Rank()
19 changes: 13 additions & 6 deletions include/matx/transforms/reduce.h
@@ -798,17 +798,24 @@ template <typename T> class reduceOpMax {
* Performs a reduction of two values of type T by returning 1 if either
* of the values are non-zero.
*/
- template <typename T> class reduceOpAny {
+ template <typename T>
+ class reduceOpAny {
public:
+ using type = T; // This type is for Thrust
using matx_reduce = bool;
using matx_no_cub_reduce = bool; // Don't use CUB for this reduction type
- __MATX_HOST__ __MATX_DEVICE__ __MATX_INLINE__ T Reduce(const T &v1, const T &v2)
- {
+
+ __MATX_HOST__ __MATX_DEVICE__ __MATX_INLINE__ T operator()(const T &v1, const T &v2) const {

@ZelboK (author) commented on the operator() above:

needs to be const for Thrust. (A standalone sketch of this requirement follows this diff.)


return (v1 != 0) || (v2 != 0);
}
- __MATX_HOST__ __MATX_DEVICE__ __MATX_INLINE__ T operator()(T &v1, T &v2) { v1 = ((v1 != 0) || (v2 != 0)); return v1; }
- __MATX_HOST__ __MATX_DEVICE__ __MATX_INLINE__ T Init() { return (T)(0); }
- __MATX_DEVICE__ __MATX_INLINE__ void atomicReduce(T *addr, T val) { atomicAny(addr, val); }
+
+ __MATX_HOST__ __MATX_DEVICE__ __MATX_INLINE__ T Init() const {
+   return static_cast<T>(0);
+ }
+
+ __MATX_DEVICE__ __MATX_INLINE__ void atomicReduce(T *addr, T val) const {
+   atomicAny(addr, val);
+ }
};

/**
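
Regarding the const-qualification noted in the inline comment above, here is a minimal standalone sketch of a reduceOpAny-style functor with const operator() and Init() driving thrust::reduce through device_pointer_cast, mirroring the refactored Exec. The AnyOp name and the use of thrust::device_vector are illustrative, not MatX code.

#include <thrust/device_ptr.h>
#include <thrust/device_vector.h>
#include <thrust/reduce.h>
#include <cstdio>

// Minimal stand-in for reduceOpAny with the const-qualified members the
// Thrust path needs: operator() and Init() are both callable on a const object.
template <typename T>
struct AnyOp {
  __host__ __device__ T operator()(const T &v1, const T &v2) const {
    return (v1 != 0) || (v2 != 0);
  }
  __host__ __device__ T Init() const { return static_cast<T>(0); }
};

int main() {
  thrust::device_vector<int> data(16, 0);
  data[7] = 42;  // one non-zero element

  // Mirror the refactored Exec: wrap the raw device pointer and hand the
  // functor to thrust::reduce.
  auto op = AnyOp<int>{};
  auto inp_ptr = thrust::device_pointer_cast(data.data().get());
  int any = thrust::reduce(inp_ptr, inp_ptr + data.size(), op.Init(), op);

  std::printf("any = %d\n", any);  // prints 1
  return 0;
}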