225 changes: 225 additions & 0 deletions ci/compute-sanitizer-suppressions.xml
@@ -0,0 +1,225 @@
<?xml version="1.0" encoding="utf-8"?>
Contributor

Do we need this xml?

Contributor Author

It is needed in order to run compute-sanitizer's initcheck tool (compute-sanitizer --tool initcheck) without false positives. It will be useful if we include initcheck runs as part of CI in the future (which I think we should consider).
CCCL has a similar file in its repo: https://github.com/NVIDIA/cccl/blob/main/ci/compute-sanitizer-suppressions.xml

<ComputeSanitizerOutput>
<record>
<kind>Initcheck</kind>
<what>
<text>Uninitialized __global__ memory read of size 4 bytes</text>
<size>4</size>
</what>
<where>
<func>.*</func>
</where>
<hostStack>
<frame>
<module>.*libcuda.so.*</module>
</frame>
<frame>
<func>cusparseCsr2cscEx2</func>
<module>.*libcusparse.so.*</module>
</frame>
</hostStack>
</record>
<record>
<kind>Initcheck</kind>
<what>
<text>Uninitialized __global__ memory read of size 4 bytes</text>
<size>4</size>
</what>
<where>
<func>ThreadLoad</func>
</where>
<hostStack>
<frame>
<module>.*libcuda.so.*</module>
</frame>
<frame>
<module>libcudart.*</module>
</frame>
<frame>
<func>cudaLaunchKernel</func>
</frame>
<frame>
<func>.*cub::.*::Device(Segmented)?(Reduce|Scan)(SingleTile)?Kernel.*</func>
</frame>
</hostStack>
</record>
<record>
<kind>Initcheck</kind>
<what>
<text>Uninitialized __global__ memory read of size 2 bytes</text>
<size>2</size>
</what>
<where>
<func>ThreadLoad</func>
</where>
<hostStack>
<frame>
<module>.*libcuda.so.*</module>
</frame>
<frame>
<module>libcudart.*</module>
</frame>
<frame>
<func>cudaLaunchKernel</func>
</frame>
<frame>
<func>.*cub::.*::Device(Segmented)?(Reduce|Scan)(SingleTile)?Kernel.*</func>
</frame>
</hostStack>
</record>
<record>
<kind>Initcheck</kind>
<what>
<text>Uninitialized __global__ memory read of size 8 bytes</text>
<size>8</size>
</what>
<where>
<func>DeviceSegmentedReduceKernel</func>
</where>
</record>
<record>
<kind>Initcheck</kind>
<what>
<text>Uninitialized __global__ memory read of size 4 bytes</text>
<size>4</size>
</what>
<where>
<func>ThreadLoad</func>
</where>
<hostStack>
<frame>
<module>.*libcuda.so.*</module>
</frame>
<frame>
<module>libcudart.*</module>
</frame>
<frame>
<module>libcudart.*</module>
</frame>
<frame>
<module>.*libcuopt.*</module>
</frame>
<frame>
<func>.*Device(Reduce|Scan)Kernel.*</func>
</frame>
</hostStack>
</record>
<!-- Rule matching CCCL's pattern of copying tuples back to the host after reduce_by_key; the tuples contain uninitialized padding -->
<!-- Because of aggressive inlining, thrust calls are elided from the host stack, which prevents a finer-grained rule. In practice this is good enough. -->
<record>
<kind>InitcheckApiError</kind>
<level>Error</level>
<what>
<text>Host API uninitialized memory access</text>
<accessSize>16</accessSize>
</what>
<hostStack>
<frame>
<func>cuMemcpyDtoHAsync.*</func>
<module>.*libcuda.so.*</module>
</frame>
</hostStack>
</record>
<!-- Suppress uninitialized copies in rmm container copy constructors (device_buffer / device_uvector) - often the underlying storage is allocated but not yet filled -->
<record>
<kind>InitcheckApiError</kind>
<level>Error</level>
<what>
<text>Host API uninitialized memory access</text>
</what>
<hostStack>
<frame>
<func>cuMemcpyAsync</func>
<module>.*libcuda.so.*</module>
</frame>
<frame>
<module>.*libcudart.so.*</module>
</frame>
<frame>
<module>.*libcudart.so.*</module>
</frame>
<frame>
<module>.*libcudart.so.*</module>
</frame>
<frame>
<module>.*librmm.so.*</module>
</frame>
<frame>
<func>rmm::device_buffer::device_buffer</func>
<module>.*librmm.so.*</module>
</frame>
</hostStack>
</record>
<record>
<kind>InitcheckApiError</kind>
<level>Error</level>
<what>
<text>Host API uninitialized memory access</text>
</what>
<hostStack>
<frame>
<func>cuMemcpyAsync</func>
<module>.*libcuda.so.*</module>
</frame>
<frame>
<module>.*libcudart.so.*</module>
</frame>
<frame>
<module>.*libcudart.so.*</module>
</frame>
<frame>
<module>.*libcudart.so.*</module>
</frame>
<frame>
<module>.*librmm.so.*</module>
</frame>
<frame>
<module>.*librmm.so.*</module>
</frame>
<frame>
<func>rmm::device_uvector.*::device_uvector</func>
<module>.*libcuopt.so.*</module>
</frame>
</hostStack>
</record>
<!-- Uninitialized device-to-device copies are usually harmless - if actually bogus, errors may be caught later on -->
<record>
<kind>InitcheckApiError</kind>
<level>Error</level>
<what>
<text>Host API uninitialized memory access</text>
</what>
<hostStack>
<frame>
<func>cuMemcpyDtoDAsync.*</func>
<module>.*libcuda.so.*</module>
</frame>
</hostStack>
</record>
<record>
<kind>InitcheckApiError</kind>
<level>Error</level>
<what>
<text>Host API uninitialized memory access</text>
</what>
<hostStack>
<frame>
<func>cuMemcpyAsync</func>
<module>.*libcuda.so.*</module>
</frame>
<frame>
<module>.*libcudart.so.*</module>
</frame>
<frame>
<module>.*libcudart.so.*</module>
</frame>
<frame>
<func>cudaMemcpyAsync</func>
</frame>
<frame>
<func>rmm::device_buffer::resize</func>
<module>.*librmm.so.*</module>
</frame>
</hostStack>
</record>
</ComputeSanitizerOutput>
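
The reduce_by_key padding rule above corresponds to a pattern like the minimal sketch below (illustrative only, not part of this PR; all names are made up): a value type with internal padding is reduced on the device and copied back to the host. Since the reduction only writes the members, the padding bytes remain uninitialized and initcheck reports the host copy. The rmm copy-constructor rules cover the analogous case where freshly allocated, not yet written storage is copied.

// padding_sketch.cu - illustrative only
#include <thrust/device_vector.h>
#include <thrust/functional.h>
#include <thrust/host_vector.h>
#include <thrust/reduce.h>

// 4 bytes of padding sit between `i` and `d`; nothing below ever writes them.
struct item_t {
  int i;
  double d;
};

struct add_items {
  __host__ __device__ item_t operator()(item_t a, item_t b) const
  {
    return item_t{a.i + b.i, a.d + b.d};  // members written, padding untouched
  }
};

int main()
{
  thrust::device_vector<int> keys(8, 0);
  thrust::device_vector<item_t> vals(8, item_t{1, 1.0});
  thrust::device_vector<int> keys_out(1);
  thrust::device_vector<item_t> vals_out(1);

  thrust::reduce_by_key(keys.begin(), keys.end(), vals.begin(),
                        keys_out.begin(), vals_out.begin(),
                        thrust::equal_to<int>{}, add_items{});

  // The 16-byte result is copied back with its padding bytes, which initcheck
  // reports as "Host API uninitialized memory access".
  thrust::host_vector<item_t> host_result = vals_out;
  return 0;
}
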
2 changes: 1 addition & 1 deletion cpp/libmps_parser/src/mps_parser.cpp
@@ -1139,7 +1139,7 @@ void mps_parser_t<i_t, f_t>::parse_bounds(std::string_view line)
c_values.emplace_back(f_t(0));
variable_lower_bounds.emplace_back(0);
variable_upper_bounds.emplace_back(+std::numeric_limits<f_t>::infinity());
var_types.resize(var_types.size() + 1);
var_types.emplace_back('C');
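// 'C' marks the variable as continuous; the removed resize() appended a value-initialized '\0' instead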
itr = var_names_map.find(std::string(var_name));
}
i_t var_id = itr->second;
43 changes: 43 additions & 0 deletions cpp/src/linear_programming/cusparse_view.cu
@@ -20,6 +20,7 @@
#include <linear_programming/cusparse_view.hpp>
#include <linear_programming/utils.cuh>
#include <mip/mip_constants.hpp>
#include <utilities/cuda_helpers.cuh>

#include <raft/sparse/detail/cusparse_macros.h>
#include <raft/sparse/detail/cusparse_wrappers.h>
@@ -541,12 +542,54 @@ cusparse_view_t<i_t, f_t>::cusparse_view_t(
A_indices_(dummy_int)
{
}
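
// Wraps raft::sparse::detail::cusparsespmv but first marks the destination vector's
// buffer as initialized, so that compute-sanitizer's initcheck does not report a
// false positive on the SpMV output buffer.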
template <typename T>
cusparseStatus_t cusparsespmv_wrapper(cusparseHandle_t handle,
cusparseOperation_t opA,
const T* alpha,
const cusparseSpMatDescr_t matA,
const cusparseDnVecDescr_t vecX,
const T* beta,
const cusparseDnVecDescr_t vecY,
cusparseSpMVAlg_t alg,
T* externalBuffer,
cudaStream_t stream)
{
void* dest_ptr;
int64_t dest_size;
cudaDataType valtype;
RAFT_CUSPARSE_TRY(cusparseDnVecGet(vecY, &dest_size, &dest_ptr, &valtype));
// compute-sanitizer flags a false positive here on the destination tmp buffer; silence it
cuopt::mark_memory_as_initialized(dest_ptr, dest_size, stream);

return raft::sparse::detail::cusparsespmv(
handle, opA, alpha, matA, vecX, beta, vecY, alg, externalBuffer, stream);
}

#if MIP_INSTANTIATE_FLOAT
template class cusparse_view_t<int, float>;
template cusparseStatus_t cusparsespmv_wrapper<float>(cusparseHandle_t handle,
cusparseOperation_t opA,
const float* alpha,
const cusparseSpMatDescr_t matA,
const cusparseDnVecDescr_t vecX,
const float* beta,
const cusparseDnVecDescr_t vecY,
cusparseSpMVAlg_t alg,
float* externalBuffer,
cudaStream_t stream);
#endif
#if MIP_INSTANTIATE_DOUBLE
template class cusparse_view_t<int, double>;
template cusparseStatus_t cusparsespmv_wrapper<double>(cusparseHandle_t handle,
cusparseOperation_t opA,
const double* alpha,
const cusparseSpMatDescr_t matA,
const cusparseDnVecDescr_t vecX,
const double* beta,
const cusparseDnVecDescr_t vecY,
cusparseSpMVAlg_t alg,
double* externalBuffer,
cudaStream_t stream);
#endif

} // namespace cuopt::linear_programming::detail
13 changes: 13 additions & 0 deletions cpp/src/linear_programming/cusparse_view.hpp
@@ -103,4 +103,17 @@ class cusparse_view_t {
const rmm::device_uvector<i_t>& A_offsets_;
const rmm::device_uvector<i_t>& A_indices_;
};

template <typename T>
cusparseStatus_t cusparsespmv_wrapper(cusparseHandle_t handle,
cusparseOperation_t opA,
const T* alpha,
const cusparseSpMatDescr_t matA,
const cusparseDnVecDescr_t vecX,
const T* beta,
const cusparseDnVecDescr_t vecY,
cusparseSpMVAlg_t alg,
T* externalBuffer,
cudaStream_t stream);

} // namespace cuopt::linear_programming::detail
43 changes: 22 additions & 21 deletions cpp/src/linear_programming/pdhg.cu
@@ -19,6 +19,8 @@
#include <linear_programming/utilities/ping_pong_graph.cuh>
#include <linear_programming/utils.cuh>
#include <mip/mip_constants.hpp>
#include <utilities/copy_helpers.hpp>
#include <utilities/cuda_helpers.cuh>

#include <raft/sparse/detail/cusparse_macros.h>
#include <raft/sparse/detail/cusparse_wrappers.h>
@@ -85,17 +87,16 @@ void pdhg_solver_t<i_t, f_t>::compute_next_dual_solution(rmm::device_scalar<f_t>
// Done in previous function

// K(x'+delta_x)
RAFT_CUSPARSE_TRY(
raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(),
CUSPARSE_OPERATION_NON_TRANSPOSE,
reusable_device_scalar_value_1_.data(), // 1
cusparse_view_.A,
cusparse_view_.tmp_primal,
reusable_device_scalar_value_0_.data(), // 0
cusparse_view_.dual_gradient,
CUSPARSE_SPMV_CSR_ALG2,
(f_t*)cusparse_view_.buffer_non_transpose.data(),
stream_view_));
RAFT_CUSPARSE_TRY(cusparsespmv_wrapper(handle_ptr_->get_cusparse_handle(),
CUSPARSE_OPERATION_NON_TRANSPOSE,
reusable_device_scalar_value_1_.data(), // 1
cusparse_view_.A,
cusparse_view_.tmp_primal,
reusable_device_scalar_value_0_.data(), // 0
cusparse_view_.dual_gradient,
CUSPARSE_SPMV_CSR_ALG2,
(f_t*)cusparse_view_.buffer_non_transpose.data(),
stream_view_));

// y - (sigma*dual_gradient)
// max(min(0, sigma*constraint_upper+primal_product), sigma*constraint_lower+primal_product)
@@ -122,16 +123,16 @@ void pdhg_solver_t<i_t, f_t>::compute_At_y()
{
// A_t @ y

RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(),
CUSPARSE_OPERATION_NON_TRANSPOSE,
reusable_device_scalar_value_1_.data(),
cusparse_view_.A_T,
cusparse_view_.dual_solution,
reusable_device_scalar_value_0_.data(),
cusparse_view_.current_AtY,
CUSPARSE_SPMV_CSR_ALG2,
(f_t*)cusparse_view_.buffer_transpose.data(),
stream_view_));
RAFT_CUSPARSE_TRY(cusparsespmv_wrapper(handle_ptr_->get_cusparse_handle(),
CUSPARSE_OPERATION_NON_TRANSPOSE,
reusable_device_scalar_value_1_.data(),
cusparse_view_.A_T,
cusparse_view_.dual_solution,
reusable_device_scalar_value_0_.data(),
cusparse_view_.current_AtY,
CUSPARSE_SPMV_CSR_ALG2,
(f_t*)cusparse_view_.buffer_transpose.data(),
stream_view_));
}

template <typename i_t, typename f_t>
5 changes: 5 additions & 0 deletions cpp/src/linear_programming/pdlp.cu
@@ -22,6 +22,8 @@
#include <linear_programming/pdlp.cuh>
#include <linear_programming/utils.cuh>
#include <mip/mip_constants.hpp>
#include <utilities/copy_helpers.hpp>
#include <utilities/cuda_helpers.cuh>
#include "cuopt/linear_programming/pdlp/solver_solution.hpp"

#include <raft/common/nvtx.hpp>
@@ -1048,6 +1050,9 @@ optimization_problem_solution_t<i_t, f_t> pdlp_solver_t<i_t, f_t>::run_solver(
primal_size_h_,
clamp<f_t>(),
stream_view_);
// Triggers a false positive in compute-sanitizer otherwise (lack of initialization doesn't
// matter here)
cuopt::mark_span_as_initialized(make_span(unscaled_primal_avg_solution_), stream_view_);
raft::linalg::ternaryOp(unscaled_primal_avg_solution_.data(),
unscaled_primal_avg_solution_.data(),
op_problem_scaled_.variable_lower_bounds.data(),