From 1cf9cbb68719ca1eea7024ea65ee8024c226601b Mon Sep 17 00:00:00 2001 From: Hugo Linsenmaier Date: Mon, 6 Oct 2025 07:45:23 -0700 Subject: [PATCH 01/40] Enable green context --- cpp/src/dual_simplex/device_sparse_matrix.cuh | 2 +- cpp/src/dual_simplex/sparse_cholesky.cuh | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/cpp/src/dual_simplex/device_sparse_matrix.cuh b/cpp/src/dual_simplex/device_sparse_matrix.cuh index f347f956b..00c198d3f 100644 --- a/cpp/src/dual_simplex/device_sparse_matrix.cuh +++ b/cpp/src/dual_simplex/device_sparse_matrix.cuh @@ -184,7 +184,7 @@ class device_csc_matrix_t { // Inclusive cumulative sum to have the corresponding column for each entry rmm::device_buffer d_temp_storage; - size_t temp_storage_bytes; + size_t temp_storage_bytes{0}; cub::DeviceScan::InclusiveSum( nullptr, temp_storage_bytes, col_index.data(), col_index.data(), col_index.size(), stream); d_temp_storage.resize(temp_storage_bytes, stream); diff --git a/cpp/src/dual_simplex/sparse_cholesky.cuh b/cpp/src/dual_simplex/sparse_cholesky.cuh index 489ee98d0..a751d67a6 100644 --- a/cpp/src/dual_simplex/sparse_cholesky.cuh +++ b/cpp/src/dual_simplex/sparse_cholesky.cuh @@ -29,6 +29,8 @@ #include "cuda.h" #include "cudss.h" +#define SPLIT_SM_FOR_BARRIER + namespace cuopt::linear_programming::dual_simplex { template From 04aa7bcbf396a9a95690e22421dbed9d7a0d3fa1 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Mon, 6 Oct 2025 11:15:10 -0500 Subject: [PATCH 02/40] update to docs --- cpp/src/dual_simplex/device_sparse_matrix.cuh | 2 +- .../source/cuopt-c/lp-milp/lp-milp-c-api.rst | 26 ++++ .../routing/routing-example.ipynb | 140 ++++++++++++++---- docs/cuopt/source/introduction.rst | 11 +- docs/cuopt/source/lp-features.rst | 9 +- docs/cuopt/source/lp-milp-settings.rst | 86 ++++++++++- docs/cuopt/source/milp-features.rst | 1 + docs/cuopt/source/system-requirements.rst | 1 + .../thirdparty_modeling_languages/index.rst | 7 + 9 files changed, 239 insertions(+), 44 deletions(-) diff --git a/cpp/src/dual_simplex/device_sparse_matrix.cuh b/cpp/src/dual_simplex/device_sparse_matrix.cuh index f347f956b..edae7af36 100644 --- a/cpp/src/dual_simplex/device_sparse_matrix.cuh +++ b/cpp/src/dual_simplex/device_sparse_matrix.cuh @@ -184,7 +184,7 @@ class device_csc_matrix_t { // Inclusive cumulative sum to have the corresponding column for each entry rmm::device_buffer d_temp_storage; - size_t temp_storage_bytes; + size_t temp_storage_bytes = 0; cub::DeviceScan::InclusiveSum( nullptr, temp_storage_bytes, col_index.data(), col_index.data(), col_index.size(), stream); d_temp_storage.resize(temp_storage_bytes, stream); diff --git a/docs/cuopt/source/cuopt-c/lp-milp/lp-milp-c-api.rst b/docs/cuopt/source/cuopt-c/lp-milp/lp-milp-c-api.rst index 6d942bde6..7c4be9834 100644 --- a/docs/cuopt/source/cuopt-c/lp-milp/lp-milp-c-api.rst +++ b/docs/cuopt/source/cuopt-c/lp-milp/lp-milp-c-api.rst @@ -16,6 +16,13 @@ You may use the following functions to determine the number of bytes used to rep .. doxygenfunction:: cuOptGetIntSize .. doxygenfunction:: cuOptGetFloatSize +Version Information +------------------- + +You may use the following function to get the version of the cuOpt library + +.. doxygenfunction:: cuOptGetVersion + Status Codes ------------ @@ -25,6 +32,9 @@ Every function in the C API returns a status code that indicates success or fail .. doxygendefine:: CUOPT_INVALID_ARGUMENT .. doxygendefine:: CUOPT_MPS_FILE_ERROR .. doxygendefine:: CUOPT_MPS_PARSE_ERROR +.. 
doxygendefine:: CUOPT_VALIDATION_ERROR +.. doxygendefine:: CUOPT_OUT_OF_MEMORY +.. doxygendefine:: CUOPT_RUNTIME_ERROR Optimization Problem -------------------- @@ -156,9 +166,21 @@ These constants are used as parameter names in the :c:func:`cuOptSetParameter`, .. doxygendefine:: CUOPT_MIP_ABSOLUTE_TOLERANCE .. doxygendefine:: CUOPT_MIP_RELATIVE_TOLERANCE .. doxygendefine:: CUOPT_MIP_INTEGRALITY_TOLERANCE +.. doxygendefine:: CUOPT_MIP_ABSOLUTE_GAP +.. doxygendefine:: CUOPT_MIP_RELATIVE_GAP .. doxygendefine:: CUOPT_MIP_SCALING .. doxygendefine:: CUOPT_MIP_HEURISTICS_ONLY +.. doxygendefine:: CUOPT_MIP_PRESOLVE .. doxygendefine:: CUOPT_PRESOLVE +.. doxygendefine:: CUOPT_LOG_TO_CONSOLE +.. doxygendefine:: CUOPT_CROSSOVER +.. doxygendefine:: CUOPT_FOLDING +.. doxygendefine:: CUOPT_AUGMENTED +.. doxygendefine:: CUOPT_DUALIZE +.. doxygendefine:: CUOPT_ORDERING +.. doxygendefine:: CUOPT_ELIMINATE_DENSE_COLUMNS +.. doxygendefine:: CUOPT_CUDSS_DETERMINISTIC +.. doxygendefine:: CUOPT_DUAL_POSTSOLVE .. doxygendefine:: CUOPT_SOLUTION_FILE .. doxygendefine:: CUOPT_NUM_CPU_THREADS .. doxygendefine:: CUOPT_USER_PROBLEM_FILE @@ -186,6 +208,7 @@ These constants are used to configure `CUOPT_METHOD` via :c:func:`cuOptSetIntege .. doxygendefine:: CUOPT_METHOD_CONCURRENT .. doxygendefine:: CUOPT_METHOD_PDLP .. doxygendefine:: CUOPT_METHOD_DUAL_SIMPLEX +.. doxygendefine:: CUOPT_METHOD_BARRIER Solving an LP or MIP @@ -206,12 +229,15 @@ The output of a solve is a `cuOptSolution` object. The following functions may be used to access information from a `cuOptSolution` .. doxygenfunction:: cuOptGetTerminationStatus +.. doxygenfunction:: cuOptGetErrorStatus +.. doxygenfunction:: cuOptGetErrorString .. doxygenfunction:: cuOptGetPrimalSolution .. doxygenfunction:: cuOptGetObjectiveValue .. doxygenfunction:: cuOptGetSolveTime .. doxygenfunction:: cuOptGetMIPGap .. doxygenfunction:: cuOptGetSolutionBound .. doxygenfunction:: cuOptGetDualSolution +.. doxygenfunction:: cuOptGetDualObjectiveValue .. 
doxygenfunction:: cuOptGetReducedCosts When you are finished with a `cuOptSolution` object you should destory it with diff --git a/docs/cuopt/source/cuopt-python/routing/routing-example.ipynb b/docs/cuopt/source/cuopt-python/routing/routing-example.ipynb index 2cf903c46..b376ac8e4 100644 --- a/docs/cuopt/source/cuopt-python/routing/routing-example.ipynb +++ b/docs/cuopt/source/cuopt-python/routing/routing-example.ipynb @@ -12,10 +12,62 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 1, "id": "2cb694f7", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/luffy/.local/lib/python3.12/site-packages/cudf/utils/_ptxcompiler.py:64: UserWarning: Error getting driver and runtime versions:\n", + "\n", + "stdout:\n", + "\n", + "\n", + "\n", + "stderr:\n", + "\n", + "Traceback (most recent call last):\n", + " File \"\", line 4, in \n", + " File \"/home/luffy/miniforge3/envs/cuopt/lib/python3.12/site-packages/numba_cuda/numba/cuda/cudadrv/driver.py\", line 393, in safe_cuda_api_call\n", + " return self._check_cuda_python_error(fname, libfn(*args))\n", + " ^^^^^^^^^^^^\n", + "TypeError: cuDriverGetVersion() takes no arguments (1 given)\n", + "\n", + "\n", + "Not patching Numba\n", + " warnings.warn(msg, UserWarning)\n", + "/home/luffy/.local/lib/python3.12/site-packages/cupy/_environment.py:596: UserWarning: \n", + "--------------------------------------------------------------------------------\n", + "\n", + " CuPy may not function correctly because multiple CuPy packages are installed\n", + " in your environment:\n", + "\n", + " cupy, cupy-cuda12x\n", + "\n", + " Follow these steps to resolve this issue:\n", + "\n", + " 1. For all packages listed above, run the following command to remove all\n", + " existing CuPy installations:\n", + "\n", + " $ pip uninstall \n", + "\n", + " If you previously installed CuPy via conda, also run the following:\n", + "\n", + " $ conda uninstall cupy\n", + "\n", + " 2. 
Install the appropriate CuPy package.\n", + " Refer to the Installation Guide for detailed instructions.\n", + "\n", + " https://docs.cupy.dev/en/stable/install.html\n", + "\n", + "--------------------------------------------------------------------------------\n", + "\n", + " warnings.warn(f'''\n" + ] + } + ], "source": [ "from cuopt import routing\n", "from cuopt import distance_engine\n", @@ -61,7 +113,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 2, "id": "5d12f05d", "metadata": {}, "outputs": [], @@ -100,7 +152,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 3, "id": "2c824c99", "metadata": {}, "outputs": [], @@ -122,7 +174,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 4, "id": "4e08f664", "metadata": {}, "outputs": [], @@ -152,22 +204,50 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 5, "id": "9975bf1a", "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Waypoint graph node to time matrix index mapping \n", - "{np.int64(0): 0, np.int64(4): 1, np.int64(5): 2, np.int64(6): 3}\n", - "\n", - " 0 1 2 3\n", - "0 0.0 6.0 4.0 6.0\n", - "1 6.0 0.0 4.0 6.0\n", - "2 4.0 4.0 0.0 4.0\n", - "3 6.0 6.0 4.0 0.0\n" + "ename": "RuntimeError", + "evalue": "CuPy failed to load libnvrtc.so.12: OSError: libnvrtc.so.12: cannot open shared object file: No such file or directory", + "output_type": "error", + "traceback": [ + "\u001b[31m---------------------------------------------------------------------------\u001b[39m", + "\u001b[31mOSError\u001b[39m Traceback (most recent call last)", + "\u001b[36mFile \u001b[39m\u001b[32mcupy_backends/cuda/_softlink.pyx:25\u001b[39m, in \u001b[36mcupy_backends.cuda._softlink.SoftLink.__init__\u001b[39m\u001b[34m()\u001b[39m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/miniforge3/envs/cuopt/lib/python3.12/ctypes/__init__.py:379\u001b[39m, in \u001b[36mCDLL.__init__\u001b[39m\u001b[34m(self, name, mode, handle, use_errno, use_last_error, winmode)\u001b[39m\n\u001b[32m 378\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m handle \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m379\u001b[39m \u001b[38;5;28mself\u001b[39m._handle = \u001b[43m_dlopen\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 380\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n", + "\u001b[31mOSError\u001b[39m: libnvrtc.so.12: cannot open shared object file: No such file or directory", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001b[31mRuntimeError\u001b[39m Traceback (most recent call last)", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[5]\u001b[39m\u001b[32m, line 6\u001b[39m\n\u001b[32m 1\u001b[39m waypoint_graph = distance_engine.WaypointMatrix(\n\u001b[32m 2\u001b[39m offsets,\n\u001b[32m 3\u001b[39m edges,\n\u001b[32m 4\u001b[39m weights\n\u001b[32m 5\u001b[39m )\n\u001b[32m----> \u001b[39m\u001b[32m6\u001b[39m cost_matrix = \u001b[43mwaypoint_graph\u001b[49m\u001b[43m.\u001b[49m\u001b[43mcompute_cost_matrix\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtarget_locations\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 7\u001b[39m transit_time_matrix = cost_matrix.copy(deep=\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[32m 8\u001b[39m target_map = {v:k \u001b[38;5;28;01mfor\u001b[39;00m k, 
v \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(target_locations)}\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/miniforge3/envs/cuopt/lib/python3.12/site-packages/cuopt/utilities/exception_handler.py:60\u001b[39m, in \u001b[36mcatch_cuopt_exception..func\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 58\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(err[\u001b[33m\"\u001b[39m\u001b[33mmsg\u001b[39m\u001b[33m\"\u001b[39m])\n\u001b[32m 59\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m---> \u001b[39m\u001b[32m60\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[32m 61\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[32m 62\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m e\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/miniforge3/envs/cuopt/lib/python3.12/site-packages/cuopt/utilities/exception_handler.py:36\u001b[39m, in \u001b[36mcatch_cuopt_exception..func\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 33\u001b[39m \u001b[38;5;129m@functools\u001b[39m.wraps(f)\n\u001b[32m 34\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mfunc\u001b[39m(*args, **kwargs):\n\u001b[32m 35\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m---> \u001b[39m\u001b[32m36\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mf\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 37\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[32m 38\u001b[39m err_msg = \u001b[38;5;28mstr\u001b[39m(e)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/miniforge3/envs/cuopt/lib/python3.12/site-packages/cuopt/distance_engine/waypoint_matrix.py:133\u001b[39m, in \u001b[36mWaypointMatrix.compute_cost_matrix\u001b[39m\u001b[34m(self, target_locations)\u001b[39m\n\u001b[32m 130\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m target_locations.shape[\u001b[32m0\u001b[39m] <= \u001b[32m0\u001b[39m:\n\u001b[32m 131\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[33m\"\"\"\u001b[39m\u001b[33mTarget_locations length must be positive\u001b[39m\u001b[33m\"\"\"\u001b[39m)\n\u001b[32m--> \u001b[39m\u001b[32m133\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m.\u001b[49m\u001b[43mcompute_cost_matrix\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtarget_locations\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/miniforge3/envs/cuopt/lib/python3.12/site-packages/cuopt/distance_engine/waypoint_matrix_wrapper.pyx:81\u001b[39m, in \u001b[36mcuopt.distance_engine.waypoint_matrix_wrapper.WaypointMatrix.compute_cost_matrix\u001b[39m\u001b[34m()\u001b[39m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/.local/lib/python3.12/site-packages/cudf/utils/performance_tracking.py:51\u001b[39m, in \u001b[36m_performance_tracking..wrapper\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 43\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m nvtx.enabled():\n\u001b[32m 44\u001b[39m stack.enter_context(\n\u001b[32m 45\u001b[39m nvtx.annotate(\n\u001b[32m 46\u001b[39m message=func.\u001b[34m__qualname__\u001b[39m,\n\u001b[32m 
(...)\u001b[39m\u001b[32m 49\u001b[39m )\n\u001b[32m 50\u001b[39m )\n\u001b[32m---> \u001b[39m\u001b[32m51\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/.local/lib/python3.12/site-packages/cudf/core/dataframe.py:810\u001b[39m, in \u001b[36mDataFrame.__init__\u001b[39m\u001b[34m(self, data, index, columns, dtype, copy, nan_as_null)\u001b[39m\n\u001b[32m 808\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[33m\"\u001b[39m\u001b[33mdescr\u001b[39m\u001b[33m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m arr_interface:\n\u001b[32m 809\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(arr_interface[\u001b[33m\"\u001b[39m\u001b[33mdescr\u001b[39m\u001b[33m\"\u001b[39m]) == \u001b[32m1\u001b[39m:\n\u001b[32m--> \u001b[39m\u001b[32m810\u001b[39m new_df = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_from_arrays\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m 811\u001b[39m \u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindex\u001b[49m\u001b[43m=\u001b[49m\u001b[43mindex\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[43m=\u001b[49m\u001b[43mcolumns\u001b[49m\n\u001b[32m 812\u001b[39m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 813\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 814\u001b[39m new_df = \u001b[38;5;28mself\u001b[39m.from_records(\n\u001b[32m 815\u001b[39m data, index=index, columns=columns\n\u001b[32m 816\u001b[39m )\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/.local/lib/python3.12/site-packages/cudf/utils/performance_tracking.py:51\u001b[39m, in \u001b[36m_performance_tracking..wrapper\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m 43\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m nvtx.enabled():\n\u001b[32m 44\u001b[39m stack.enter_context(\n\u001b[32m 45\u001b[39m nvtx.annotate(\n\u001b[32m 46\u001b[39m message=func.\u001b[34m__qualname__\u001b[39m,\n\u001b[32m (...)\u001b[39m\u001b[32m 49\u001b[39m )\n\u001b[32m 50\u001b[39m )\n\u001b[32m---> \u001b[39m\u001b[32m51\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/.local/lib/python3.12/site-packages/cudf/core/dataframe.py:5947\u001b[39m, in \u001b[36mDataFrame._from_arrays\u001b[39m\u001b[34m(cls, data, index, columns, nan_as_null)\u001b[39m\n\u001b[32m 5945\u001b[39m array_data: np.ndarray | cupy.ndarray\n\u001b[32m 5946\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(data, \u001b[33m\"\u001b[39m\u001b[33m__cuda_array_interface__\u001b[39m\u001b[33m\"\u001b[39m):\n\u001b[32m-> \u001b[39m\u001b[32m5947\u001b[39m array_data = \u001b[43mcupy\u001b[49m\u001b[43m.\u001b[49m\u001b[43masarray\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43morder\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mF\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[32m 5948\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m 
\u001b[38;5;28mhasattr\u001b[39m(data, \u001b[33m\"\u001b[39m\u001b[33m__array_interface__\u001b[39m\u001b[33m\"\u001b[39m):\n\u001b[32m 5949\u001b[39m array_data = np.asarray(data, order=\u001b[33m\"\u001b[39m\u001b[33mF\u001b[39m\u001b[33m\"\u001b[39m)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~/.local/lib/python3.12/site-packages/cupy/_creation/from_data.py:88\u001b[39m, in \u001b[36masarray\u001b[39m\u001b[34m(a, dtype, order, blocking)\u001b[39m\n\u001b[32m 56\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34masarray\u001b[39m(a, dtype=\u001b[38;5;28;01mNone\u001b[39;00m, order=\u001b[38;5;28;01mNone\u001b[39;00m, *, blocking=\u001b[38;5;28;01mFalse\u001b[39;00m):\n\u001b[32m 57\u001b[39m \u001b[38;5;250m \u001b[39m\u001b[33;03m\"\"\"Converts an object to array.\u001b[39;00m\n\u001b[32m 58\u001b[39m \n\u001b[32m 59\u001b[39m \u001b[33;03m This is equivalent to ``array(a, dtype, copy=False, order=order)``.\u001b[39;00m\n\u001b[32m (...)\u001b[39m\u001b[32m 86\u001b[39m \n\u001b[32m 87\u001b[39m \u001b[33;03m \"\"\"\u001b[39;00m\n\u001b[32m---> \u001b[39m\u001b[32m88\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_core\u001b[49m\u001b[43m.\u001b[49m\u001b[43marray\u001b[49m\u001b[43m(\u001b[49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43morder\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mblocking\u001b[49m\u001b[43m=\u001b[49m\u001b[43mblocking\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32mcupy/_core/core.pyx:2502\u001b[39m, in \u001b[36mcupy._core.core.array\u001b[39m\u001b[34m()\u001b[39m\n", + "\u001b[36mFile \u001b[39m\u001b[32mcupy/_core/core.pyx:2512\u001b[39m, in \u001b[36mcupy._core.core.array\u001b[39m\u001b[34m()\u001b[39m\n", + "\u001b[36mFile \u001b[39m\u001b[32mcupy/_core/core.pyx:2543\u001b[39m, in \u001b[36mcupy._core.core._array_from_cupy_ndarray\u001b[39m\u001b[34m()\u001b[39m\n", + "\u001b[36mFile \u001b[39m\u001b[32mcupy/_core/core.pyx:618\u001b[39m, in \u001b[36mcupy._core.core._ndarray_base.astype\u001b[39m\u001b[34m()\u001b[39m\n", + "\u001b[36mFile \u001b[39m\u001b[32mcupy/_core/core.pyx:686\u001b[39m, in \u001b[36mcupy._core.core._ndarray_base.astype\u001b[39m\u001b[34m()\u001b[39m\n", + "\u001b[36mFile \u001b[39m\u001b[32mcupy/_core/_kernel.pyx:1374\u001b[39m, in \u001b[36mcupy._core._kernel.ufunc.__call__\u001b[39m\u001b[34m()\u001b[39m\n", + "\u001b[36mFile \u001b[39m\u001b[32mcupy/_core/_kernel.pyx:1401\u001b[39m, in \u001b[36mcupy._core._kernel.ufunc._get_ufunc_kernel\u001b[39m\u001b[34m()\u001b[39m\n", + "\u001b[36mFile \u001b[39m\u001b[32mcupy/_core/_kernel.pyx:1082\u001b[39m, in \u001b[36mcupy._core._kernel._get_ufunc_kernel\u001b[39m\u001b[34m()\u001b[39m\n", + "\u001b[36mFile \u001b[39m\u001b[32mcupy/_core/_kernel.pyx:94\u001b[39m, in \u001b[36mcupy._core._kernel._get_simple_elementwise_kernel\u001b[39m\u001b[34m()\u001b[39m\n", + "\u001b[36mFile \u001b[39m\u001b[32mcupy/_core/_kernel.pyx:82\u001b[39m, in \u001b[36mcupy._core._kernel._get_simple_elementwise_kernel_from_code\u001b[39m\u001b[34m()\u001b[39m\n", + "\u001b[36mFile \u001b[39m\u001b[32mcupy/_core/core.pyx:2375\u001b[39m, in \u001b[36mcupy._core.core.compile_with_cache\u001b[39m\u001b[34m()\u001b[39m\n", + "\u001b[36mFile \u001b[39m\u001b[32mcupy/_core/core.pyx:2320\u001b[39m, in 
\u001b[36mcupy._core.core.assemble_cupy_compiler_options\u001b[39m\u001b[34m()\u001b[39m\n", + "\u001b[36mFile \u001b[39m\u001b[32mcupy_backends/cuda/libs/nvrtc.pyx:57\u001b[39m, in \u001b[36mcupy_backends.cuda.libs.nvrtc.getVersion\u001b[39m\u001b[34m()\u001b[39m\n", + "\u001b[36mFile \u001b[39m\u001b[32mcupy_backends/cuda/libs/_cnvrtc.pxi:72\u001b[39m, in \u001b[36mcupy_backends.cuda.libs.nvrtc.initialize\u001b[39m\u001b[34m()\u001b[39m\n", + "\u001b[36mFile \u001b[39m\u001b[32mcupy_backends/cuda/libs/_cnvrtc.pxi:75\u001b[39m, in \u001b[36mcupy_backends.cuda.libs.nvrtc._initialize\u001b[39m\u001b[34m()\u001b[39m\n", + "\u001b[36mFile \u001b[39m\u001b[32mcupy_backends/cuda/libs/_cnvrtc.pxi:153\u001b[39m, in \u001b[36mcupy_backends.cuda.libs.nvrtc._get_softlink\u001b[39m\u001b[34m()\u001b[39m\n", + "\u001b[36mFile \u001b[39m\u001b[32mcupy_backends/cuda/_softlink.pyx:32\u001b[39m, in \u001b[36mcupy_backends.cuda._softlink.SoftLink.__init__\u001b[39m\u001b[34m()\u001b[39m\n", + "\u001b[31mRuntimeError\u001b[39m: CuPy failed to load libnvrtc.so.12: OSError: libnvrtc.so.12: cannot open shared object file: No such file or directory" ] } ], @@ -230,7 +310,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": null, "id": "72b715c7", "metadata": {}, "outputs": [ @@ -409,7 +489,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": null, "id": "9e17e899", "metadata": {}, "outputs": [ @@ -496,7 +576,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": null, "id": "2e765325", "metadata": {}, "outputs": [], @@ -525,7 +605,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": null, "id": "c936b137", "metadata": {}, "outputs": [ @@ -567,7 +647,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": null, "id": "87c2d9f8", "metadata": {}, "outputs": [], @@ -596,7 +676,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": null, "id": "1d325f4b", "metadata": {}, "outputs": [ @@ -642,7 +722,7 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": null, "id": "064978ca", "metadata": {}, "outputs": [], @@ -666,7 +746,7 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": null, "id": "b3f328e3", "metadata": {}, "outputs": [], @@ -708,7 +788,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": null, "id": "a6babc11", "metadata": { "scrolled": true @@ -732,7 +812,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": null, "id": "28a05ace", "metadata": {}, "outputs": [ @@ -792,7 +872,7 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": null, "id": "e0d98709", "metadata": {}, "outputs": [ @@ -838,7 +918,7 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": null, "id": "c13cfbf3", "metadata": { "scrolled": true @@ -945,7 +1025,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.10" + "version": "3.12.11" } }, "nbformat": 4, diff --git a/docs/cuopt/source/introduction.rst b/docs/cuopt/source/introduction.rst index 100282128..af28b8ded 100644 --- a/docs/cuopt/source/introduction.rst +++ b/docs/cuopt/source/introduction.rst @@ -66,9 +66,15 @@ This is a linear program. How cuOpt Solves the Linear Programming Problem ------------------------------------------------ -cuOpt includes an LP solver based on `PDLP `__, a new First-Order Method (FOM) used to solve large-scale LPs. 
This solver implements gradient descent, enhanced by heuristics, and performing massively parallel operations efficiently by leveraging the latest NVIDIA GPUs. +cuOpt includes three LP solving methods: -In addition to PDLP, cuOpt includes a dual simplex solver that runs on the CPU. Both algorithms can be run concurrently on the GPU and CPU. +* **PDLP**: Based on `PDLP `__, a First-Order Method (FOM) for solving large-scale LPs. This solver implements gradient descent, enhanced by heuristics, performing massively parallel operations efficiently by leveraging NVIDIA GPUs. + +* **Barrier (Interior-Point)**: A primal-dual interior-point method that uses GPU-accelerated sparse Cholesky and LDLT solves via cuDSS, and sparse matrix operations via cuSparse. The barrier method includes advanced techniques like folding, dense column elimination, and color refinement for weighted graphs. + +* **Dual Simplex**: A CPU-based dual simplex solver for small to medium-sized problems. + +All three algorithms can be run concurrently on both GPU and CPU, with the fastest solution returned automatically. Mixed Integer Linear Programming (MILP) ========================================= @@ -121,6 +127,7 @@ cuOpt supports the following APIs: - `AMPL `_ - `GAMS `_ - `PuLP `_ + - `JuMP `_ ================================== diff --git a/docs/cuopt/source/lp-features.rst b/docs/cuopt/source/lp-features.rst index c5d589907..610371b32 100644 --- a/docs/cuopt/source/lp-features.rst +++ b/docs/cuopt/source/lp-features.rst @@ -13,6 +13,7 @@ The LP solver can be accessed in the following ways: - AMPL - GAMS - PuLP + - JuMP - **C API**: A native C API that provides direct low-level access to cuOpt's LP capabilities, enabling integration into any application or system that can interface with C. @@ -65,9 +66,11 @@ Users can control how the solver will operate by specifying the PDLP solver mode Method ------ -**Concurrent**: The default method for solving linear programs. When concurrent is selected, cuOpt runs two algorithms at the same time: PDLP on the GPU and dual simplex on the CPU. A solution is returned from the algorithm that finishes first. +**Concurrent**: The default method for solving linear programs. When concurrent is selected, cuOpt runs three algorithms in parallel: PDLP on the GPU, barrier (interior-point) on the GPU, and dual simplex on the CPU. A solution is returned from the algorithm that finishes first. -**PDLP**: Primal-Dual Hybrid Gradient for Linear Program is an algorithm for solving large-scale linear programming problems on the GPU. PDLP does not attempt to any matrix factorizations during the course of the solve. Select this method if your LP is so large that factorization will not fit into memory. By default PDLP solves to low relative tolerance and the solutions it returns do not lie at a vertex of the feasible region. Enable crossover to obtain a highly accurate basic solution from a PDLP solution. +**PDLP**: Primal-Dual Hybrid Gradient for Linear Program is an algorithm for solving large-scale linear programming problems on the GPU. PDLP does not attempt any matrix factorizations during the course of the solve. Select this method if your LP is so large that factorization will not fit into memory. By default PDLP solves to low relative tolerance and the solutions it returns do not lie at a vertex of the feasible region. Enable crossover to obtain a highly accurate basic solution from a PDLP solution. 
+ +**Barrier**: The barrier method (also known as interior-point method) solves linear programs using a primal-dual interior-point algorithm. This method uses GPU-accelerated sparse Cholesky and sparse LDLT solves via cuDSS, and GPU-accelerated sparse matrix-vector and matrix-matrix operations via cuSparse. Barrier is particularly effective for large-scale problems and can automatically apply techniques like folding and dense column elimination to improve performance. The method can solve either the augmented system or the normal equations (ADAT), and supports advanced features like color refinement for weighted graphs. Crossover can be enabled to convert the barrier solution to a basic solution. **Dual Simplex**: Dual simplex is the simplex method applied to the dual of the linear program. Dual simplex requires the basis factorization of linear program fit into memory. Select this method if your LP is small to medium sized, or if you require a high-quality basic solution. @@ -75,7 +78,7 @@ Method Crossover --------- -Crossover allows you to obtain a high-quality basic solution from the results of a PDLP solve. More details can be found :ref:`here `. +Crossover allows you to obtain a high-quality basic solution from the results of a PDLP or barrier solve. When enabled, crossover converts an interior-point solution to a vertex solution (basic solution) with high accuracy. More details can be found :ref:`here `. Presolve diff --git a/docs/cuopt/source/lp-milp-settings.rst b/docs/cuopt/source/lp-milp-settings.rst index 258695419..2e72f5ff0 100644 --- a/docs/cuopt/source/lp-milp-settings.rst +++ b/docs/cuopt/source/lp-milp-settings.rst @@ -78,20 +78,19 @@ We now describe the parameter settings used to control cuOpt's Linear Programmin Method ^^^^^^ -``CUOPT_METHOD`` controls the method to solve the linear programming problem. Three methods are available: +``CUOPT_METHOD`` controls the method to solve the linear programming problem. Four methods are available: -* ``Concurrent``: Use both PDLP and dual simplex in parallel. +* ``Concurrent``: Use PDLP, dual simplex, and barrier in parallel (default). * ``PDLP``: Use the PDLP method. * ``Dual Simplex``: Use the dual simplex method. +* ``Barrier``: Use the barrier (interior-point) method. -Note: The default method is ``Concurrent``. +Note: The default method is ``Concurrent``, which now includes barrier along with PDLP and dual simplex. C API users should use the constants defined in :ref:`method-constants` for this parameter. Server Thin client users should use the :class:`cuopt_sh_client.SolverMethod` for this parameter. - - PDLP Solver Mode ^^^^^^^^^^^^^^^^ @@ -146,14 +145,14 @@ Note: the default value is false. Crossover ^^^^^^^^^ -``CUOPT_CROSSOVER`` controls whether PDLP should crossover to a basic solution after a optimal solution is found. +``CUOPT_CROSSOVER`` controls whether PDLP or barrier should crossover to a basic solution after an optimal solution is found. Changing this value has a significant impact on accuracy and runtime. -By default the solutions provided by PDLP are low accuracy and may have many variables that lie +By default the solutions provided by PDLP and barrier are interior-point solutions that may have many variables that lie between their bounds. Enabling crossover allows the user to obtain a high-quality basic solution that lies at a vertex of the feasible region. If n is the number of variables, and m is the number of constraints, n - m variables will be on their bounds in a basic solution. 
-Note: the default value is false. +Note: the default value is false. Crossover has been updated to use hypersparse solves for improved performance. Save Best Primal So Far ^^^^^^^^^^^^^^^^^^^^^^^ @@ -180,6 +179,75 @@ Per Constraint Residual Note: the default value is false. +Barrier Solver Settings +^^^^^^^^^^^^^^^^^^^^^^^^ + +The following settings control the behavior of the barrier (interior-point) method: + +Folding +""""""" + +``CUOPT_FOLDING`` controls whether to fold the linear program. Folding can reduce problem size by exploiting problem structure. + +* ``-1``: Automatic (default) - cuOpt decides whether to fold based on problem characteristics +* ``0``: Folding disabled +* ``1``: Folding enabled + +Note: the default value is ``-1`` (automatic). + +Dualize +""""""" + +``CUOPT_DUALIZE`` controls whether to dualize the linear program in presolve. Dualizing can improve solve time for problems where the dual has better structure. + +* ``-1``: Automatic (default) - cuOpt decides whether to dualize based on problem characteristics +* ``0``: Don't dualize +* ``1``: Force dualize + +Note: the default value is ``-1`` (automatic). + +Ordering +"""""""" + +``CUOPT_ORDERING`` controls the ordering algorithm used by cuDSS for sparse factorizations. The ordering can significantly impact solve performance. + +* ``-1``: Automatic (default) - cuOpt selects the best ordering +* ``0``: cuDSS default ordering +* ``1``: AMD (Approximate Minimum Degree) ordering + +Note: the default value is ``-1`` (automatic). + +Augmented System +"""""""""""""""" + +``CUOPT_AUGMENTED`` controls which linear system to solve in the barrier method. + +* ``-1``: Automatic (default) - cuOpt selects the best formulation +* ``0``: Solve the ADAT system (normal equations) +* ``1``: Solve the augmented system + +Note: the default value is ``-1`` (automatic). The augmented system may be more stable for some problems, while ADAT may be faster for others. + +Eliminate Dense Columns +"""""""""""""""""""""""" + +``CUOPT_ELIMINATE_DENSE_COLUMNS`` controls whether to eliminate dense columns from the constraint matrix before solving. Eliminating dense columns can improve performance by reducing fill-in during factorization. + +* ``true``: Eliminate dense columns (default) +* ``false``: Don't eliminate dense columns + +Note: the default value is ``true``. + +cuDSS Deterministic Mode +""""""""""""""""""""""""" + +``CUOPT_CUDSS_DETERMINISTIC`` controls whether cuDSS operates in deterministic mode. Deterministic mode ensures reproducible results across runs but may be slower. + +* ``true``: Use deterministic mode +* ``false``: Use non-deterministic mode (default) + +Note: the default value is ``false``. Enable deterministic mode if reproducibility is more important than performance. + Absolute Primal Tolerance ^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -329,3 +397,5 @@ If the Best Objective and the Dual Bound are both zero the gap is zero. If the b gap is infinity. Note: the default value is ``1e-4``. + + diff --git a/docs/cuopt/source/milp-features.rst b/docs/cuopt/source/milp-features.rst index 40eba5c40..429bc0530 100644 --- a/docs/cuopt/source/milp-features.rst +++ b/docs/cuopt/source/milp-features.rst @@ -13,6 +13,7 @@ The MILP solver can be accessed in the following ways: - AMPL - GAMS - PuLP + - JuMP - **C API**: A native C API that provides direct low-level access to cuOpt's MILP solver, enabling integration into any application or system that can interface with C. 
diff --git a/docs/cuopt/source/system-requirements.rst b/docs/cuopt/source/system-requirements.rst index e7d963ae5..7ad702194 100644 --- a/docs/cuopt/source/system-requirements.rst +++ b/docs/cuopt/source/system-requirements.rst @@ -47,6 +47,7 @@ Dependencies are installed automatically when using the pip and Conda installati - CUDA 12.2 with Driver 535.86.10+ - CUDA 12.5 with Driver 555.42.06+ - CUDA 12.9 with Driver 570.42.01+ + - CUDA 13.0 with Driver 580.65.06+ .. dropdown:: Recommended Requirements for Best Performance diff --git a/docs/cuopt/source/thirdparty_modeling_languages/index.rst b/docs/cuopt/source/thirdparty_modeling_languages/index.rst index 3fa6c5466..0acda399a 100644 --- a/docs/cuopt/source/thirdparty_modeling_languages/index.rst +++ b/docs/cuopt/source/thirdparty_modeling_languages/index.rst @@ -21,3 +21,10 @@ PuLP Support PuLP can be used with near zero code changes: simply switch to cuOpt as a solver to solve linear and mixed-integer programming problems. Please refer to the `PuLP documentation `_ for more information. Also, see the example notebook in the `cuopt-examples `_ repository. + +-------------------------- +JuMP Support +-------------------------- + +JuMP can be used with near zero code changes: simply switch to cuOpt as a solver to solve linear and mixed-integer programming problems. +Please refer to the `JuMP documentation `_ for more information. From 74409aae2784481a0a8f89e8b10bc2ddd54342d0 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Mon, 6 Oct 2025 11:15:57 -0500 Subject: [PATCH 03/40] fix style --- docs/cuopt/source/lp-milp-settings.rst | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/cuopt/source/lp-milp-settings.rst b/docs/cuopt/source/lp-milp-settings.rst index 2e72f5ff0..d556b36e8 100644 --- a/docs/cuopt/source/lp-milp-settings.rst +++ b/docs/cuopt/source/lp-milp-settings.rst @@ -397,5 +397,3 @@ If the Best Objective and the Dual Bound are both zero the gap is zero. If the b gap is infinity. Note: the default value is ``1e-4``. - - From 381f7bf7a8c330fa5b9487936774b68440dad8c8 Mon Sep 17 00:00:00 2001 From: Chris Maes Date: Mon, 6 Oct 2025 09:24:39 -0700 Subject: [PATCH 04/40] Update introduction.rst --- docs/cuopt/source/introduction.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/cuopt/source/introduction.rst b/docs/cuopt/source/introduction.rst index af28b8ded..b3c582168 100644 --- a/docs/cuopt/source/introduction.rst +++ b/docs/cuopt/source/introduction.rst @@ -68,9 +68,9 @@ How cuOpt Solves the Linear Programming Problem ------------------------------------------------ cuOpt includes three LP solving methods: -* **PDLP**: Based on `PDLP `__, a First-Order Method (FOM) for solving large-scale LPs. This solver implements gradient descent, enhanced by heuristics, performing massively parallel operations efficiently by leveraging NVIDIA GPUs. +* **PDLP**: Based on `PDLP `__, a First-Order Method (FOM) for solving large-scale LPs. This solver implements primal-dual hybrid gradient enhanced by heuristics. Sparse matrix-vector products are perfomed efficiently on NVIDIA GPUs. -* **Barrier (Interior-Point)**: A primal-dual interior-point method that uses GPU-accelerated sparse Cholesky and LDLT solves via cuDSS, and sparse matrix operations via cuSparse. The barrier method includes advanced techniques like folding, dense column elimination, and color refinement for weighted graphs. 
+* **Barrier (Interior-Point)**: A primal-dual interior-point method that uses GPU-accelerated sparse Cholesky and LDLT solves via cuDSS, and sparse matrix operations via cuSparse. * **Dual Simplex**: A CPU-based dual simplex solver for small to medium-sized problems. From abcd4860cd2b4d55076bbc713197b042314239d1 Mon Sep 17 00:00:00 2001 From: Chris Maes Date: Mon, 6 Oct 2025 09:26:46 -0700 Subject: [PATCH 05/40] Update lp-features.rst --- docs/cuopt/source/lp-features.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/cuopt/source/lp-features.rst b/docs/cuopt/source/lp-features.rst index 610371b32..d15ca8d2f 100644 --- a/docs/cuopt/source/lp-features.rst +++ b/docs/cuopt/source/lp-features.rst @@ -70,7 +70,7 @@ Method **PDLP**: Primal-Dual Hybrid Gradient for Linear Program is an algorithm for solving large-scale linear programming problems on the GPU. PDLP does not attempt any matrix factorizations during the course of the solve. Select this method if your LP is so large that factorization will not fit into memory. By default PDLP solves to low relative tolerance and the solutions it returns do not lie at a vertex of the feasible region. Enable crossover to obtain a highly accurate basic solution from a PDLP solution. -**Barrier**: The barrier method (also known as interior-point method) solves linear programs using a primal-dual interior-point algorithm. This method uses GPU-accelerated sparse Cholesky and sparse LDLT solves via cuDSS, and GPU-accelerated sparse matrix-vector and matrix-matrix operations via cuSparse. Barrier is particularly effective for large-scale problems and can automatically apply techniques like folding and dense column elimination to improve performance. The method can solve either the augmented system or the normal equations (ADAT), and supports advanced features like color refinement for weighted graphs. Crossover can be enabled to convert the barrier solution to a basic solution. +**Barrier**: The barrier method (also known as interior-point method) solves linear programs using a primal-dual interior-point algorithm. This method uses GPU-accelerated sparse Cholesky and sparse LDLT solves via cuDSS, and GPU-accelerated sparse matrix-vector and matrix-matrix operations via cuSparse. Barrier is particularly effective for large-scale problems and can automatically apply techniques like folding, dualization, and dense column elimination to improve performance. This method solves the linear systems at each iteration using the augmented system or the normal equations (ADAT). Crossover can be enabled to convert the barrier solution to a basic solution. **Dual Simplex**: Dual simplex is the simplex method applied to the dual of the linear program. Dual simplex requires the basis factorization of linear program fit into memory. Select this method if your LP is small to medium sized, or if you require a high-quality basic solution. From 23a2046bcdecf552d220ed1138aaea192e40686b Mon Sep 17 00:00:00 2001 From: Chris Maes Date: Mon, 6 Oct 2025 09:27:26 -0700 Subject: [PATCH 06/40] Update lp-milp-settings.rst --- docs/cuopt/source/lp-milp-settings.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/cuopt/source/lp-milp-settings.rst b/docs/cuopt/source/lp-milp-settings.rst index d556b36e8..971310a27 100644 --- a/docs/cuopt/source/lp-milp-settings.rst +++ b/docs/cuopt/source/lp-milp-settings.rst @@ -85,7 +85,7 @@ Method * ``Dual Simplex``: Use the dual simplex method. * ``Barrier``: Use the barrier (interior-point) method. 
-Note: The default method is ``Concurrent``, which now includes barrier along with PDLP and dual simplex. +Note: The default method is ``Concurrent``. C API users should use the constants defined in :ref:`method-constants` for this parameter. From da63ef22b12c1256acc68e434be24e634b961828 Mon Sep 17 00:00:00 2001 From: Chris Maes Date: Mon, 6 Oct 2025 09:33:42 -0700 Subject: [PATCH 07/40] Update lp-milp-settings.rst --- docs/cuopt/source/lp-milp-settings.rst | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/docs/cuopt/source/lp-milp-settings.rst b/docs/cuopt/source/lp-milp-settings.rst index 971310a27..6f96dcfe5 100644 --- a/docs/cuopt/source/lp-milp-settings.rst +++ b/docs/cuopt/source/lp-milp-settings.rst @@ -147,12 +147,12 @@ Crossover ``CUOPT_CROSSOVER`` controls whether PDLP or barrier should crossover to a basic solution after an optimal solution is found. Changing this value has a significant impact on accuracy and runtime. -By default the solutions provided by PDLP and barrier are interior-point solutions that may have many variables that lie +By default the solutions provided by PDLP and barrier do not lie at a vertex and thus may have many variables that lie between their bounds. Enabling crossover allows the user to obtain a high-quality basic solution that lies at a vertex of the feasible region. If n is the number of variables, and m is the number of constraints, n - m variables will be on their bounds in a basic solution. -Note: the default value is false. Crossover has been updated to use hypersparse solves for improved performance. +Note: the default value is false. Save Best Primal So Far ^^^^^^^^^^^^^^^^^^^^^^^ @@ -187,21 +187,21 @@ The following settings control the behavior of the barrier (interior-point) meth Folding """"""" -``CUOPT_FOLDING`` controls whether to fold the linear program. Folding can reduce problem size by exploiting problem structure. +``CUOPT_FOLDING`` controls whether to fold the linear program. Folding can reduce problem size by exploiting symmetry in the problem. * ``-1``: Automatic (default) - cuOpt decides whether to fold based on problem characteristics -* ``0``: Folding disabled -* ``1``: Folding enabled +* ``0``: Disable folding +* ``1``: Force folding to run Note: the default value is ``-1`` (automatic). Dualize """"""" -``CUOPT_DUALIZE`` controls whether to dualize the linear program in presolve. Dualizing can improve solve time for problems where the dual has better structure. +``CUOPT_DUALIZE`` controls whether to dualize the linear program in presolve. Dualizing can improve solve time for problems, with inequality constraints, where there are more constraints than variables. * ``-1``: Automatic (default) - cuOpt decides whether to dualize based on problem characteristics -* ``0``: Don't dualize +* ``0``: Don't attempt to dualize * ``1``: Force dualize Note: the default value is ``-1`` (automatic). @@ -209,7 +209,7 @@ Note: the default value is ``-1`` (automatic). Ordering """""""" -``CUOPT_ORDERING`` controls the ordering algorithm used by cuDSS for sparse factorizations. The ordering can significantly impact solve performance. +``CUOPT_ORDERING`` controls the ordering algorithm used by cuDSS for sparse factorizations. The ordering can significantly impact solver run time. * ``-1``: Automatic (default) - cuOpt selects the best ordering * ``0``: cuDSS default ordering @@ -222,7 +222,7 @@ Augmented System ``CUOPT_AUGMENTED`` controls which linear system to solve in the barrier method. 
-* ``-1``: Automatic (default) - cuOpt selects the best formulation +* ``-1``: Automatic (default) - cuOpt selects the best linear system * ``0``: Solve the ADAT system (normal equations) * ``1``: Solve the augmented system @@ -232,10 +232,13 @@ Eliminate Dense Columns """""""""""""""""""""""" ``CUOPT_ELIMINATE_DENSE_COLUMNS`` controls whether to eliminate dense columns from the constraint matrix before solving. Eliminating dense columns can improve performance by reducing fill-in during factorization. +However, extra solves must be performed at each iteration. * ``true``: Eliminate dense columns (default) * ``false``: Don't eliminate dense columns +This setting only has an effect when the ADAT (normal equation) system is solved. + Note: the default value is ``true``. cuDSS Deterministic Mode From a4d1f2b64da822c67a7ed0c6fa6cbe2d54fb0e78 Mon Sep 17 00:00:00 2001 From: Chris Maes Date: Mon, 6 Oct 2025 09:37:18 -0700 Subject: [PATCH 08/40] Update lp-features.rst --- docs/cuopt/source/lp-features.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/cuopt/source/lp-features.rst b/docs/cuopt/source/lp-features.rst index d15ca8d2f..9bc6463ce 100644 --- a/docs/cuopt/source/lp-features.rst +++ b/docs/cuopt/source/lp-features.rst @@ -70,7 +70,7 @@ Method **PDLP**: Primal-Dual Hybrid Gradient for Linear Program is an algorithm for solving large-scale linear programming problems on the GPU. PDLP does not attempt any matrix factorizations during the course of the solve. Select this method if your LP is so large that factorization will not fit into memory. By default PDLP solves to low relative tolerance and the solutions it returns do not lie at a vertex of the feasible region. Enable crossover to obtain a highly accurate basic solution from a PDLP solution. -**Barrier**: The barrier method (also known as interior-point method) solves linear programs using a primal-dual interior-point algorithm. This method uses GPU-accelerated sparse Cholesky and sparse LDLT solves via cuDSS, and GPU-accelerated sparse matrix-vector and matrix-matrix operations via cuSparse. Barrier is particularly effective for large-scale problems and can automatically apply techniques like folding, dualization, and dense column elimination to improve performance. This method solves the linear systems at each iteration using the augmented system or the normal equations (ADAT). Crossover can be enabled to convert the barrier solution to a basic solution. +**Barrier**: The barrier method (also known as interior-point method) solves linear programs using a primal-dual predictor-corrector algorithm. This method uses GPU-accelerated sparse Cholesky and sparse LDLT solves via cuDSS, and GPU-accelerated sparse matrix-vector and matrix-matrix operations via cuSparse. Barrier is particularly effective for large-scale problems and can automatically apply techniques like folding, dualization, and dense column elimination to improve performance. This method solves the linear systems at each iteration using the augmented system or the normal equations (ADAT). Enable crossover to obtain a highly accurate basic solution from a barrier solution. **Dual Simplex**: Dual simplex is the simplex method applied to the dual of the linear program. Dual simplex requires the basis factorization of linear program fit into memory. Select this method if your LP is small to medium sized, or if you require a high-quality basic solution. 
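The ``CUOPT_AUGMENTED`` setting described above chooses between two equivalent linear systems that the barrier method solves at every iteration. As a brief sketch in standard interior-point notation (the symbols below are generic, not taken from this changeset): with constraint matrix :math:`A` and a positive diagonal scaling :math:`D` that is updated at each iteration, the normal-equations (ADAT) form solves

.. math::

   A D A^T \, \Delta y = r,

while the augmented-system form solves the larger but sparser symmetric indefinite system

.. math::

   \begin{bmatrix} -D^{-1} & A^T \\ A & 0 \end{bmatrix}
   \begin{bmatrix} \Delta x \\ \Delta y \end{bmatrix}
   =
   \begin{bmatrix} r_d \\ r_p \end{bmatrix}.

The ADAT form needs only a sparse Cholesky factorization but fills in heavily when :math:`A` has dense columns, which is why ``CUOPT_ELIMINATE_DENSE_COLUMNS`` applies only to that form; the augmented system is factored with a sparse LDLT decomposition and may be more stable for some problems, matching the cuDSS Cholesky/LDLT solves described above.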
From 82cbb2acad976a632464b303b73baab9c310ce38 Mon Sep 17 00:00:00 2001 From: Chris Maes Date: Mon, 6 Oct 2025 09:38:42 -0700 Subject: [PATCH 09/40] Update lp-features.rst --- docs/cuopt/source/lp-features.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/cuopt/source/lp-features.rst b/docs/cuopt/source/lp-features.rst index 9bc6463ce..fc450736b 100644 --- a/docs/cuopt/source/lp-features.rst +++ b/docs/cuopt/source/lp-features.rst @@ -78,7 +78,7 @@ Method Crossover --------- -Crossover allows you to obtain a high-quality basic solution from the results of a PDLP or barrier solve. When enabled, crossover converts an interior-point solution to a vertex solution (basic solution) with high accuracy. More details can be found :ref:`here `. +Crossover allows you to obtain a high-quality basic solution from the results of a PDLP or barrier solve. When enabled, crossover converts these solutions to a vertex solution (basic solution) with high accuracy. More details can be found :ref:`here `. Presolve From 84acac3e7720898602ae078e60bcd89359f37d2d Mon Sep 17 00:00:00 2001 From: Chris Maes Date: Mon, 6 Oct 2025 09:43:25 -0700 Subject: [PATCH 10/40] Update lp-milp-settings.rst --- docs/cuopt/source/lp-milp-settings.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/cuopt/source/lp-milp-settings.rst b/docs/cuopt/source/lp-milp-settings.rst index 6f96dcfe5..912e66cef 100644 --- a/docs/cuopt/source/lp-milp-settings.rst +++ b/docs/cuopt/source/lp-milp-settings.rst @@ -222,7 +222,7 @@ Augmented System ``CUOPT_AUGMENTED`` controls which linear system to solve in the barrier method. -* ``-1``: Automatic (default) - cuOpt selects the best linear system +* ``-1``: Automatic (default) - cuOpt selects the best linear system to solve * ``0``: Solve the ADAT system (normal equations) * ``1``: Solve the augmented system From 54f4d4f92d67c82409598df3d4ae17bc11e7278f Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Mon, 6 Oct 2025 11:48:14 -0500 Subject: [PATCH 11/40] fix style --- docs/cuopt/source/introduction.rst | 2 +- docs/cuopt/source/lp-milp-settings.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/cuopt/source/introduction.rst b/docs/cuopt/source/introduction.rst index b3c582168..de24f4746 100644 --- a/docs/cuopt/source/introduction.rst +++ b/docs/cuopt/source/introduction.rst @@ -70,7 +70,7 @@ cuOpt includes three LP solving methods: * **PDLP**: Based on `PDLP `__, a First-Order Method (FOM) for solving large-scale LPs. This solver implements primal-dual hybrid gradient enhanced by heuristics. Sparse matrix-vector products are perfomed efficiently on NVIDIA GPUs. -* **Barrier (Interior-Point)**: A primal-dual interior-point method that uses GPU-accelerated sparse Cholesky and LDLT solves via cuDSS, and sparse matrix operations via cuSparse. +* **Barrier (Interior-Point)**: A primal-dual interior-point method that uses GPU-accelerated sparse Cholesky and LDLT solves via cuDSS, and sparse matrix operations via cuSparse. * **Dual Simplex**: A CPU-based dual simplex solver for small to medium-sized problems. diff --git a/docs/cuopt/source/lp-milp-settings.rst b/docs/cuopt/source/lp-milp-settings.rst index 912e66cef..e815960b3 100644 --- a/docs/cuopt/source/lp-milp-settings.rst +++ b/docs/cuopt/source/lp-milp-settings.rst @@ -152,7 +152,7 @@ between their bounds. Enabling crossover allows the user to obtain a high-qualit that lies at a vertex of the feasible region. 
If n is the number of variables, and m is the number of constraints, n - m variables will be on their bounds in a basic solution. -Note: the default value is false. +Note: the default value is false. Save Best Primal So Far ^^^^^^^^^^^^^^^^^^^^^^^ From ceb865882fd0219d572b8c6be42f69590ac75100 Mon Sep 17 00:00:00 2001 From: Hugo Linsenmaier Date: Mon, 6 Oct 2025 07:56:18 -0700 Subject: [PATCH 12/40] Disable explicit cuda driver calls --- cpp/CMakeLists.txt | 1 - cpp/src/dual_simplex/sparse_cholesky.cuh | 97 ++++++++++++++---------- cpp/src/utilities/macros.cuh | 4 +- 3 files changed, 59 insertions(+), 43 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index e03d2581f..65db6f406 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -283,7 +283,6 @@ set(CUOPT_PRIVATE_CUDA_LIBS CUDA::curand CUDA::cusolver TBB::tbb - CUDA::cuda_driver OpenMP::OpenMP_CXX) list(PREPEND CUOPT_PRIVATE_CUDA_LIBS CUDA::cublasLt) diff --git a/cpp/src/dual_simplex/sparse_cholesky.cuh b/cpp/src/dual_simplex/sparse_cholesky.cuh index a751d67a6..7b814efbf 100644 --- a/cpp/src/dual_simplex/sparse_cholesky.cuh +++ b/cpp/src/dual_simplex/sparse_cholesky.cuh @@ -23,14 +23,12 @@ #include "dual_simplex/tic_toc.hpp" #include +#include #include -#include "cuda.h" #include "cudss.h" -#define SPLIT_SM_FOR_BARRIER - namespace cuopt::linear_programming::dual_simplex { template @@ -159,22 +157,25 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t { cudssGetProperty(PATCH_LEVEL, &patch); settings.log.printf("cuDSS Version : %d.%d.%d\n", major, minor, patch); - CU_CHECK(cuDriverGetVersion(&driver_version)); - settings_.log.printf("CUDA Driver Version : %d\n", driver_version); - cuda_error = cudaSuccess; status = CUDSS_STATUS_SUCCESS; - if (settings_.concurrent_halt != nullptr && driver_version >= 13000) { -#if defined(SPLIT_SM_FOR_BARRIER) && CUDART_VERSION >= 13000 + if (CUDART_VERSION >= 13000 && settings_.concurrent_halt != nullptr) { + cuGetErrorString_func = cuopt::get_driver_entry_point("cuGetErrorString"); // 1. Set up the GPU resources CUdevResource initial_device_GPU_resources = {}; - CU_CHECK(cuDeviceGetDevResource( - handle_ptr_->get_device(), &initial_device_GPU_resources, CU_DEV_RESOURCE_TYPE_SM)); + auto cuDeviceGetDevResource_func = cuopt::get_driver_entry_point("cuDeviceGetDevResource"); + CU_CHECK(reinterpret_cast(cuDeviceGetDevResource_func)( + handle_ptr_->get_device(), &initial_device_GPU_resources, CU_DEV_RESOURCE_TYPE_SM), + reinterpret_cast(cuGetErrorString_func)); + #ifdef DEBUG - std::cout << "Initial GPU resources retrieved via cuDeviceGetDevResource() have type " - << initial_device_GPU_resources.type << " and SM count " - << initial_device_GPU_resources.sm.smCount << std::endl; + printf( + " Initial GPU resources retrieved via " + "cuDeviceGetDevResource() have type " + "%d and SM count %d\n", + initial_device_GPU_resources.type, + initial_device_GPU_resources.sm.smCount); #endif // 2. 
Partition the GPU resources @@ -182,14 +183,20 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t { auto barrier_sms = raft::alignTo(static_cast(total_SMs * 0.75f), 8); CUdevResource input; CUdevResource resource; + auto cuDevSmResourceSplitByCount_func = + cuopt::get_driver_entry_point("cuDevSmResourceSplitByCount"); auto n_groups = 1u; auto use_flags = CU_DEV_SM_RESOURCE_SPLIT_IGNORE_SM_COSCHEDULING; // or 0 - CU_CHECK(cuDevSmResourceSplitByCount( - &resource, &n_groups, &initial_device_GPU_resources, nullptr, use_flags, barrier_sms)); + CU_CHECK( + reinterpret_cast( + cuDevSmResourceSplitByCount_func)( + &resource, &n_groups, &initial_device_GPU_resources, nullptr, use_flags, barrier_sms), + reinterpret_cast(cuGetErrorString_func)); #ifdef DEBUG printf( - " Resources were split into %d resource groups (had requested %d) with %d SMs each (had " - "requested %d)\n", + " Resources were split into %d resource groups (had " + "requested %d) with %d SMs each (had " + "requested % d)\n", n_groups, n_groups, resource.sm.smCount, @@ -198,34 +205,41 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t { // 3. Create the resource descriptor auto constexpr const n_resource_desc = 1; CUdevResourceDesc resource_desc; - CU_CHECK(cuDevResourceGenerateDesc(&resource_desc, &resource, n_resource_desc)); + auto cuDevResourceGenerateDesc_func = + cuopt::get_driver_entry_point("cuDevResourceGenerateDesc"); + CU_CHECK(reinterpret_cast( + cuDevResourceGenerateDesc_func)(&resource_desc, &resource, n_resource_desc), + reinterpret_cast(cuGetErrorString_func)); #ifdef DEBUG printf( - " For the resource descriptor of barrier green context we will combine %d resources of " - "%d " - "SMs each\n", + " For the resource descriptor of barrier green context " + "we will combine %d resources of " + "%d SMs each\n", n_resource_desc, resource.sm.smCount); #endif // Only perform this if CUDA version is more than 13 - // (all resource splitting and descriptor creation already above) - // No additional code needed here as the logic is already guarded above. - // 4. Create the green context and stream for that green context - // CUstream barrier_green_ctx_stream; + // (all resource splitting and descriptor creation already + // above) No additional code needed here as the logic is + // already guarded above. + // 4. 
Create the green context and stream for that green + // context CUstream barrier_green_ctx_stream; i_t stream_priority; cudaStream_t cuda_stream = handle_ptr_->get_stream(); cudaError_t priority_result = cudaStreamGetPriority(cuda_stream, &stream_priority); RAFT_CUDA_TRY(priority_result); - CU_CHECK(cuGreenCtxCreate( - &barrier_green_ctx, resource_desc, handle_ptr_->get_device(), CU_GREEN_CTX_DEFAULT_STREAM)); - CU_CHECK(cuGreenCtxStreamCreate( - &stream, barrier_green_ctx, CU_STREAM_NON_BLOCKING, stream_priority)); -#endif - } else { - // Convert runtime API stream to driver API stream for consistency - cudaStream_t cuda_stream = handle_ptr_->get_stream(); - stream = reinterpret_cast(cuda_stream); + auto cuGreenCtxCreate_func = cuopt::get_driver_entry_point("cuGreenCtxCreate"); + CU_CHECK(reinterpret_cast(cuGreenCtxCreate_func)( + &barrier_green_ctx, + resource_desc, + handle_ptr_->get_device(), + CU_GREEN_CTX_DEFAULT_STREAM), + reinterpret_cast(cuGetErrorString_func)); + auto cuGreenCtxStreamCreate_func = cuopt::get_driver_entry_point("cuGreenCtxStreamCreate"); + CU_CHECK(reinterpret_cast(cuGreenCtxStreamCreate_func)( + &stream, barrier_green_ctx, CU_STREAM_NON_BLOCKING, stream_priority), + reinterpret_cast(cuGetErrorString_func)); } CUDSS_CALL_AND_CHECK_EXIT(cudssCreate(&handle), status, "cudssCreate"); @@ -338,12 +352,15 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t { CUDSS_CALL_AND_CHECK_EXIT(cudssConfigDestroy(solverConfig), status, "cudssConfigDestroy"); CUDSS_CALL_AND_CHECK_EXIT(cudssDestroy(handle), status, "cudssDestroy"); CUDA_CALL_AND_CHECK_EXIT(cudaStreamSynchronize(stream), "cudaStreamSynchronize"); -#ifdef SPLIT_SM_FOR_BARRIER - if (settings_.concurrent_halt != nullptr && driver_version >= 13000) { - CU_CHECK(cuStreamDestroy(stream)); #if CUDART_VERSION >= 13000 - CU_CHECK(cuGreenCtxDestroy(barrier_green_ctx)); -#endif + if (settings_.concurrent_halt != nullptr) { + auto cuStreamDestroy_func = cuopt::get_driver_entry_point("cuStreamDestroy"); + CU_CHECK(reinterpret_cast(cuStreamDestroy_func)(stream), + reinterpret_cast(cuGetErrorString_func)); + auto cuGreenCtxDestroy_func = cuopt::get_driver_entry_point("cuGreenCtxDestroy"); + CU_CHECK( + reinterpret_cast(cuGreenCtxDestroy_func)(barrier_green_ctx), + reinterpret_cast(cuGetErrorString_func)); handle_ptr_->get_stream().synchronize(); } #endif @@ -798,11 +815,11 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t { f_t* csr_values_d; f_t* x_values_d; f_t* b_values_d; - i_t driver_version; const simplex_solver_settings_t& settings_; CUgreenCtx barrier_green_ctx; CUstream stream; + void* cuGetErrorString_func; }; } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/utilities/macros.cuh b/cpp/src/utilities/macros.cuh index c641103bc..f5401d52a 100644 --- a/cpp/src/utilities/macros.cuh +++ b/cpp/src/utilities/macros.cuh @@ -38,12 +38,12 @@ #endif // For CUDA Driver API -#define CU_CHECK(expr_to_check) \ +#define CU_CHECK(expr_to_check, err_func) \ do { \ CUresult result = expr_to_check; \ if (result != CUDA_SUCCESS) { \ const char* pErrStr; \ - cuGetErrorString(result, &pErrStr); \ + err_func(result, &pErrStr); \ fprintf(stderr, "CUDA Error: %s:%i:%s\n", __FILE__, __LINE__, pErrStr); \ } \ } while (0) From 2be9dbec27e89e250cf2a0bf49686c4b3a88215d Mon Sep 17 00:00:00 2001 From: Hugo Linsenmaier Date: Mon, 6 Oct 2025 10:41:57 -0700 Subject: [PATCH 13/40] Add missing file --- cpp/src/utilities/driver_helpers.cuh | 35 ++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) 
create mode 100644 cpp/src/utilities/driver_helpers.cuh diff --git a/cpp/src/utilities/driver_helpers.cuh b/cpp/src/utilities/driver_helpers.cuh new file mode 100644 index 000000000..bb6f08be5 --- /dev/null +++ b/cpp/src/utilities/driver_helpers.cuh @@ -0,0 +1,35 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "cuda.h" + +namespace cuopt { + +inline auto get_driver_entry_point(const char* name) +{ + void* func; + cudaDriverEntryPointQueryResult driver_status; + cudaGetDriverEntryPointByVersion(name, &func, CUDART_VERSION, cudaEnableDefault, &driver_status); + if (driver_status != cudaDriverEntryPointSuccess) { + fprintf(stderr, "Failed to fetch symbol for %s\n", name); + } + return func; +} + +} // namespace cuopt From 7c523033d1dcce7b7801bf7a0c1c5f2bcc9f3e55 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Mon, 6 Oct 2025 15:19:03 -0500 Subject: [PATCH 14/40] add tests --- .../linear_programming/test_python_API.py | 107 +++++++++++++++++- .../cuopt_server/tests/test_lp.py | 62 ++++++++++ .../linear_programming/data_definition.py | 2 + .../utils/linear_programming/solver.py | 8 +- 4 files changed, 174 insertions(+), 5 deletions(-) diff --git a/python/cuopt/cuopt/tests/linear_programming/test_python_API.py b/python/cuopt/cuopt/tests/linear_programming/test_python_API.py index 1f0ade10e..6bbd0a339 100644 --- a/python/cuopt/cuopt/tests/linear_programming/test_python_API.py +++ b/python/cuopt/cuopt/tests/linear_programming/test_python_API.py @@ -34,10 +34,20 @@ sense, ) from cuopt.linear_programming.solver.solver_parameters import ( + CUOPT_AUGMENTED, + CUOPT_CUDSS_DETERMINISTIC, + CUOPT_DUALIZE, + CUOPT_ELIMINATE_DENSE_COLUMNS, + CUOPT_FOLDING, CUOPT_INFEASIBILITY_DETECTION, + CUOPT_METHOD, + CUOPT_ORDERING, CUOPT_PDLP_SOLVER_MODE, ) -from cuopt.linear_programming.solver_settings import PDLPSolverMode +from cuopt.linear_programming.solver_settings import ( + PDLPSolverMode, + SolverMethod, +) RAPIDS_DATASET_ROOT_DIR = os.getenv("RAPIDS_DATASET_ROOT_DIR") if RAPIDS_DATASET_ROOT_DIR is None: @@ -449,3 +459,98 @@ def test_problem_update(): prob.updateObjective(constant=5, sense=MINIMIZE) prob.solve() assert prob.ObjValue == pytest.approx(5) + + +def test_barrier_solver(): + """ + Test the barrier solver with different configurations. 
+ + Problem: + maximize 5*xs + 20*xl + subject to 1*xs + 3*xl <= 200 + 3*xs + 2*xl <= 160 + xs, xl >= 0 + + Expected Solution: + Optimal objective: 1333.33 + xs = 0, xl = 66.67 (corner solution where constraint 1 is binding) + """ + prob = Problem("Barrier Test") + + # Add variables + xs = prob.addVariable(lb=0, vtype=VType.CONTINUOUS, name="xs") + xl = prob.addVariable(lb=0, vtype=VType.CONTINUOUS, name="xl") + + # Add constraints + prob.addConstraint(xs + 3 * xl <= 200, name="constraint1") + prob.addConstraint(3 * xs + 2 * xl <= 160, name="constraint2") + + # Set objective: maximize 5*xs + 20 * xl + prob.setObjective(5 * xs + 20 * xl, sense=MAXIMIZE) + + # Test 1: Default barrier settings + settings = SolverSettings() + settings.set_parameter(CUOPT_METHOD, SolverMethod.Barrier) + settings.set_parameter("time_limit", 10) + + prob.solve(settings) + + assert prob.solved + assert prob.Status.name == "Optimal" + assert prob.ObjValue == pytest.approx(1333.33, rel=0.01) + assert xs.Value == pytest.approx(0.0, abs=1e-4) + assert xl.Value == pytest.approx(66.67, rel=0.01) + + # Test 2: Barrier with forced settings + settings_forced = SolverSettings() + settings_forced.set_parameter(CUOPT_METHOD, SolverMethod.Barrier) + settings_forced.set_parameter(CUOPT_FOLDING, 1) # Force folding + settings_forced.set_parameter(CUOPT_DUALIZE, 1) # Force dualize + settings_forced.set_parameter(CUOPT_ORDERING, 1) # AMD ordering + settings_forced.set_parameter(CUOPT_AUGMENTED, 1) # Augmented system + settings_forced.set_parameter(CUOPT_ELIMINATE_DENSE_COLUMNS, True) + settings_forced.set_parameter(CUOPT_CUDSS_DETERMINISTIC, True) + settings_forced.set_parameter("time_limit", 10) + + prob.solve(settings_forced) + + assert prob.solved + assert prob.Status.name == "Optimal" + assert prob.ObjValue == pytest.approx(1333.33, rel=0.01) + + # Test 3: Barrier with features disabled + settings_disabled = SolverSettings() + settings_disabled.set_parameter(CUOPT_METHOD, SolverMethod.Barrier) + settings_disabled.set_parameter(CUOPT_FOLDING, 0) # No folding + settings_disabled.set_parameter(CUOPT_DUALIZE, 0) # No dualization + settings_disabled.set_parameter(CUOPT_ORDERING, 0) # cuDSS default + settings_disabled.set_parameter(CUOPT_AUGMENTED, 0) # ADAT system + settings_disabled.set_parameter(CUOPT_ELIMINATE_DENSE_COLUMNS, False) + settings_disabled.set_parameter(CUOPT_CUDSS_DETERMINISTIC, False) + settings_disabled.set_parameter("time_limit", 10) + + prob.solve(settings_disabled) + + assert prob.solved + assert prob.Status.name == "Optimal" + assert prob.ObjValue == pytest.approx(1333.33, rel=0.01) + + # Test 4: Barrier with automatic settings (default -1 values) + settings_auto = SolverSettings() + settings_auto.set_parameter(CUOPT_METHOD, SolverMethod.Barrier) + settings_auto.set_parameter(CUOPT_FOLDING, -1) # Automatic + settings_auto.set_parameter(CUOPT_DUALIZE, -1) # Automatic + settings_auto.set_parameter(CUOPT_ORDERING, -1) # Automatic + settings_auto.set_parameter(CUOPT_AUGMENTED, -1) # Automatic + settings_auto.set_parameter("time_limit", 10) + + prob.solve(settings_auto) + + assert prob.solved + assert prob.Status.name == "Optimal" + assert prob.ObjValue == pytest.approx(1333.33, rel=0.01) + + # Verify constraint slacks are non-negative + for c in prob.getConstraints(): + # For <= constraints with optimal solution, slack should be >= 0 + assert c.Slack >= -1e-6 # Allow small numerical tolerance diff --git a/python/cuopt_server/cuopt_server/tests/test_lp.py b/python/cuopt_server/cuopt_server/tests/test_lp.py index 
8fc85aa3a..3356f9bfa 100644 --- a/python/cuopt_server/cuopt_server/tests/test_lp.py +++ b/python/cuopt_server/cuopt_server/tests/test_lp.py @@ -146,3 +146,65 @@ def test_sample_milp( res.json()["response"]["solver_response"], expected_status, ) + + +@pytest.mark.parametrize( + "folding, dualize, ordering, augmented, eliminate_dense, cudss_determ", + [ + # Test automatic settings (default) + (-1, -1, -1, -1, True, False), + # Test folding off, no dualization, cuDSS default ordering, ADAT system + (0, 0, 0, 0, True, False), + # Test folding on, force dualization, AMD ordering, augmented system + (1, 1, 1, 1, True, True), + # Test mixed settings: automatic folding, no dualize, AMD, augmented + (-1, 0, 1, 1, False, False), + # Test no folding, automatic dualize, cuDSS default, ADAT + (0, -1, 0, 0, True, True), + ], +) +def test_barrier_solver_options( + cuoptproc, # noqa + folding, + dualize, + ordering, + augmented, + eliminate_dense, + cudss_determ, +): + """ + Test the barrier solver (method=3) with various configuration options: + - folding: (-1) automatic, (0) off, (1) on + - dualize: (-1) automatic, (0) don't dualize, (1) force dualize + - ordering: (-1) automatic, (0) cuDSS default, (1) AMD + - augmented: (-1) automatic, (0) ADAT, (1) augmented system + - eliminate_dense_columns: True to eliminate, False to not + - cudss_deterministic: True for deterministic, False for nondeterministic + """ + data = get_std_data_for_lp() + + # Use barrier solver (method=3) + data["solver_config"]["method"] = 3 + + # Configure barrier solver options + data["solver_config"]["folding"] = folding + data["solver_config"]["dualize"] = dualize + data["solver_config"]["ordering"] = ordering + data["solver_config"]["augmented"] = augmented + data["solver_config"]["eliminate_dense_columns"] = eliminate_dense + data["solver_config"]["cudss_deterministic"] = cudss_determ + + res = get_lp(client, data) + + assert res.status_code == 200 + + print("\n=== Barrier Solver Test Configuration ===") + print(f"folding={folding}, dualize={dualize}, ordering={ordering}") + print(f"augmented={augmented}, eliminate_dense={eliminate_dense}") + print(f"cudss_deterministic={cudss_determ}") + print(res.json()) + + validate_lp_result( + res.json()["response"]["solver_response"], + LPTerminationStatus.Optimal.name, + ) diff --git a/python/cuopt_server/cuopt_server/utils/linear_programming/data_definition.py b/python/cuopt_server/cuopt_server/utils/linear_programming/data_definition.py index 080b276f3..7b18f0fa2 100644 --- a/python/cuopt_server/cuopt_server/utils/linear_programming/data_definition.py +++ b/python/cuopt_server/cuopt_server/utils/linear_programming/data_definition.py @@ -520,6 +520,8 @@ class SolverConfig(StrictModel): "
" "- Dual Simplex: 2, Dual Simplex method" "
" + "- Barrier: 3, Barrier method" + "
" "Note: Not supported for MILP. ", ) mip_scaling: Optional[bool] = Field( diff --git a/python/cuopt_server/cuopt_server/utils/linear_programming/solver.py b/python/cuopt_server/cuopt_server/utils/linear_programming/solver.py index c79aa8313..0f0ab2ba8 100644 --- a/python/cuopt_server/cuopt_server/utils/linear_programming/solver.py +++ b/python/cuopt_server/cuopt_server/utils/linear_programming/solver.py @@ -439,15 +439,15 @@ def is_mip(var_types): solver_settings.set_parameter( CUOPT_LOG_FILE, solver_config.log_file ) - if solver_config.augmented != "": + if solver_config.augmented is not None: solver_settings.set_parameter( CUOPT_AUGMENTED, solver_config.augmented ) - if solver_config.folding != "": + if solver_config.folding is not None: solver_settings.set_parameter(CUOPT_FOLDING, solver_config.folding) - if solver_config.dualize != "": + if solver_config.dualize is not None: solver_settings.set_parameter(CUOPT_DUALIZE, solver_config.dualize) - if solver_config.ordering != "": + if solver_config.ordering is not None: solver_settings.set_parameter( CUOPT_ORDERING, solver_config.ordering ) From cf398c52f4977ae2c20b455d11542757dda9bba1 Mon Sep 17 00:00:00 2001 From: Hugo Linsenmaier Date: Mon, 6 Oct 2025 14:18:34 -0700 Subject: [PATCH 15/40] Remove unused var --- cpp/src/dual_simplex/sparse_cholesky.cuh | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/src/dual_simplex/sparse_cholesky.cuh b/cpp/src/dual_simplex/sparse_cholesky.cuh index 7b814efbf..37b52e754 100644 --- a/cpp/src/dual_simplex/sparse_cholesky.cuh +++ b/cpp/src/dual_simplex/sparse_cholesky.cuh @@ -181,7 +181,6 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t { // 2. Partition the GPU resources auto total_SMs = initial_device_GPU_resources.sm.smCount; auto barrier_sms = raft::alignTo(static_cast(total_SMs * 0.75f), 8); - CUdevResource input; CUdevResource resource; auto cuDevSmResourceSplitByCount_func = cuopt::get_driver_entry_point("cuDevSmResourceSplitByCount"); From 0b738722dd15c55debcef5a7919afcc3789f3d0c Mon Sep 17 00:00:00 2001 From: Hugo Linsenmaier Date: Mon, 6 Oct 2025 14:19:22 -0700 Subject: [PATCH 16/40] Use logger --- cpp/src/dual_simplex/sparse_cholesky.cuh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/src/dual_simplex/sparse_cholesky.cuh b/cpp/src/dual_simplex/sparse_cholesky.cuh index 37b52e754..3b54e2acb 100644 --- a/cpp/src/dual_simplex/sparse_cholesky.cuh +++ b/cpp/src/dual_simplex/sparse_cholesky.cuh @@ -170,7 +170,7 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t { reinterpret_cast(cuGetErrorString_func)); #ifdef DEBUG - printf( + settings_.log.printf( " Initial GPU resources retrieved via " "cuDeviceGetDevResource() have type " "%d and SM count %d\n", @@ -192,7 +192,7 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t { &resource, &n_groups, &initial_device_GPU_resources, nullptr, use_flags, barrier_sms), reinterpret_cast(cuGetErrorString_func)); #ifdef DEBUG - printf( + settings_.log.printf( " Resources were split into %d resource groups (had " "requested %d) with %d SMs each (had " "requested % d)\n", @@ -210,7 +210,7 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t { cuDevResourceGenerateDesc_func)(&resource_desc, &resource, n_resource_desc), reinterpret_cast(cuGetErrorString_func)); #ifdef DEBUG - printf( + settings_.log.printf( " For the resource descriptor of barrier green context " "we will combine %d resources of " "%d SMs each\n", @@ -491,7 +491,7 @@ class sparse_cholesky_cudss_t : public 
sparse_cholesky_base_t { auto d_nnz = Arow.row_start.element(Arow.m, Arow.row_start.stream()); if (nnz != d_nnz) { - printf("Error: nnz %d != A_in.col_start[A_in.n] %d\n", nnz, d_nnz); + settings_.log.printf("Error: nnz %d != A_in.col_start[A_in.n] %d\n", nnz, d_nnz); exit(1); } From 692afabc4d86e157423fd8d26473d1dfb0acfd7c Mon Sep 17 00:00:00 2001 From: Hugo Linsenmaier Date: Mon, 6 Oct 2025 14:21:57 -0700 Subject: [PATCH 17/40] Add detail namespace --- cpp/src/dual_simplex/sparse_cholesky.cuh | 24 +++++++++++++----------- cpp/src/utilities/driver_helpers.cuh | 3 +++ 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/cpp/src/dual_simplex/sparse_cholesky.cuh b/cpp/src/dual_simplex/sparse_cholesky.cuh index 3b54e2acb..51145a36b 100644 --- a/cpp/src/dual_simplex/sparse_cholesky.cuh +++ b/cpp/src/dual_simplex/sparse_cholesky.cuh @@ -161,16 +161,17 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t { status = CUDSS_STATUS_SUCCESS; if (CUDART_VERSION >= 13000 && settings_.concurrent_halt != nullptr) { - cuGetErrorString_func = cuopt::get_driver_entry_point("cuGetErrorString"); + cuGetErrorString_func = cuopt::detail::get_driver_entry_point("cuGetErrorString"); // 1. Set up the GPU resources CUdevResource initial_device_GPU_resources = {}; - auto cuDeviceGetDevResource_func = cuopt::get_driver_entry_point("cuDeviceGetDevResource"); + auto cuDeviceGetDevResource_func = + cuopt::detail::get_driver_entry_point("cuDeviceGetDevResource"); CU_CHECK(reinterpret_cast(cuDeviceGetDevResource_func)( handle_ptr_->get_device(), &initial_device_GPU_resources, CU_DEV_RESOURCE_TYPE_SM), reinterpret_cast(cuGetErrorString_func)); #ifdef DEBUG - settings_.log.printf( + settings.log.printf( " Initial GPU resources retrieved via " "cuDeviceGetDevResource() have type " "%d and SM count %d\n", @@ -183,7 +184,7 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t { auto barrier_sms = raft::alignTo(static_cast(total_SMs * 0.75f), 8); CUdevResource resource; auto cuDevSmResourceSplitByCount_func = - cuopt::get_driver_entry_point("cuDevSmResourceSplitByCount"); + cuopt::detail::get_driver_entry_point("cuDevSmResourceSplitByCount"); auto n_groups = 1u; auto use_flags = CU_DEV_SM_RESOURCE_SPLIT_IGNORE_SM_COSCHEDULING; // or 0 CU_CHECK( @@ -192,7 +193,7 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t { &resource, &n_groups, &initial_device_GPU_resources, nullptr, use_flags, barrier_sms), reinterpret_cast(cuGetErrorString_func)); #ifdef DEBUG - settings_.log.printf( + settings.log.printf( " Resources were split into %d resource groups (had " "requested %d) with %d SMs each (had " "requested % d)\n", @@ -205,12 +206,12 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t { auto constexpr const n_resource_desc = 1; CUdevResourceDesc resource_desc; auto cuDevResourceGenerateDesc_func = - cuopt::get_driver_entry_point("cuDevResourceGenerateDesc"); + cuopt::detail::get_driver_entry_point("cuDevResourceGenerateDesc"); CU_CHECK(reinterpret_cast( cuDevResourceGenerateDesc_func)(&resource_desc, &resource, n_resource_desc), reinterpret_cast(cuGetErrorString_func)); #ifdef DEBUG - settings_.log.printf( + settings.log.printf( " For the resource descriptor of barrier green context " "we will combine %d resources of " "%d SMs each\n", @@ -228,14 +229,15 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t { cudaStream_t cuda_stream = handle_ptr_->get_stream(); cudaError_t priority_result = cudaStreamGetPriority(cuda_stream, &stream_priority); 
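      // Descriptive note: the priority queried from the handle's runtime stream above is
      // reused when the green-context stream is created below, so the cuDSS barrier work
      // keeps the same scheduling priority while running on the SM partition (roughly 75%
      // of SMs, aligned to 8) carved out earlier in this constructor.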
RAFT_CUDA_TRY(priority_result); - auto cuGreenCtxCreate_func = cuopt::get_driver_entry_point("cuGreenCtxCreate"); + auto cuGreenCtxCreate_func = cuopt::detail::get_driver_entry_point("cuGreenCtxCreate"); CU_CHECK(reinterpret_cast(cuGreenCtxCreate_func)( &barrier_green_ctx, resource_desc, handle_ptr_->get_device(), CU_GREEN_CTX_DEFAULT_STREAM), reinterpret_cast(cuGetErrorString_func)); - auto cuGreenCtxStreamCreate_func = cuopt::get_driver_entry_point("cuGreenCtxStreamCreate"); + auto cuGreenCtxStreamCreate_func = + cuopt::detail::get_driver_entry_point("cuGreenCtxStreamCreate"); CU_CHECK(reinterpret_cast(cuGreenCtxStreamCreate_func)( &stream, barrier_green_ctx, CU_STREAM_NON_BLOCKING, stream_priority), reinterpret_cast(cuGetErrorString_func)); @@ -353,10 +355,10 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t { CUDA_CALL_AND_CHECK_EXIT(cudaStreamSynchronize(stream), "cudaStreamSynchronize"); #if CUDART_VERSION >= 13000 if (settings_.concurrent_halt != nullptr) { - auto cuStreamDestroy_func = cuopt::get_driver_entry_point("cuStreamDestroy"); + auto cuStreamDestroy_func = cuopt::detail::get_driver_entry_point("cuStreamDestroy"); CU_CHECK(reinterpret_cast(cuStreamDestroy_func)(stream), reinterpret_cast(cuGetErrorString_func)); - auto cuGreenCtxDestroy_func = cuopt::get_driver_entry_point("cuGreenCtxDestroy"); + auto cuGreenCtxDestroy_func = cuopt::detail::get_driver_entry_point("cuGreenCtxDestroy"); CU_CHECK( reinterpret_cast(cuGreenCtxDestroy_func)(barrier_green_ctx), reinterpret_cast(cuGetErrorString_func)); diff --git a/cpp/src/utilities/driver_helpers.cuh b/cpp/src/utilities/driver_helpers.cuh index bb6f08be5..71065b47d 100644 --- a/cpp/src/utilities/driver_helpers.cuh +++ b/cpp/src/utilities/driver_helpers.cuh @@ -21,6 +21,8 @@ namespace cuopt { +namespace detail { + inline auto get_driver_entry_point(const char* name) { void* func; @@ -32,4 +34,5 @@ inline auto get_driver_entry_point(const char* name) return func; } +} // namespace detail } // namespace cuopt From ef11801727445a6b8c22cf042d48fa63862ea578 Mon Sep 17 00:00:00 2001 From: Hugo Linsenmaier Date: Mon, 6 Oct 2025 14:24:58 -0700 Subject: [PATCH 18/40] Remove driver dependencies --- conda/recipes/libcuopt/recipe.yaml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/conda/recipes/libcuopt/recipe.yaml b/conda/recipes/libcuopt/recipe.yaml index ae9a2420f..e18cea2a6 100644 --- a/conda/recipes/libcuopt/recipe.yaml +++ b/conda/recipes/libcuopt/recipe.yaml @@ -73,7 +73,6 @@ cache: - libcurand-dev - libcusparse-dev - cuda-cudart-dev - - cuda-driver-dev - boost - tbb-devel - zlib @@ -89,8 +88,6 @@ outputs: cmake --install cpp/libmps_parser/build dynamic_linking: overlinking_behavior: "error" - missing_dso_allowlist: - - libcuda.so.1 prefix_detection: ignore: # See https://github.com/rapidsai/build-planning/issues/160 @@ -138,8 +135,6 @@ outputs: cmake --install cpp/build dynamic_linking: overlinking_behavior: "error" - missing_dso_allowlist: - - libcuda.so.1 prefix_detection: ignore: # See https://github.com/rapidsai/build-planning/issues/160 @@ -156,7 +151,6 @@ outputs: - rapids-logger =0.1 - librmm =${{ dep_minor_version }} - cuda-cudart-dev - - cuda-driver-dev - libcublas - libcudss-dev >=0.7 - libcusparse-dev @@ -198,8 +192,6 @@ outputs: cmake --install cpp/build --component testing dynamic_linking: overlinking_behavior: "error" - missing_dso_allowlist: - - libcuda.so.1 string: cuda${{ cuda_major }}_${{ date_string }}_${{ head_rev }} requirements: build: From 640f1328617c2934a7488865038e566735d7146c Mon Sep 
17 00:00:00 2001 From: Ramakrishna Prabhu Date: Mon, 6 Oct 2025 17:57:28 -0500 Subject: [PATCH 19/40] skip pytest for varrier --- python/cuopt/cuopt/tests/linear_programming/test_python_API.py | 1 + python/cuopt_server/cuopt_server/tests/test_lp.py | 1 + 2 files changed, 2 insertions(+) diff --git a/python/cuopt/cuopt/tests/linear_programming/test_python_API.py b/python/cuopt/cuopt/tests/linear_programming/test_python_API.py index 6bbd0a339..73a0c30eb 100644 --- a/python/cuopt/cuopt/tests/linear_programming/test_python_API.py +++ b/python/cuopt/cuopt/tests/linear_programming/test_python_API.py @@ -461,6 +461,7 @@ def test_problem_update(): assert prob.ObjValue == pytest.approx(5) +@pytest.mark.skip(reason="Skipping barrier solver test") def test_barrier_solver(): """ Test the barrier solver with different configurations. diff --git a/python/cuopt_server/cuopt_server/tests/test_lp.py b/python/cuopt_server/cuopt_server/tests/test_lp.py index 3356f9bfa..ff35cfc6f 100644 --- a/python/cuopt_server/cuopt_server/tests/test_lp.py +++ b/python/cuopt_server/cuopt_server/tests/test_lp.py @@ -148,6 +148,7 @@ def test_sample_milp( ) +@pytest.mark.skip(reason="Skipping barrier solver options test") @pytest.mark.parametrize( "folding, dualize, ordering, augmented, eliminate_dense, cudss_determ", [ From fb31450306c94167991c5c515e8bb1dce3275f06 Mon Sep 17 00:00:00 2001 From: Ramakrishnap <42624703+rgsl888prabhu@users.noreply.github.com> Date: Tue, 7 Oct 2025 12:08:32 -0500 Subject: [PATCH 20/40] Update device_sparse_matrix.cuh --- cpp/src/dual_simplex/device_sparse_matrix.cuh | 2 -- 1 file changed, 2 deletions(-) diff --git a/cpp/src/dual_simplex/device_sparse_matrix.cuh b/cpp/src/dual_simplex/device_sparse_matrix.cuh index 715123b57..8113db973 100644 --- a/cpp/src/dual_simplex/device_sparse_matrix.cuh +++ b/cpp/src/dual_simplex/device_sparse_matrix.cuh @@ -186,9 +186,7 @@ class device_csc_matrix_t { // Inclusive cumulative sum to have the corresponding column for each entry rmm::device_buffer d_temp_storage; - size_t temp_storage_bytes{0}; - cub::DeviceScan::InclusiveSum( nullptr, temp_storage_bytes, col_index.data(), col_index.data(), col_index.size(), stream); d_temp_storage.resize(temp_storage_bytes, stream); From f143904d44d9a43c3cbb584e6fb2138f35b595d1 Mon Sep 17 00:00:00 2001 From: Ramakrishnap <42624703+rgsl888prabhu@users.noreply.github.com> Date: Tue, 7 Oct 2025 12:08:57 -0500 Subject: [PATCH 21/40] Update test_python_API.py --- python/cuopt/cuopt/tests/linear_programming/test_python_API.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cuopt/cuopt/tests/linear_programming/test_python_API.py b/python/cuopt/cuopt/tests/linear_programming/test_python_API.py index 73a0c30eb..9a6653d1a 100644 --- a/python/cuopt/cuopt/tests/linear_programming/test_python_API.py +++ b/python/cuopt/cuopt/tests/linear_programming/test_python_API.py @@ -461,7 +461,7 @@ def test_problem_update(): assert prob.ObjValue == pytest.approx(5) -@pytest.mark.skip(reason="Skipping barrier solver test") +# @pytest.mark.skip(reason="Skipping barrier solver test") def test_barrier_solver(): """ Test the barrier solver with different configurations. 
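[Context note for the barrier test patches above and the two commits that follow: they only toggle the @pytest.mark.skip markers on the new barrier tests. The short sketch below shows, under stated assumptions, what test_barrier_solver exercises, namely selecting the new barrier method from the Python API. Only CUOPT_METHOD, SolverMethod.Barrier, set_parameter, the "time_limit" key, and the Problem-building calls are taken from the patches themselves; the exact import locations of Problem, SolverSettings, VType, and MAXIMIZE are assumptions and may differ from the real package layout.]

# Hedged sketch: solve a small LP with the barrier method (method=3 in the server config).
# Import paths marked "assumed" are illustrative only; CUOPT_METHOD and SolverMethod come
# from the modules referenced in the patches above.
from cuopt.linear_programming.problem import MAXIMIZE, Problem, VType          # assumed path
from cuopt.linear_programming.solver_settings import SolverMethod, SolverSettings  # SolverSettings path assumed
from cuopt.linear_programming.solver.solver_parameters import CUOPT_METHOD

prob = Problem("barrier_sketch")
xs = prob.addVariable(lb=0, vtype=VType.CONTINUOUS, name="xs")
xl = prob.addVariable(lb=0, vtype=VType.CONTINUOUS, name="xl")
prob.addConstraint(xs + 3 * xl <= 200, name="c1")
prob.addConstraint(3 * xs + 2 * xl <= 160, name="c2")
prob.setObjective(5 * xs + 20 * xl, sense=MAXIMIZE)

settings = SolverSettings()
settings.set_parameter(CUOPT_METHOD, SolverMethod.Barrier)  # barrier; PDLP and dual simplex stay available
settings.set_parameter("time_limit", 10)

prob.solve(settings)
print(prob.Status.name, prob.ObjValue)  # expected for this data: Optimal, roughly 1333.33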
From d715f3a8d05f4bc651054a9487c0ec613a6cfb79 Mon Sep 17 00:00:00 2001 From: Ramakrishnap <42624703+rgsl888prabhu@users.noreply.github.com> Date: Tue, 7 Oct 2025 12:09:14 -0500 Subject: [PATCH 22/40] Update test_lp.py --- python/cuopt_server/cuopt_server/tests/test_lp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cuopt_server/cuopt_server/tests/test_lp.py b/python/cuopt_server/cuopt_server/tests/test_lp.py index ff35cfc6f..c46a44b37 100644 --- a/python/cuopt_server/cuopt_server/tests/test_lp.py +++ b/python/cuopt_server/cuopt_server/tests/test_lp.py @@ -148,7 +148,7 @@ def test_sample_milp( ) -@pytest.mark.skip(reason="Skipping barrier solver options test") +# @pytest.mark.skip(reason="Skipping barrier solver options test") @pytest.mark.parametrize( "folding, dualize, ordering, augmented, eliminate_dense, cudss_determ", [ From 5b66f3ddebf95f1b2fcf2d34ca0d4706c0cdcf74 Mon Sep 17 00:00:00 2001 From: Ramakrishnap <42624703+rgsl888prabhu@users.noreply.github.com> Date: Tue, 7 Oct 2025 12:10:14 -0500 Subject: [PATCH 23/40] Update data_model_wrapper.pyx --- .../linear_programming/data_model/data_model_wrapper.pyx | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/python/cuopt/cuopt/linear_programming/data_model/data_model_wrapper.pyx b/python/cuopt/cuopt/linear_programming/data_model/data_model_wrapper.pyx index 50641d331..2c196751f 100644 --- a/python/cuopt/cuopt/linear_programming/data_model/data_model_wrapper.pyx +++ b/python/cuopt/cuopt/linear_programming/data_model/data_model_wrapper.pyx @@ -25,8 +25,6 @@ import warnings import numpy as np -import cudf - from libc.stdint cimport uintptr_t from libcpp.memory cimport unique_ptr from libcpp.utility cimport move @@ -46,13 +44,11 @@ def type_cast(np_obj, np_type, name): def get_data_ptr(array): - if isinstance(array, cudf.Series): - return array.__cuda_array_interface__['data'][0] - elif isinstance(array, np.ndarray): + if isinstance(array, np.ndarray): return array.__array_interface__['data'][0] else: raise Exception( - "get_data_ptr must be called with cudf.Series or np.ndarray" + "get_data_ptr must be called with np.ndarray" ) From f72ff01c51c6bea144ffd1c088efd7a77cd41e08 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Tue, 7 Oct 2025 12:29:11 -0500 Subject: [PATCH 24/40] fix --- .../linear_programming/data_model/data_model_wrapper.pyx | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/python/cuopt/cuopt/linear_programming/data_model/data_model_wrapper.pyx b/python/cuopt/cuopt/linear_programming/data_model/data_model_wrapper.pyx index 50641d331..2c196751f 100644 --- a/python/cuopt/cuopt/linear_programming/data_model/data_model_wrapper.pyx +++ b/python/cuopt/cuopt/linear_programming/data_model/data_model_wrapper.pyx @@ -25,8 +25,6 @@ import warnings import numpy as np -import cudf - from libc.stdint cimport uintptr_t from libcpp.memory cimport unique_ptr from libcpp.utility cimport move @@ -46,13 +44,11 @@ def type_cast(np_obj, np_type, name): def get_data_ptr(array): - if isinstance(array, cudf.Series): - return array.__cuda_array_interface__['data'][0] - elif isinstance(array, np.ndarray): + if isinstance(array, np.ndarray): return array.__array_interface__['data'][0] else: raise Exception( - "get_data_ptr must be called with cudf.Series or np.ndarray" + "get_data_ptr must be called with np.ndarray" ) From 0d1a2aef9fdb42c2a7a7c389a397e0208b0e63c1 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Tue, 7 Oct 2025 14:23:05 -0700 Subject: [PATCH 25/40] Fix 
post-solve crash on physicansched6-2 --- cpp/src/dual_simplex/presolve.cpp | 101 +++++++++++++++++------------- cpp/src/dual_simplex/presolve.hpp | 1 + cpp/src/dual_simplex/solve.cpp | 2 + 3 files changed, 59 insertions(+), 45 deletions(-) diff --git a/cpp/src/dual_simplex/presolve.cpp b/cpp/src/dual_simplex/presolve.cpp index b9351a9a7..035870726 100644 --- a/cpp/src/dual_simplex/presolve.cpp +++ b/cpp/src/dual_simplex/presolve.cpp @@ -1411,6 +1411,7 @@ void uncrush_dual_solution(const user_problem_t& user_problem, template void uncrush_solution(const presolve_info_t& presolve_info, + const simplex_solver_settings_t& settings, const std::vector& crushed_x, const std::vector& crushed_y, const std::vector& crushed_z, @@ -1452,15 +1453,15 @@ void uncrush_solution(const presolve_info_t& presolve_info, matrix_transpose_vector_multiply(presolve_info.folding_info.C_s, 1.0, crushed_y, 0.0, ytilde); matrix_transpose_vector_multiply(presolve_info.folding_info.D_s, 1.0, crushed_z, 0.0, ztilde); - printf("|| y ||_2 = %e\n", vector_norm2(ytilde)); - printf("|| z ||_2 = %e\n", vector_norm2(ztilde)); + settings.log.debug("|| y ||_2 = %e\n", vector_norm2(ytilde)); + settings.log.debug("|| z ||_2 = %e\n", vector_norm2(ztilde)); std::vector dual_residual(previous_cols); for (i_t j = 0; j < previous_cols; j++) { dual_residual[j] = ztilde[j] - presolve_info.folding_info.c_tilde[j]; } matrix_transpose_vector_multiply( presolve_info.folding_info.A_tilde, 1.0, ytilde, 1.0, dual_residual); - printf("Unfolded dual residual = %e\n", vector_norm_inf(dual_residual)); + settings.log.printf("Unfolded dual residual = %e\n", vector_norm_inf(dual_residual)); // Now we need to map the solution back to the original problem // minimize c^T x @@ -1475,74 +1476,83 @@ void uncrush_solution(const presolve_info_t& presolve_info, input_z.resize(previous_cols - presolve_info.folding_info.num_upper_bounds); } - if (presolve_info.removed_constraints.size() == 0) { - uncrushed_y = input_y; - } else { - printf("Handling removed constraints %d\n", presolve_info.removed_constraints.size()); - // We removed some constraints, so we need to map the crushed solution back to the original - // constraints - const i_t m = - presolve_info.removed_constraints.size() + presolve_info.remaining_constraints.size(); - uncrushed_y.resize(m); - - i_t k = 0; - for (const i_t i : presolve_info.remaining_constraints) { - uncrushed_y[i] = input_y[k]; - k++; - } - for (const i_t i : presolve_info.removed_constraints) { - uncrushed_y[i] = 0.0; + const i_t num_free_variables = presolve_info.free_variable_pairs.size() / 2; + if (num_free_variables > 0) { + settings.log.printf("Post-solve: Handling free variables %d\n", num_free_variables); + // We added free variables so we need to map the crushed solution back to the original variables + for (i_t k = 0; k < 2 * num_free_variables; k += 2) { + const i_t u = presolve_info.free_variable_pairs[k]; + const i_t v = presolve_info.free_variable_pairs[k + 1]; + input_x[u] -= input_x[v]; } + input_z.resize(input_z.size() - num_free_variables); + input_x.resize(input_x.size() - num_free_variables); } - if (presolve_info.removed_variables.size() == 0) { - uncrushed_x = input_x; - uncrushed_z = input_z; - } else { - printf("Handling removed variables %d\n", presolve_info.removed_variables.size()); + if (presolve_info.removed_variables.size() > 0) { + settings.log.printf("Post-solve: Handling removed variables %d\n", presolve_info.removed_variables.size()); // We removed some variables, so we need to map the crushed 
solution back to the original // variables const i_t n = presolve_info.removed_variables.size() + presolve_info.remaining_variables.size(); - uncrushed_x.resize(n); - uncrushed_z.resize(n); + std::vector input_x_copy = input_x; + std::vector input_z_copy = input_z; + input_x_copy.resize(n); + input_z_copy.resize(n); i_t k = 0; for (const i_t j : presolve_info.remaining_variables) { - uncrushed_x[j] = input_x[k]; - uncrushed_z[j] = input_z[k]; + input_x_copy[j] = input_x[k]; + input_z_copy[j] = input_z[k]; k++; } k = 0; for (const i_t j : presolve_info.removed_variables) { - uncrushed_x[j] = presolve_info.removed_values[k]; - uncrushed_z[j] = presolve_info.removed_reduced_costs[k]; + input_x_copy[j] = presolve_info.removed_values[k]; + input_z_copy[j] = presolve_info.removed_reduced_costs[k]; k++; } + input_x = input_x_copy; + input_z = input_z_copy; } - const i_t num_free_variables = presolve_info.free_variable_pairs.size() / 2; - if (num_free_variables > 0) { - printf("Handling free variables %d\n", num_free_variables); - // We added free variables so we need to map the crushed solution back to the original variables - for (i_t k = 0; k < 2 * num_free_variables; k += 2) { - const i_t u = presolve_info.free_variable_pairs[k]; - const i_t v = presolve_info.free_variable_pairs[k + 1]; - uncrushed_x[u] -= uncrushed_x[v]; + if (presolve_info.removed_constraints.size() > 0) { + settings.log.printf("Post-solve: Handling removed constraints %d\n", presolve_info.removed_constraints.size()); + // We removed some constraints, so we need to map the crushed solution back to the original + // constraints + const i_t m = + presolve_info.removed_constraints.size() + presolve_info.remaining_constraints.size(); + std::vector input_y_copy = input_y; + input_y_copy.resize(m); + + i_t k = 0; + for (const i_t i : presolve_info.remaining_constraints) { + input_y_copy[i] = input_y[k]; + k++; } - const i_t n = uncrushed_x.size(); - uncrushed_x.resize(n - num_free_variables); - uncrushed_z.resize(n - num_free_variables); + for (const i_t i : presolve_info.removed_constraints) { + input_y_copy[i] = 0.0; + } + input_y = input_y_copy; } + + if (presolve_info.removed_lower_bounds.size() > 0) { - printf("Handling removed lower bounds %d\n", presolve_info.removed_lower_bounds.size()); + settings.log.printf("Post-solve: Handling removed lower bounds %d\n", presolve_info.removed_lower_bounds.size()); // We removed some lower bounds so we need to map the crushed solution back to the original // variables - for (i_t j = 0; j < uncrushed_x.size(); j++) { - uncrushed_x[j] += presolve_info.removed_lower_bounds[j]; + for (i_t j = 0; j < input_x.size(); j++) { + input_x[j] += presolve_info.removed_lower_bounds[j]; } } + assert(uncrushed_x.size() == input_x.size()); + assert(uncrushed_y.size() == input_y.size()); + assert(uncrushed_z.size() == input_z.size()); + + uncrushed_x = input_x; + uncrushed_y = input_y; + uncrushed_z = input_z; } #ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE @@ -1585,6 +1595,7 @@ template void uncrush_dual_solution(const user_problem_t& user_z); template void uncrush_solution(const presolve_info_t& presolve_info, + const simplex_solver_settings_t& settings, const std::vector& crushed_x, const std::vector& crushed_y, const std::vector& crushed_z, diff --git a/cpp/src/dual_simplex/presolve.hpp b/cpp/src/dual_simplex/presolve.hpp index cdf0aaac0..fa8a8db58 100644 --- a/cpp/src/dual_simplex/presolve.hpp +++ b/cpp/src/dual_simplex/presolve.hpp @@ -180,6 +180,7 @@ void uncrush_dual_solution(const user_problem_t& 
user_problem, template void uncrush_solution(const presolve_info_t& presolve_info, + const simplex_solver_settings_t& settings, const std::vector& crushed_x, const std::vector& crushed_y, const std::vector& crushed_z, diff --git a/cpp/src/dual_simplex/solve.cpp b/cpp/src/dual_simplex/solve.cpp index 8584e3d80..dadb60cc6 100644 --- a/cpp/src/dual_simplex/solve.cpp +++ b/cpp/src/dual_simplex/solve.cpp @@ -208,6 +208,7 @@ lp_status_t solve_linear_program_advanced(const lp_problem_t& original std::vector unscaled_z(lp.num_cols); unscale_solution(column_scales, solution.x, solution.z, unscaled_x, unscaled_z); uncrush_solution(presolve_info, + settings, unscaled_x, solution.y, unscaled_z, @@ -324,6 +325,7 @@ lp_status_t solve_linear_program_with_barrier(const user_problem_t& us // Undo presolve uncrush_solution(presolve_info, + barrier_settings, unscaled_x, barrier_solution.y, unscaled_z, From eb140d1ad09b2687bc30a5a0245d9790a2aed2e5 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Tue, 7 Oct 2025 14:24:36 -0700 Subject: [PATCH 26/40] Fix style --- cpp/src/dual_simplex/presolve.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/cpp/src/dual_simplex/presolve.cpp b/cpp/src/dual_simplex/presolve.cpp index 035870726..e2790858e 100644 --- a/cpp/src/dual_simplex/presolve.cpp +++ b/cpp/src/dual_simplex/presolve.cpp @@ -1490,7 +1490,8 @@ void uncrush_solution(const presolve_info_t& presolve_info, } if (presolve_info.removed_variables.size() > 0) { - settings.log.printf("Post-solve: Handling removed variables %d\n", presolve_info.removed_variables.size()); + settings.log.printf("Post-solve: Handling removed variables %d\n", + presolve_info.removed_variables.size()); // We removed some variables, so we need to map the crushed solution back to the original // variables const i_t n = presolve_info.removed_variables.size() + presolve_info.remaining_variables.size(); @@ -1517,7 +1518,8 @@ void uncrush_solution(const presolve_info_t& presolve_info, } if (presolve_info.removed_constraints.size() > 0) { - settings.log.printf("Post-solve: Handling removed constraints %d\n", presolve_info.removed_constraints.size()); + settings.log.printf("Post-solve: Handling removed constraints %d\n", + presolve_info.removed_constraints.size()); // We removed some constraints, so we need to map the crushed solution back to the original // constraints const i_t m = @@ -1536,10 +1538,9 @@ void uncrush_solution(const presolve_info_t& presolve_info, input_y = input_y_copy; } - - if (presolve_info.removed_lower_bounds.size() > 0) { - settings.log.printf("Post-solve: Handling removed lower bounds %d\n", presolve_info.removed_lower_bounds.size()); + settings.log.printf("Post-solve: Handling removed lower bounds %d\n", + presolve_info.removed_lower_bounds.size()); // We removed some lower bounds so we need to map the crushed solution back to the original // variables for (i_t j = 0; j < input_x.size(); j++) { From 08cbac2da1ce44cdaf8151137f52c6b97335f3f2 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Tue, 7 Oct 2025 17:13:12 -0700 Subject: [PATCH 27/40] Fix UMA on problems where row nz greater than number of rows --- cpp/src/dual_simplex/barrier.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/dual_simplex/barrier.cu b/cpp/src/dual_simplex/barrier.cu index 62ccde07e..95ce9fce6 100644 --- a/cpp/src/dual_simplex/barrier.cu +++ b/cpp/src/dual_simplex/barrier.cu @@ -1028,7 +1028,7 @@ class iteration_data_t { } } - std::vector histogram_row(m, 0); + std::vector 
histogram_row(n, 0); max_row_nz = 0; for (i_t k = 0; k < m; k++) { histogram_row[row_nz[k]]++; From e2449a9a68ce76594bdb4c7a5d5fcf6c861026c3 Mon Sep 17 00:00:00 2001 From: Christopher Maes Date: Tue, 7 Oct 2025 17:45:01 -0700 Subject: [PATCH 28/40] Fix undefined reference due to realloc of colors from emplace_back --- cpp/src/dual_simplex/folding.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/dual_simplex/folding.cpp b/cpp/src/dual_simplex/folding.cpp index fe202647b..913d86b0a 100644 --- a/cpp/src/dual_simplex/folding.cpp +++ b/cpp/src/dual_simplex/folding.cpp @@ -521,7 +521,7 @@ i_t color_graph(const csc_matrix_t& A, // See if we need to split the column colors for (i_t color : colors_to_split) { split_colors(color, - refining_color.color, + colors[refining_color_index].color, kCol, vertex_to_sum, color_sums, @@ -543,7 +543,7 @@ i_t color_graph(const csc_matrix_t& A, // See if we need to split the row colors for (i_t color : colors_to_split) { split_colors(color, - refining_color.color, + colors[refining_color_index].color, kRow, vertex_to_sum, color_sums, From 98cc2ac001ea64a2ca37cf4566c3575c19af03e5 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Wed, 8 Oct 2025 12:37:34 -0500 Subject: [PATCH 29/40] testing --- .github/workflows/pr.yaml | 161 +----------------- ci/test_wheel_cuopt.sh | 4 +- .../linear_programming/test_python_API.py | 14 ++ 3 files changed, 22 insertions(+), 157 deletions(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 9517f9236..bd1582334 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -27,137 +27,13 @@ concurrency: jobs: pr-builder: needs: - - changed-files - - checks - - conda-cpp-build - - conda-cpp-tests - - conda-python-build - - conda-python-tests - - docs-build - wheel-build-libcuopt - wheel-build-cuopt - wheel-tests-cuopt - - wheel-build-cuopt-server - - wheel-tests-cuopt-server - wheel-build-cuopt-mps-parser - wheel-build-cuopt-sh-client - - test-self-hosted-server secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-25.10 - changed-files: - secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@branch-25.10 - with: - files_yaml: | - test_cpp: - - '**' - - '!CONTRIBUTING.md' - - '!README.md' - - '!ci/release/update-version-cuopt.sh' - - '!ci/release/update-version-rapids.sh' - - '!docs/**' - - '!img/**' - - '!notebooks/**' - - '!python/**' - - '!readme_pages/**' - - '!container-builder/**' - - '!helm-chart/**' - - '!ngc/**' - - '!omniverse/**' - - '!regression/**' - - '!resources/**' - - '!ucf/**' - - '!utilities/**' - test_notebooks: - - '**' - - '!CONTRIBUTING.md' - - '!README.md' - - '!ci/release/update-version-cuopt.sh' - - '!ci/release/update-version-rapids.sh' - - '!docs/**' - - '!python/nvcf_client/**' - test_python: - - '**' - - '!CONTRIBUTING.md' - - '!README.md' - - '!ci/release/update-version-cuopt.sh' - - '!ci/release/update-version-rapids.sh' - - '!docs/**' - - '!img/**' - - '!notebooks/**' - - '!python/nvcf_client/**' - test_python_cuopt: - - '**' - - '!CONTRIBUTING.md' - - '!README.md' - - '!ci/release/update-version-cuopt.sh' - - '!ci/release/update-version-rapids.sh' - - '!docs/**' - - '!img/**' - - '!notebooks/**' - - '!python/cuopt_self_hosted/**' - - '!python/cuopt_server/**' - - '!python/nvcf_client/**' - test_python_cuopt_server: - - '**' - - '!CONTRIBUTING.md' - - '!README.md' - - '!ci/release/update-version-cuopt.sh' - - '!ci/release/update-version-rapids.sh' - - 
'!docs/**' - - '!img/**' - - '!notebooks/**' - - '!python/cuopt_self_hosted/**' - - '!python/nvcf_client/**' - checks: - secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-25.10 - with: - enable_check_generated_files: false - - conda-cpp-build: - needs: checks - secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.10 - with: - build_type: pull-request - script: ci/build_cpp.sh - conda-cpp-tests: - needs: [conda-cpp-build, changed-files] - secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-25.10 - #if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_cpp - with: - build_type: pull-request - script: ci/test_cpp.sh - conda-python-build: - needs: conda-cpp-build - secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-25.10 - with: - build_type: pull-request - script: ci/build_python.sh - conda-python-tests: - needs: [conda-python-build, changed-files] - secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.10 - #if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python - with: - run_codecov: false - build_type: pull-request - script: ci/test_python.sh - docs-build: - needs: conda-python-build - secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.10 - with: - build_type: pull-request - node_type: "gpu-l4-latest-1" - arch: "amd64" - file_to_upload: "docs/cuopt/build/html/" - artifact-name: "cuopt_docs" - container_image: "rapidsai/ci-conda:25.10-latest" - script: "ci/build_docs.sh" wheel-build-cuopt-mps-parser: secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.10 @@ -174,8 +50,8 @@ jobs: secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.10 with: - # build for every combination of arch and CUDA version, but only for the latest Python - matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber))) + # Build only for amd64 and CUDA 12.9.1 + matrix_filter: 'map(select(.ARCH == "amd64" and .CUDA_VER == "12.9.1"))' package-type: cpp package-name: libcuopt build_type: pull-request @@ -189,26 +65,15 @@ jobs: script: ci/build_wheel_cuopt.sh package-name: cuopt package-type: python + # Build only for amd64 and CUDA 12.9.1 + matrix_filter: 'map(select(.ARCH == "amd64" and .CUDA_VER == "12.9.1" and .PY_VER == "3.10"))' wheel-tests-cuopt: - needs: [wheel-build-cuopt, wheel-build-cuopt-mps-parser, wheel-build-cuopt-sh-client, changed-files] + needs: [wheel-build-cuopt, wheel-build-cuopt-mps-parser, wheel-build-cuopt-sh-client] secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.10 - #if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python_cuopt with: build_type: pull-request script: ci/test_wheel_cuopt.sh - wheel-build-cuopt-server: - needs: checks - secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.10 - with: - build_type: pull-request - script: ci/build_wheel_cuopt_server.sh - package-name: cuopt_server - package-type: python - pure-wheel: true - # Only need 1 package per CUDA major version. This selects "ARCH=amd64 + the latest supported Python, 1 job per major CUDA version". 
- matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) wheel-build-cuopt-sh-client: secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.10 @@ -221,19 +86,3 @@ jobs: pure-wheel: true # only need 1 build (noarch package): this selects amd64, oldest-supported Python, latest-supported CUDA matrix_filter: '[map(select(.ARCH == "amd64")) | min_by((.PY_VER | split(".") | map(tonumber)), (.CUDA_VER | split(".") | map(-tonumber)))]' - wheel-tests-cuopt-server: - needs: [wheel-build-cuopt, wheel-build-cuopt-server, changed-files] - secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.10 - #if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python_cuopt_server - with: - build_type: pull-request - script: ci/test_wheel_cuopt_server.sh - test-self-hosted-server: - needs: [wheel-build-cuopt, wheel-build-cuopt-server, changed-files] - secrets: inherit - uses: ./.github/workflows/self_hosted_service_test.yaml - #if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python - with: - build_type: pull-request - script: ci/test_self_hosted_service.sh diff --git a/ci/test_wheel_cuopt.sh b/ci/test_wheel_cuopt.sh index 1b37ed020..2d22f741c 100755 --- a/ci/test_wheel_cuopt.sh +++ b/ci/test_wheel_cuopt.sh @@ -68,9 +68,11 @@ export RAPIDS_DATASET_ROOT_DIR # Please enable this once ISSUE https://github.com/NVIDIA/cuopt/issues/94 is fixed # Run CLI tests -timeout 10m bash ./python/libcuopt/libcuopt/tests/test_cli.sh +# timeout 10m bash ./python/libcuopt/libcuopt/tests/test_cli.sh # Run Python tests +export UCX_LOG_LEVEL=debug +RAPIDS_DATASET_ROOT_DIR=./datasets timeout 30m python -m pytest --verbose --capture=no ./python/cuopt/cuopt/tests/linear_programming/test_python_API.py::test_barrier_solver RAPIDS_DATASET_ROOT_DIR=./datasets timeout 30m python -m pytest --verbose --capture=no ./python/cuopt/cuopt/tests/ # run jump tests and cvxpy integration tests for only nightly builds diff --git a/python/cuopt/cuopt/tests/linear_programming/test_python_API.py b/python/cuopt/cuopt/tests/linear_programming/test_python_API.py index 9a6653d1a..47b349ef7 100644 --- a/python/cuopt/cuopt/tests/linear_programming/test_python_API.py +++ b/python/cuopt/cuopt/tests/linear_programming/test_python_API.py @@ -494,7 +494,9 @@ def test_barrier_solver(): settings.set_parameter(CUOPT_METHOD, SolverMethod.Barrier) settings.set_parameter("time_limit", 10) + print("Solving with default settings\n") prob.solve(settings) + print("Solved with default settings\n") assert prob.solved assert prob.Status.name == "Optimal" @@ -513,8 +515,12 @@ def test_barrier_solver(): settings_forced.set_parameter(CUOPT_CUDSS_DETERMINISTIC, True) settings_forced.set_parameter("time_limit", 10) + print("Solving with forced settings\n") + prob.solve(settings_forced) + print("Solved with forced settings\n") + assert prob.solved assert prob.Status.name == "Optimal" assert prob.ObjValue == pytest.approx(1333.33, rel=0.01) @@ -530,8 +536,12 @@ def test_barrier_solver(): settings_disabled.set_parameter(CUOPT_CUDSS_DETERMINISTIC, False) settings_disabled.set_parameter("time_limit", 10) + print("Solving with disabled settings\n") + prob.solve(settings_disabled) + print("Solved with disabled settings\n") + assert prob.solved assert prob.Status.name == "Optimal" assert prob.ObjValue == pytest.approx(1333.33, rel=0.01) @@ -545,7 
+555,11 @@ def test_barrier_solver(): settings_auto.set_parameter(CUOPT_AUGMENTED, -1) # Automatic settings_auto.set_parameter("time_limit", 10) + print("Solving with automatic settings\n") + prob.solve(settings_auto) + + print("Solved with automatic settings\n") assert prob.solved assert prob.Status.name == "Optimal" From 373a59bbd219ba4ce2e04f9a24b948d71c7ec2bc Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Wed, 8 Oct 2025 12:44:54 -0500 Subject: [PATCH 30/40] Add 3.10 filter --- .github/workflows/pr.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index bd1582334..3ff1fa17e 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -51,7 +51,7 @@ jobs: uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.10 with: # Build only for amd64 and CUDA 12.9.1 - matrix_filter: 'map(select(.ARCH == "amd64" and .CUDA_VER == "12.9.1"))' + matrix_filter: 'map(select(.ARCH == "amd64" and .CUDA_VER == "12.9.1" and .PY_VER == "3.10"))' package-type: cpp package-name: libcuopt build_type: pull-request From ffca33fc10dd943cbc66ee72b44d9bb9c0dbf4dc Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Wed, 8 Oct 2025 14:29:32 -0500 Subject: [PATCH 31/40] test --- .github/workflows/pr.yaml | 1 + ci/test_wheel_cuopt.sh | 16 +- .../linear_programming/test_python_API.py | 327 ++++++++++++++++++ 3 files changed, 342 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 3ff1fa17e..447dcbf30 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -74,6 +74,7 @@ jobs: with: build_type: pull-request script: ci/test_wheel_cuopt.sh + matrix_filter: 'map(select(.ARCH == "amd64" and .CUDA_VER == "12.9.1" and .PY_VER == "3.10"))' wheel-build-cuopt-sh-client: secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.10 diff --git a/ci/test_wheel_cuopt.sh b/ci/test_wheel_cuopt.sh index 2d22f741c..135daafa8 100755 --- a/ci/test_wheel_cuopt.sh +++ b/ci/test_wheel_cuopt.sh @@ -71,9 +71,21 @@ export RAPIDS_DATASET_ROOT_DIR # timeout 10m bash ./python/libcuopt/libcuopt/tests/test_cli.sh # Run Python tests +# Set environment variables to handle OpenMP and UCX threading issues +# Disable UCX debug logging to avoid interference with cleanup export UCX_LOG_LEVEL=debug -RAPIDS_DATASET_ROOT_DIR=./datasets timeout 30m python -m pytest --verbose --capture=no ./python/cuopt/cuopt/tests/linear_programming/test_python_API.py::test_barrier_solver -RAPIDS_DATASET_ROOT_DIR=./datasets timeout 30m python -m pytest --verbose --capture=no ./python/cuopt/cuopt/tests/ +# Set UCX to handle signals gracefully and avoid spinlock issues +#export UCX_HANDLE_ERRORS=bt +#export UCX_ERROR_SIGNALS="SIGSEGV,SIGBUS,SIGFPE" +# Ensure proper OpenMP threading behavior +#export OMP_NUM_THREADS=1 +#export OMP_WAIT_POLICY=passive +# Disable CUDA launch blocking to allow proper async cleanup +#export CUDA_LAUNCH_BLOCKING=0 +RAPIDS_DATASET_ROOT_DIR=./datasets timeout 30m python -m pytest -s --verbose --capture=no ./python/cuopt/cuopt/tests/linear_programming/test_python_API.py::test_barrier_solver_settings +RAPIDS_DATASET_ROOT_DIR=./datasets timeout 30m python -m pytest -s --verbose --capture=no ./python/cuopt/cuopt/tests/linear_programming/test_python_API.py::test_barrier_solver_fresh_instances +RAPIDS_DATASET_ROOT_DIR=./datasets timeout 30m python -m pytest -s --verbose --capture=no 
./python/cuopt/cuopt/tests/linear_programming/test_python_API.py::test_barrier_solver +RAPIDS_DATASET_ROOT_DIR=./datasets timeout 30m python -m pytest -s --verbose --capture=no ./python/cuopt/cuopt/tests/ # run jump tests and cvxpy integration tests for only nightly builds if [[ "${RAPIDS_BUILD_TYPE}" == "nightly" ]]; then diff --git a/python/cuopt/cuopt/tests/linear_programming/test_python_API.py b/python/cuopt/cuopt/tests/linear_programming/test_python_API.py index 47b349ef7..35fb87f32 100644 --- a/python/cuopt/cuopt/tests/linear_programming/test_python_API.py +++ b/python/cuopt/cuopt/tests/linear_programming/test_python_API.py @@ -569,3 +569,330 @@ def test_barrier_solver(): for c in prob.getConstraints(): # For <= constraints with optimal solution, slack should be >= 0 assert c.Slack >= -1e-6 # Allow small numerical tolerance + + +def test_barrier_solver_fresh_instances(): + """ + Test the barrier solver with different configurations, creating a fresh + Problem instance for each configuration to ensure complete isolation. + + Problem: + maximize 5*xs + 20*xl + subject to 1*xs + 3*xl <= 200 + 3*xs + 2*xl <= 160 + xs, xl >= 0 + + Expected Solution: + Optimal objective: 1333.33 + xs = 0, xl = 66.67 (corner solution where constraint 1 is binding) + """ + + # Test 1: Default barrier settings + print("\n=== Test 1: Default barrier settings ===") + prob1 = Problem("Barrier Test - Default") + xs1 = prob1.addVariable(lb=0, vtype=VType.CONTINUOUS, name="xs") + xl1 = prob1.addVariable(lb=0, vtype=VType.CONTINUOUS, name="xl") + prob1.addConstraint(xs1 + 3 * xl1 <= 200, name="constraint1") + prob1.addConstraint(3 * xs1 + 2 * xl1 <= 160, name="constraint2") + prob1.setObjective(5 * xs1 + 20 * xl1, sense=MAXIMIZE) + + settings1 = SolverSettings() + settings1.set_parameter(CUOPT_METHOD, SolverMethod.Barrier) + settings1.set_parameter("time_limit", 10) + + print("Solving with default settings") + prob1.solve(settings1) + print("Solved with default settings") + + assert prob1.solved + assert prob1.Status.name == "Optimal" + assert prob1.ObjValue == pytest.approx(1333.33, rel=0.01) + assert xs1.Value == pytest.approx(0.0, abs=1e-4) + assert xl1.Value == pytest.approx(66.67, rel=0.01) + + # Test 2: Barrier with forced settings + print("\n=== Test 2: Barrier with forced settings ===") + prob2 = Problem("Barrier Test - Forced") + xs2 = prob2.addVariable(lb=0, vtype=VType.CONTINUOUS, name="xs") + xl2 = prob2.addVariable(lb=0, vtype=VType.CONTINUOUS, name="xl") + prob2.addConstraint(xs2 + 3 * xl2 <= 200, name="constraint1") + prob2.addConstraint(3 * xs2 + 2 * xl2 <= 160, name="constraint2") + prob2.setObjective(5 * xs2 + 20 * xl2, sense=MAXIMIZE) + + settings2 = SolverSettings() + settings2.set_parameter(CUOPT_METHOD, SolverMethod.Barrier) + settings2.set_parameter(CUOPT_FOLDING, 1) # Force folding + settings2.set_parameter(CUOPT_DUALIZE, 1) # Force dualize + settings2.set_parameter(CUOPT_ORDERING, 1) # AMD ordering + settings2.set_parameter(CUOPT_AUGMENTED, 1) # Augmented system + settings2.set_parameter(CUOPT_ELIMINATE_DENSE_COLUMNS, True) + settings2.set_parameter(CUOPT_CUDSS_DETERMINISTIC, True) + settings2.set_parameter("time_limit", 10) + + print("Solving with forced settings") + prob2.solve(settings2) + print("Solved with forced settings") + + assert prob2.solved + assert prob2.Status.name == "Optimal" + assert prob2.ObjValue == pytest.approx(1333.33, rel=0.01) + assert xs2.Value == pytest.approx(0.0, abs=1e-4) + assert xl2.Value == pytest.approx(66.67, rel=0.01) + + # Test 3: Barrier with features 
disabled + print("\n=== Test 3: Barrier with features disabled ===") + prob3 = Problem("Barrier Test - Disabled") + xs3 = prob3.addVariable(lb=0, vtype=VType.CONTINUOUS, name="xs") + xl3 = prob3.addVariable(lb=0, vtype=VType.CONTINUOUS, name="xl") + prob3.addConstraint(xs3 + 3 * xl3 <= 200, name="constraint1") + prob3.addConstraint(3 * xs3 + 2 * xl3 <= 160, name="constraint2") + prob3.setObjective(5 * xs3 + 20 * xl3, sense=MAXIMIZE) + + settings3 = SolverSettings() + settings3.set_parameter(CUOPT_METHOD, SolverMethod.Barrier) + settings3.set_parameter(CUOPT_FOLDING, 0) # No folding + settings3.set_parameter(CUOPT_DUALIZE, 0) # No dualization + settings3.set_parameter(CUOPT_ORDERING, 0) # cuDSS default + settings3.set_parameter(CUOPT_AUGMENTED, 0) # ADAT system + settings3.set_parameter(CUOPT_ELIMINATE_DENSE_COLUMNS, False) + settings3.set_parameter(CUOPT_CUDSS_DETERMINISTIC, False) + settings3.set_parameter("time_limit", 10) + + print("Solving with disabled settings") + prob3.solve(settings3) + print("Solved with disabled settings") + + assert prob3.solved + assert prob3.Status.name == "Optimal" + assert prob3.ObjValue == pytest.approx(1333.33, rel=0.01) + assert xs3.Value == pytest.approx(0.0, abs=1e-4) + assert xl3.Value == pytest.approx(66.67, rel=0.01) + + # Test 4: Barrier with automatic settings (default -1 values) + print("\n=== Test 4: Barrier with automatic settings ===") + prob4 = Problem("Barrier Test - Automatic") + xs4 = prob4.addVariable(lb=0, vtype=VType.CONTINUOUS, name="xs") + xl4 = prob4.addVariable(lb=0, vtype=VType.CONTINUOUS, name="xl") + prob4.addConstraint(xs4 + 3 * xl4 <= 200, name="constraint1") + prob4.addConstraint(3 * xs4 + 2 * xl4 <= 160, name="constraint2") + prob4.setObjective(5 * xs4 + 20 * xl4, sense=MAXIMIZE) + + settings4 = SolverSettings() + settings4.set_parameter(CUOPT_METHOD, SolverMethod.Barrier) + settings4.set_parameter(CUOPT_FOLDING, -1) # Automatic + settings4.set_parameter(CUOPT_DUALIZE, -1) # Automatic + settings4.set_parameter(CUOPT_ORDERING, -1) # Automatic + settings4.set_parameter(CUOPT_AUGMENTED, -1) # Automatic + settings4.set_parameter("time_limit", 10) + + print("Solving with automatic settings") + prob4.solve(settings4) + print("Solved with automatic settings") + + assert prob4.solved + assert prob4.Status.name == "Optimal" + assert prob4.ObjValue == pytest.approx(1333.33, rel=0.01) + assert xs4.Value == pytest.approx(0.0, abs=1e-4) + assert xl4.Value == pytest.approx(66.67, rel=0.01) + + # Verify constraint slacks are non-negative for all tests + for prob in [prob1, prob2, prob3, prob4]: + for c in prob.getConstraints(): + assert c.Slack >= -1e-6 # Allow small numerical tolerance + + +@pytest.mark.parametrize( + "test_name,settings_config", + [ + ( + "automatic", + { + CUOPT_FOLDING: -1, + CUOPT_DUALIZE: -1, + CUOPT_ORDERING: -1, + CUOPT_AUGMENTED: -1, + }, + ), + ( + "forced_on", + { + CUOPT_FOLDING: 1, + CUOPT_DUALIZE: 1, + CUOPT_ORDERING: 1, + CUOPT_AUGMENTED: 1, + CUOPT_ELIMINATE_DENSE_COLUMNS: True, + CUOPT_CUDSS_DETERMINISTIC: True, + }, + ), + ( + "disabled", + { + CUOPT_FOLDING: 0, + CUOPT_DUALIZE: 0, + CUOPT_ORDERING: 0, + CUOPT_AUGMENTED: 0, + CUOPT_ELIMINATE_DENSE_COLUMNS: False, + CUOPT_CUDSS_DETERMINISTIC: False, + }, + ), + ( + "mixed", + { + CUOPT_FOLDING: 1, + CUOPT_DUALIZE: 0, + CUOPT_ORDERING: -1, + CUOPT_AUGMENTED: 1, + }, + ), + ( + "folding_on", + { + CUOPT_FOLDING: 1, + }, + ), + ( + "folding_off", + { + CUOPT_FOLDING: 0, + }, + ), + ( + "dualize_on", + { + CUOPT_DUALIZE: 1, + }, + ), + ( + "dualize_off", + { + 
CUOPT_DUALIZE: 0, + }, + ), + ( + "amd_ordering", + { + CUOPT_ORDERING: 1, + }, + ), + ( + "cudss_ordering", + { + CUOPT_ORDERING: 0, + }, + ), + ( + "augmented_system", + { + CUOPT_AUGMENTED: 1, + }, + ), + ( + "adat_system", + { + CUOPT_AUGMENTED: 0, + }, + ), + ( + "no_dense_elim", + { + CUOPT_ELIMINATE_DENSE_COLUMNS: False, + }, + ), + ( + "cudss_deterministic", + { + CUOPT_CUDSS_DETERMINISTIC: True, + }, + ), + ( + "combo1", + { + CUOPT_FOLDING: 1, + CUOPT_DUALIZE: 1, + CUOPT_ORDERING: 1, + }, + ), + ( + "combo2", + { + CUOPT_FOLDING: 0, + CUOPT_AUGMENTED: 0, + CUOPT_ELIMINATE_DENSE_COLUMNS: False, + }, + ), + ], +) +def test_barrier_solver_settings(test_name, settings_config): + """ + Parameterized test for barrier solver with different configurations. + + Tests the barrier solver across various settings combinations to ensure + correctness and robustness. Each configuration tests different aspects + of the barrier solver implementation. + + Problem: + maximize 5*xs + 20*xl + subject to 1*xs + 3*xl <= 200 + 3*xs + 2*xl <= 160 + xs, xl >= 0 + + Expected Solution: + Optimal objective: 1333.33 + xs = 0, xl = 66.67 (corner solution where constraint 1 is binding) + + Args: + test_name: Descriptive name for the test configuration + settings_config: Dictionary of barrier solver parameters to set + """ + prob = Problem(f"Barrier Test - {test_name}") + + # Add variables + xs = prob.addVariable(lb=0, vtype=VType.CONTINUOUS, name="xs") + xl = prob.addVariable(lb=0, vtype=VType.CONTINUOUS, name="xl") + + # Add constraints + prob.addConstraint(xs + 3 * xl <= 200, name="constraint1") + prob.addConstraint(3 * xs + 2 * xl <= 160, name="constraint2") + + # Set objective: maximize 5*xs + 20*xl + prob.setObjective(5 * xs + 20 * xl, sense=MAXIMIZE) + + # Configure solver settings + settings = SolverSettings() + settings.set_parameter(CUOPT_METHOD, SolverMethod.Barrier) + settings.set_parameter("time_limit", 10) + + # Apply test-specific settings + for param_name, param_value in settings_config.items(): + settings.set_parameter(param_name, param_value) + + print(f"\nTesting configuration: {test_name}") + print(f"Settings: {settings_config}") + + # Solve the problem + prob.solve(settings) + + print(f"Status: {prob.Status.name}") + print(f"Objective: {prob.ObjValue}") + print(f"xs = {xs.Value}, xl = {xl.Value}") + + # Verify solution + assert prob.solved, f"Problem not solved for {test_name}" + assert prob.Status.name == "Optimal", f"Not optimal for {test_name}" + assert prob.ObjValue == pytest.approx( + 1333.33, rel=0.01 + ), f"Incorrect objective for {test_name}" + assert xs.Value == pytest.approx( + 0.0, abs=1e-4 + ), f"Incorrect xs value for {test_name}" + assert xl.Value == pytest.approx( + 66.67, rel=0.01 + ), f"Incorrect xl value for {test_name}" + + # Verify constraint slacks are non-negative + for c in prob.getConstraints(): + assert ( + c.Slack >= -1e-6 + ), f"Negative slack for {c.getConstraintName()} in {test_name}" + + print(f"✓ Test passed: {test_name}\n") From cd979cf7c272a05069b894e64f84e40b3b61be9b Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Wed, 8 Oct 2025 17:16:53 -0500 Subject: [PATCH 32/40] enable few options on OMP --- ci/test_wheel_cuopt.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ci/test_wheel_cuopt.sh b/ci/test_wheel_cuopt.sh index 135daafa8..46ad2b419 100755 --- a/ci/test_wheel_cuopt.sh +++ b/ci/test_wheel_cuopt.sh @@ -73,13 +73,13 @@ export RAPIDS_DATASET_ROOT_DIR # Run Python tests # Set environment variables to handle OpenMP and UCX 
threading issues # Disable UCX debug logging to avoid interference with cleanup -export UCX_LOG_LEVEL=debug +#export UCX_LOG_LEVEL=debug # Set UCX to handle signals gracefully and avoid spinlock issues -#export UCX_HANDLE_ERRORS=bt +export UCX_HANDLE_ERRORS=bt #export UCX_ERROR_SIGNALS="SIGSEGV,SIGBUS,SIGFPE" # Ensure proper OpenMP threading behavior -#export OMP_NUM_THREADS=1 -#export OMP_WAIT_POLICY=passive +export OMP_NUM_THREADS=1 +export OMP_WAIT_POLICY=passive # Disable CUDA launch blocking to allow proper async cleanup #export CUDA_LAUNCH_BLOCKING=0 RAPIDS_DATASET_ROOT_DIR=./datasets timeout 30m python -m pytest -s --verbose --capture=no ./python/cuopt/cuopt/tests/linear_programming/test_python_API.py::test_barrier_solver_settings From eb67876bfbb8b91096575c7c17f79ed81b25ee0d Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Wed, 8 Oct 2025 22:02:02 -0500 Subject: [PATCH 33/40] revert testing changes --- .github/workflows/pr.yaml | 162 +++++++++++- ci/test_wheel_cuopt.sh | 21 +- .../linear_programming/test_python_API.py | 240 ------------------ 3 files changed, 161 insertions(+), 262 deletions(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 447dcbf30..9517f9236 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -27,13 +27,137 @@ concurrency: jobs: pr-builder: needs: + - changed-files + - checks + - conda-cpp-build + - conda-cpp-tests + - conda-python-build + - conda-python-tests + - docs-build - wheel-build-libcuopt - wheel-build-cuopt - wheel-tests-cuopt + - wheel-build-cuopt-server + - wheel-tests-cuopt-server - wheel-build-cuopt-mps-parser - wheel-build-cuopt-sh-client + - test-self-hosted-server secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-25.10 + changed-files: + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@branch-25.10 + with: + files_yaml: | + test_cpp: + - '**' + - '!CONTRIBUTING.md' + - '!README.md' + - '!ci/release/update-version-cuopt.sh' + - '!ci/release/update-version-rapids.sh' + - '!docs/**' + - '!img/**' + - '!notebooks/**' + - '!python/**' + - '!readme_pages/**' + - '!container-builder/**' + - '!helm-chart/**' + - '!ngc/**' + - '!omniverse/**' + - '!regression/**' + - '!resources/**' + - '!ucf/**' + - '!utilities/**' + test_notebooks: + - '**' + - '!CONTRIBUTING.md' + - '!README.md' + - '!ci/release/update-version-cuopt.sh' + - '!ci/release/update-version-rapids.sh' + - '!docs/**' + - '!python/nvcf_client/**' + test_python: + - '**' + - '!CONTRIBUTING.md' + - '!README.md' + - '!ci/release/update-version-cuopt.sh' + - '!ci/release/update-version-rapids.sh' + - '!docs/**' + - '!img/**' + - '!notebooks/**' + - '!python/nvcf_client/**' + test_python_cuopt: + - '**' + - '!CONTRIBUTING.md' + - '!README.md' + - '!ci/release/update-version-cuopt.sh' + - '!ci/release/update-version-rapids.sh' + - '!docs/**' + - '!img/**' + - '!notebooks/**' + - '!python/cuopt_self_hosted/**' + - '!python/cuopt_server/**' + - '!python/nvcf_client/**' + test_python_cuopt_server: + - '**' + - '!CONTRIBUTING.md' + - '!README.md' + - '!ci/release/update-version-cuopt.sh' + - '!ci/release/update-version-rapids.sh' + - '!docs/**' + - '!img/**' + - '!notebooks/**' + - '!python/cuopt_self_hosted/**' + - '!python/nvcf_client/**' + checks: + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-25.10 + with: + enable_check_generated_files: false + + conda-cpp-build: + needs: checks + secrets: inherit + uses: 
rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.10 + with: + build_type: pull-request + script: ci/build_cpp.sh + conda-cpp-tests: + needs: [conda-cpp-build, changed-files] + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-25.10 + #if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_cpp + with: + build_type: pull-request + script: ci/test_cpp.sh + conda-python-build: + needs: conda-cpp-build + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/conda-python-build.yaml@branch-25.10 + with: + build_type: pull-request + script: ci/build_python.sh + conda-python-tests: + needs: [conda-python-build, changed-files] + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.10 + #if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python + with: + run_codecov: false + build_type: pull-request + script: ci/test_python.sh + docs-build: + needs: conda-python-build + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.10 + with: + build_type: pull-request + node_type: "gpu-l4-latest-1" + arch: "amd64" + file_to_upload: "docs/cuopt/build/html/" + artifact-name: "cuopt_docs" + container_image: "rapidsai/ci-conda:25.10-latest" + script: "ci/build_docs.sh" wheel-build-cuopt-mps-parser: secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.10 @@ -50,8 +174,8 @@ jobs: secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.10 with: - # Build only for amd64 and CUDA 12.9.1 - matrix_filter: 'map(select(.ARCH == "amd64" and .CUDA_VER == "12.9.1" and .PY_VER == "3.10"))' + # build for every combination of arch and CUDA version, but only for the latest Python + matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber))) package-type: cpp package-name: libcuopt build_type: pull-request @@ -65,16 +189,26 @@ jobs: script: ci/build_wheel_cuopt.sh package-name: cuopt package-type: python - # Build only for amd64 and CUDA 12.9.1 - matrix_filter: 'map(select(.ARCH == "amd64" and .CUDA_VER == "12.9.1" and .PY_VER == "3.10"))' wheel-tests-cuopt: - needs: [wheel-build-cuopt, wheel-build-cuopt-mps-parser, wheel-build-cuopt-sh-client] + needs: [wheel-build-cuopt, wheel-build-cuopt-mps-parser, wheel-build-cuopt-sh-client, changed-files] secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.10 + #if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python_cuopt with: build_type: pull-request script: ci/test_wheel_cuopt.sh - matrix_filter: 'map(select(.ARCH == "amd64" and .CUDA_VER == "12.9.1" and .PY_VER == "3.10"))' + wheel-build-cuopt-server: + needs: checks + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.10 + with: + build_type: pull-request + script: ci/build_wheel_cuopt_server.sh + package-name: cuopt_server + package-type: python + pure-wheel: true + # Only need 1 package per CUDA major version. This selects "ARCH=amd64 + the latest supported Python, 1 job per major CUDA version". 
+ matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) wheel-build-cuopt-sh-client: secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.10 @@ -87,3 +221,19 @@ jobs: pure-wheel: true # only need 1 build (noarch package): this selects amd64, oldest-supported Python, latest-supported CUDA matrix_filter: '[map(select(.ARCH == "amd64")) | min_by((.PY_VER | split(".") | map(tonumber)), (.CUDA_VER | split(".") | map(-tonumber)))]' + wheel-tests-cuopt-server: + needs: [wheel-build-cuopt, wheel-build-cuopt-server, changed-files] + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.10 + #if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python_cuopt_server + with: + build_type: pull-request + script: ci/test_wheel_cuopt_server.sh + test-self-hosted-server: + needs: [wheel-build-cuopt, wheel-build-cuopt-server, changed-files] + secrets: inherit + uses: ./.github/workflows/self_hosted_service_test.yaml + #if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python + with: + build_type: pull-request + script: ci/test_self_hosted_service.sh diff --git a/ci/test_wheel_cuopt.sh b/ci/test_wheel_cuopt.sh index 46ad2b419..61dabd67f 100755 --- a/ci/test_wheel_cuopt.sh +++ b/ci/test_wheel_cuopt.sh @@ -66,26 +66,15 @@ cd - RAPIDS_DATASET_ROOT_DIR="$(realpath datasets)" export RAPIDS_DATASET_ROOT_DIR -# Please enable this once ISSUE https://github.com/NVIDIA/cuopt/issues/94 is fixed # Run CLI tests -# timeout 10m bash ./python/libcuopt/libcuopt/tests/test_cli.sh +timeout 10m bash ./python/libcuopt/libcuopt/tests/test_cli.sh # Run Python tests -# Set environment variables to handle OpenMP and UCX threading issues -# Disable UCX debug logging to avoid interference with cleanup -#export UCX_LOG_LEVEL=debug -# Set UCX to handle signals gracefully and avoid spinlock issues -export UCX_HANDLE_ERRORS=bt -#export UCX_ERROR_SIGNALS="SIGSEGV,SIGBUS,SIGFPE" -# Ensure proper OpenMP threading behavior + +# Due to race condition in certain cases UCX might not be able to cleanup properly, so we set the number of threads to 1 export OMP_NUM_THREADS=1 -export OMP_WAIT_POLICY=passive -# Disable CUDA launch blocking to allow proper async cleanup -#export CUDA_LAUNCH_BLOCKING=0 -RAPIDS_DATASET_ROOT_DIR=./datasets timeout 30m python -m pytest -s --verbose --capture=no ./python/cuopt/cuopt/tests/linear_programming/test_python_API.py::test_barrier_solver_settings -RAPIDS_DATASET_ROOT_DIR=./datasets timeout 30m python -m pytest -s --verbose --capture=no ./python/cuopt/cuopt/tests/linear_programming/test_python_API.py::test_barrier_solver_fresh_instances -RAPIDS_DATASET_ROOT_DIR=./datasets timeout 30m python -m pytest -s --verbose --capture=no ./python/cuopt/cuopt/tests/linear_programming/test_python_API.py::test_barrier_solver -RAPIDS_DATASET_ROOT_DIR=./datasets timeout 30m python -m pytest -s --verbose --capture=no ./python/cuopt/cuopt/tests/ + +RAPIDS_DATASET_ROOT_DIR=./datasets timeout 30m python -m pytest --verbose --capture=no ./python/cuopt/cuopt/tests/ # run jump tests and cvxpy integration tests for only nightly builds if [[ "${RAPIDS_BUILD_TYPE}" == "nightly" ]]; then diff --git a/python/cuopt/cuopt/tests/linear_programming/test_python_API.py b/python/cuopt/cuopt/tests/linear_programming/test_python_API.py index 35fb87f32..fb679647d 100644 --- 
a/python/cuopt/cuopt/tests/linear_programming/test_python_API.py +++ b/python/cuopt/cuopt/tests/linear_programming/test_python_API.py @@ -461,246 +461,6 @@ def test_problem_update(): assert prob.ObjValue == pytest.approx(5) -# @pytest.mark.skip(reason="Skipping barrier solver test") -def test_barrier_solver(): - """ - Test the barrier solver with different configurations. - - Problem: - maximize 5*xs + 20*xl - subject to 1*xs + 3*xl <= 200 - 3*xs + 2*xl <= 160 - xs, xl >= 0 - - Expected Solution: - Optimal objective: 1333.33 - xs = 0, xl = 66.67 (corner solution where constraint 1 is binding) - """ - prob = Problem("Barrier Test") - - # Add variables - xs = prob.addVariable(lb=0, vtype=VType.CONTINUOUS, name="xs") - xl = prob.addVariable(lb=0, vtype=VType.CONTINUOUS, name="xl") - - # Add constraints - prob.addConstraint(xs + 3 * xl <= 200, name="constraint1") - prob.addConstraint(3 * xs + 2 * xl <= 160, name="constraint2") - - # Set objective: maximize 5*xs + 20 * xl - prob.setObjective(5 * xs + 20 * xl, sense=MAXIMIZE) - - # Test 1: Default barrier settings - settings = SolverSettings() - settings.set_parameter(CUOPT_METHOD, SolverMethod.Barrier) - settings.set_parameter("time_limit", 10) - - print("Solving with default settings\n") - prob.solve(settings) - print("Solved with default settings\n") - - assert prob.solved - assert prob.Status.name == "Optimal" - assert prob.ObjValue == pytest.approx(1333.33, rel=0.01) - assert xs.Value == pytest.approx(0.0, abs=1e-4) - assert xl.Value == pytest.approx(66.67, rel=0.01) - - # Test 2: Barrier with forced settings - settings_forced = SolverSettings() - settings_forced.set_parameter(CUOPT_METHOD, SolverMethod.Barrier) - settings_forced.set_parameter(CUOPT_FOLDING, 1) # Force folding - settings_forced.set_parameter(CUOPT_DUALIZE, 1) # Force dualize - settings_forced.set_parameter(CUOPT_ORDERING, 1) # AMD ordering - settings_forced.set_parameter(CUOPT_AUGMENTED, 1) # Augmented system - settings_forced.set_parameter(CUOPT_ELIMINATE_DENSE_COLUMNS, True) - settings_forced.set_parameter(CUOPT_CUDSS_DETERMINISTIC, True) - settings_forced.set_parameter("time_limit", 10) - - print("Solving with forced settings\n") - - prob.solve(settings_forced) - - print("Solved with forced settings\n") - - assert prob.solved - assert prob.Status.name == "Optimal" - assert prob.ObjValue == pytest.approx(1333.33, rel=0.01) - - # Test 3: Barrier with features disabled - settings_disabled = SolverSettings() - settings_disabled.set_parameter(CUOPT_METHOD, SolverMethod.Barrier) - settings_disabled.set_parameter(CUOPT_FOLDING, 0) # No folding - settings_disabled.set_parameter(CUOPT_DUALIZE, 0) # No dualization - settings_disabled.set_parameter(CUOPT_ORDERING, 0) # cuDSS default - settings_disabled.set_parameter(CUOPT_AUGMENTED, 0) # ADAT system - settings_disabled.set_parameter(CUOPT_ELIMINATE_DENSE_COLUMNS, False) - settings_disabled.set_parameter(CUOPT_CUDSS_DETERMINISTIC, False) - settings_disabled.set_parameter("time_limit", 10) - - print("Solving with disabled settings\n") - - prob.solve(settings_disabled) - - print("Solved with disabled settings\n") - - assert prob.solved - assert prob.Status.name == "Optimal" - assert prob.ObjValue == pytest.approx(1333.33, rel=0.01) - - # Test 4: Barrier with automatic settings (default -1 values) - settings_auto = SolverSettings() - settings_auto.set_parameter(CUOPT_METHOD, SolverMethod.Barrier) - settings_auto.set_parameter(CUOPT_FOLDING, -1) # Automatic - settings_auto.set_parameter(CUOPT_DUALIZE, -1) # Automatic - 
settings_auto.set_parameter(CUOPT_ORDERING, -1) # Automatic - settings_auto.set_parameter(CUOPT_AUGMENTED, -1) # Automatic - settings_auto.set_parameter("time_limit", 10) - - print("Solving with automatic settings\n") - - prob.solve(settings_auto) - - print("Solved with automatic settings\n") - - assert prob.solved - assert prob.Status.name == "Optimal" - assert prob.ObjValue == pytest.approx(1333.33, rel=0.01) - - # Verify constraint slacks are non-negative - for c in prob.getConstraints(): - # For <= constraints with optimal solution, slack should be >= 0 - assert c.Slack >= -1e-6 # Allow small numerical tolerance - - -def test_barrier_solver_fresh_instances(): - """ - Test the barrier solver with different configurations, creating a fresh - Problem instance for each configuration to ensure complete isolation. - - Problem: - maximize 5*xs + 20*xl - subject to 1*xs + 3*xl <= 200 - 3*xs + 2*xl <= 160 - xs, xl >= 0 - - Expected Solution: - Optimal objective: 1333.33 - xs = 0, xl = 66.67 (corner solution where constraint 1 is binding) - """ - - # Test 1: Default barrier settings - print("\n=== Test 1: Default barrier settings ===") - prob1 = Problem("Barrier Test - Default") - xs1 = prob1.addVariable(lb=0, vtype=VType.CONTINUOUS, name="xs") - xl1 = prob1.addVariable(lb=0, vtype=VType.CONTINUOUS, name="xl") - prob1.addConstraint(xs1 + 3 * xl1 <= 200, name="constraint1") - prob1.addConstraint(3 * xs1 + 2 * xl1 <= 160, name="constraint2") - prob1.setObjective(5 * xs1 + 20 * xl1, sense=MAXIMIZE) - - settings1 = SolverSettings() - settings1.set_parameter(CUOPT_METHOD, SolverMethod.Barrier) - settings1.set_parameter("time_limit", 10) - - print("Solving with default settings") - prob1.solve(settings1) - print("Solved with default settings") - - assert prob1.solved - assert prob1.Status.name == "Optimal" - assert prob1.ObjValue == pytest.approx(1333.33, rel=0.01) - assert xs1.Value == pytest.approx(0.0, abs=1e-4) - assert xl1.Value == pytest.approx(66.67, rel=0.01) - - # Test 2: Barrier with forced settings - print("\n=== Test 2: Barrier with forced settings ===") - prob2 = Problem("Barrier Test - Forced") - xs2 = prob2.addVariable(lb=0, vtype=VType.CONTINUOUS, name="xs") - xl2 = prob2.addVariable(lb=0, vtype=VType.CONTINUOUS, name="xl") - prob2.addConstraint(xs2 + 3 * xl2 <= 200, name="constraint1") - prob2.addConstraint(3 * xs2 + 2 * xl2 <= 160, name="constraint2") - prob2.setObjective(5 * xs2 + 20 * xl2, sense=MAXIMIZE) - - settings2 = SolverSettings() - settings2.set_parameter(CUOPT_METHOD, SolverMethod.Barrier) - settings2.set_parameter(CUOPT_FOLDING, 1) # Force folding - settings2.set_parameter(CUOPT_DUALIZE, 1) # Force dualize - settings2.set_parameter(CUOPT_ORDERING, 1) # AMD ordering - settings2.set_parameter(CUOPT_AUGMENTED, 1) # Augmented system - settings2.set_parameter(CUOPT_ELIMINATE_DENSE_COLUMNS, True) - settings2.set_parameter(CUOPT_CUDSS_DETERMINISTIC, True) - settings2.set_parameter("time_limit", 10) - - print("Solving with forced settings") - prob2.solve(settings2) - print("Solved with forced settings") - - assert prob2.solved - assert prob2.Status.name == "Optimal" - assert prob2.ObjValue == pytest.approx(1333.33, rel=0.01) - assert xs2.Value == pytest.approx(0.0, abs=1e-4) - assert xl2.Value == pytest.approx(66.67, rel=0.01) - - # Test 3: Barrier with features disabled - print("\n=== Test 3: Barrier with features disabled ===") - prob3 = Problem("Barrier Test - Disabled") - xs3 = prob3.addVariable(lb=0, vtype=VType.CONTINUOUS, name="xs") - xl3 = prob3.addVariable(lb=0, 
vtype=VType.CONTINUOUS, name="xl") - prob3.addConstraint(xs3 + 3 * xl3 <= 200, name="constraint1") - prob3.addConstraint(3 * xs3 + 2 * xl3 <= 160, name="constraint2") - prob3.setObjective(5 * xs3 + 20 * xl3, sense=MAXIMIZE) - - settings3 = SolverSettings() - settings3.set_parameter(CUOPT_METHOD, SolverMethod.Barrier) - settings3.set_parameter(CUOPT_FOLDING, 0) # No folding - settings3.set_parameter(CUOPT_DUALIZE, 0) # No dualization - settings3.set_parameter(CUOPT_ORDERING, 0) # cuDSS default - settings3.set_parameter(CUOPT_AUGMENTED, 0) # ADAT system - settings3.set_parameter(CUOPT_ELIMINATE_DENSE_COLUMNS, False) - settings3.set_parameter(CUOPT_CUDSS_DETERMINISTIC, False) - settings3.set_parameter("time_limit", 10) - - print("Solving with disabled settings") - prob3.solve(settings3) - print("Solved with disabled settings") - - assert prob3.solved - assert prob3.Status.name == "Optimal" - assert prob3.ObjValue == pytest.approx(1333.33, rel=0.01) - assert xs3.Value == pytest.approx(0.0, abs=1e-4) - assert xl3.Value == pytest.approx(66.67, rel=0.01) - - # Test 4: Barrier with automatic settings (default -1 values) - print("\n=== Test 4: Barrier with automatic settings ===") - prob4 = Problem("Barrier Test - Automatic") - xs4 = prob4.addVariable(lb=0, vtype=VType.CONTINUOUS, name="xs") - xl4 = prob4.addVariable(lb=0, vtype=VType.CONTINUOUS, name="xl") - prob4.addConstraint(xs4 + 3 * xl4 <= 200, name="constraint1") - prob4.addConstraint(3 * xs4 + 2 * xl4 <= 160, name="constraint2") - prob4.setObjective(5 * xs4 + 20 * xl4, sense=MAXIMIZE) - - settings4 = SolverSettings() - settings4.set_parameter(CUOPT_METHOD, SolverMethod.Barrier) - settings4.set_parameter(CUOPT_FOLDING, -1) # Automatic - settings4.set_parameter(CUOPT_DUALIZE, -1) # Automatic - settings4.set_parameter(CUOPT_ORDERING, -1) # Automatic - settings4.set_parameter(CUOPT_AUGMENTED, -1) # Automatic - settings4.set_parameter("time_limit", 10) - - print("Solving with automatic settings") - prob4.solve(settings4) - print("Solved with automatic settings") - - assert prob4.solved - assert prob4.Status.name == "Optimal" - assert prob4.ObjValue == pytest.approx(1333.33, rel=0.01) - assert xs4.Value == pytest.approx(0.0, abs=1e-4) - assert xl4.Value == pytest.approx(66.67, rel=0.01) - - # Verify constraint slacks are non-negative for all tests - for prob in [prob1, prob2, prob3, prob4]: - for c in prob.getConstraints(): - assert c.Slack >= -1e-6 # Allow small numerical tolerance - - @pytest.mark.parametrize( "test_name,settings_config", [ From 6c4b021d42a4a8c77337aefb93b0d8bdc69b7d59 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Wed, 8 Oct 2025 22:03:50 -0500 Subject: [PATCH 34/40] update --- python/cuopt/cuopt/tests/linear_programming/test_python_API.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/cuopt/cuopt/tests/linear_programming/test_python_API.py b/python/cuopt/cuopt/tests/linear_programming/test_python_API.py index fb679647d..bf744adf7 100644 --- a/python/cuopt/cuopt/tests/linear_programming/test_python_API.py +++ b/python/cuopt/cuopt/tests/linear_programming/test_python_API.py @@ -654,5 +654,3 @@ def test_barrier_solver_settings(test_name, settings_config): assert ( c.Slack >= -1e-6 ), f"Negative slack for {c.getConstraintName()} in {test_name}" - - print(f"✓ Test passed: {test_name}\n") From 93b0243b5f637ecd9d6ff32be953185e788b686e Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Wed, 8 Oct 2025 22:11:56 -0500 Subject: [PATCH 35/40] add doc for CUOPT_BARRIER_DUAL_INITIAL_POINT --- 
.../source/cuopt-c/lp-milp/lp-milp-c-api.rst | 1 + docs/cuopt/source/lp-milp-settings.rst | 81 +++++++++++-------- 2 files changed, 47 insertions(+), 35 deletions(-) diff --git a/docs/cuopt/source/cuopt-c/lp-milp/lp-milp-c-api.rst b/docs/cuopt/source/cuopt-c/lp-milp/lp-milp-c-api.rst index 7c4be9834..e321e319d 100644 --- a/docs/cuopt/source/cuopt-c/lp-milp/lp-milp-c-api.rst +++ b/docs/cuopt/source/cuopt-c/lp-milp/lp-milp-c-api.rst @@ -180,6 +180,7 @@ These constants are used as parameter names in the :c:func:`cuOptSetParameter`, .. doxygendefine:: CUOPT_ORDERING .. doxygendefine:: CUOPT_ELIMINATE_DENSE_COLUMNS .. doxygendefine:: CUOPT_CUDSS_DETERMINISTIC +.. doxygendefine:: CUOPT_BARRIER_DUAL_INITIAL_POINT .. doxygendefine:: CUOPT_DUAL_POSTSOLVE .. doxygendefine:: CUOPT_SOLUTION_FILE .. doxygendefine:: CUOPT_NUM_CPU_THREADS diff --git a/docs/cuopt/source/lp-milp-settings.rst b/docs/cuopt/source/lp-milp-settings.rst index e815960b3..52ae613f9 100644 --- a/docs/cuopt/source/lp-milp-settings.rst +++ b/docs/cuopt/source/lp-milp-settings.rst @@ -23,8 +23,8 @@ may run slightly over the limit. If set along with the iteration limit, cuOpt wi the first limit (iteration or time) is hit. -Note: by default there is no time limit. So cuOpt will run until it finds an optimal solution, -or proves the problem is infeasible or unbounded. +.. note:: by default there is no time limit. So cuOpt will run until it finds an optimal solution, + or proves the problem is infeasible or unbounded. @@ -33,25 +33,25 @@ Log to Console ``CUOPT_LOG_TO_CONSOLE`` controls whether cuOpt should log information to the console during a solve. If true, a logging info is written to the console, if false no logging info is written to the console (logs may still be written to a file.) -Note: the default value is true. +.. note:: the default value is true. Log File ^^^^^^^^ ``CUOPT_LOG_FILE`` controls the name of a log file where cuOpt should write information about the solve. -Note: the default value is ``""`` and no log file is written. This setting is ignored by the cuOpt service, use the log callback feature instead. +.. note:: the default value is ``""`` and no log file is written. This setting is ignored by the cuOpt service, use the log callback feature instead. Solution File ^^^^^^^^^^^^^ ``CUOPT_SOLUTION_FILE`` controls the name of a file where cuOpt should write the solution. -Note: the default value is ``""`` and no solution file is written. This setting is ignored by the cuOpt service. +.. note:: the default value is ``""`` and no solution file is written. This setting is ignored by the cuOpt service. User Problem File ^^^^^^^^^^^^^^^^^ ``CUOPT_USER_PROBLEM_FILE`` controls the name of a file where cuOpt should write the user problem. -Note: the default value is ``""`` and no user problem file is written. This setting is ignored by the cuOpt service. +.. note:: the default value is ``""`` and no user problem file is written. This setting is ignored by the cuOpt service. Num CPU Threads ^^^^^^^^^^^^^^^ @@ -59,7 +59,7 @@ Num CPU Threads the amount of CPU resources cuOpt uses. Set this to a large value to improve solve times for CPU parallel parts of the solvers. -Note: by default the number of CPU threads is automatically determined based on the number of CPU cores. +.. note:: by default the number of CPU threads is automatically determined based on the number of CPU cores. Presolve ^^^^^^^^ @@ -85,7 +85,7 @@ Method * ``Dual Simplex``: Use the dual simplex method. * ``Barrier``: Use the barrier (interior-point) method. 
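A minimal Python-side sketch of this choice, patterned on the barrier tests added earlier in this series (``SolverSettings``, ``SolverMethod`` and the ``prob`` object are assumed to be set up as in ``test_python_API.py``; those imports are not part of this hunk)::

    settings = SolverSettings()
    settings.set_parameter(CUOPT_METHOD, SolverMethod.Barrier)  # or one of the Concurrent / PDLP / Dual Simplex choices above
    settings.set_parameter("time_limit", 10)
    prob.solve(settings)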
-Note: The default method is ``Concurrent``. +.. note:: The default method is ``Concurrent``. C API users should use the constants defined in :ref:`method-constants` for this parameter. @@ -116,8 +116,8 @@ For performance reasons, cuOpt's does not constantly checks for iteration limit, the solver might run a few extra iterations over the limit. If set along with the time limit, cuOpt will stop at the first limit (iteration or time) reached. -Note: by default there is no iteration limit. So, cuOpt will run until it finds an optimal solution, -or proves the problem is infeasible or unbounded. +.. note:: by default there is no iteration limit. So, cuOpt will run until it finds an optimal solution, + or proves the problem is infeasible or unbounded. Infeasiblity Detection @@ -128,8 +128,8 @@ is not always accurate. Some problems detected as infeasible may converge under Detecting infeasibility consumes both more runtime and memory. The added runtime is between 3% and 7%, the added memory consumpution is between 10% and 20%. -Note: by default PDLP will not detect infeasibility. Dual simplex will always detect infeasibility -regardless of this setting. +.. note:: by default PDLP will not detect infeasibility. Dual simplex will always detect infeasibility + regardless of this setting. Strict Infeasibility ^^^^^^^^^^^^^^^^^^^^ @@ -138,7 +138,7 @@ Strict Infeasibility is detected as infeasible, PDLP will stop. When false both the current and average solution need to be detected as infeasible for PDLP to stop. -Note: the default value is false. +.. note:: the default value is false. .. _crossover: @@ -152,7 +152,7 @@ between their bounds. Enabling crossover allows the user to obtain a high-qualit that lies at a vertex of the feasible region. If n is the number of variables, and m is the number of constraints, n - m variables will be on their bounds in a basic solution. -Note: the default value is false. +.. note:: the default value is false. Save Best Primal So Far ^^^^^^^^^^^^^^^^^^^^^^^ @@ -163,21 +163,21 @@ With this parameter set to true, PDLP * If no primal feasible was found, the one with the lowest primal residual will be kept * If two have the same primal residual, the one with the best objective will be kept -Note: the default value is false. +.. note:: the default value is false. First Primal Feasible ^^^^^^^^^^^^^^^^^^^^^ ``CUOPT_FIRST_PRIMAL_FEASIBLE`` controls whether PDLP should stop when the first primal feasible solution is found. -Note: the default value is false. +.. note:: the default value is false. Per Constraint Residual ^^^^^^^^^^^^^^^^^^^^^^^ ``CUOPT_PER_CONSTRAINT_RESIDUAL`` controls whether PDLP should compute the primal & dual residual per constraint instead of globally. -Note: the default value is false. +.. note:: the default value is false. Barrier Solver Settings ^^^^^^^^^^^^^^^^^^^^^^^^ @@ -193,7 +193,7 @@ Folding * ``0``: Disable folding * ``1``: Force folding to run -Note: the default value is ``-1`` (automatic). +.. note:: the default value is ``-1`` (automatic). Dualize """"""" @@ -204,7 +204,7 @@ Dualize * ``0``: Don't attempt to dualize * ``1``: Force dualize -Note: the default value is ``-1`` (automatic). +.. note:: the default value is ``-1`` (automatic). Ordering """""""" @@ -215,7 +215,7 @@ Ordering * ``0``: cuDSS default ordering * ``1``: AMD (Approximate Minimum Degree) ordering -Note: the default value is ``-1`` (automatic). +.. note:: the default value is ``-1`` (automatic). 
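These barrier knobs are set through the same ``set_parameter`` interface; a minimal sketch following the test configurations in this series (the ``-1``/``0``/``1`` values carry the meanings listed above, and ``SolverSettings``/``SolverMethod`` are assumed to be imported as in ``test_python_API.py``)::

    settings = SolverSettings()
    settings.set_parameter(CUOPT_METHOD, SolverMethod.Barrier)
    settings.set_parameter(CUOPT_FOLDING, 1)    # force folding
    settings.set_parameter(CUOPT_DUALIZE, 0)    # never dualize
    settings.set_parameter(CUOPT_ORDERING, -1)  # let cuOpt choose the ordering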
Augmented System """""""""""""""" @@ -226,7 +226,7 @@ Augmented System * ``0``: Solve the ADAT system (normal equations) * ``1``: Solve the augmented system -Note: the default value is ``-1`` (automatic). The augmented system may be more stable for some problems, while ADAT may be faster for others. +.. note:: the default value is ``-1`` (automatic). The augmented system may be more stable for some problems, while ADAT may be faster for others. Eliminate Dense Columns """""""""""""""""""""""" @@ -239,7 +239,7 @@ However, extra solves must be performed at each iteration. This setting only has an effect when the ADAT (normal equation) system is solved. -Note: the default value is ``true``. +.. note:: the default value is ``true``. cuDSS Deterministic Mode """"""""""""""""""""""""" @@ -249,7 +249,18 @@ cuDSS Deterministic Mode * ``true``: Use deterministic mode * ``false``: Use non-deterministic mode (default) -Note: the default value is ``false``. Enable deterministic mode if reproducibility is more important than performance. +.. note:: the default value is ``false``. Enable deterministic mode if reproducibility is more important than performance. + +Dual Initial Point +"""""""""""""""""" + +``CUOPT_BARRIER_DUAL_INITIAL_POINT`` controls the method used to compute the dual initial point for the barrier solver. The choice of initial point can affect convergence speed and numerical stability. + +* ``-1``: Automatic (default) - cuOpt selects the best method +* ``0``: Use the Lustig-Marsten-Shanno initial point - a heuristic approach based on the paper "On Implementing Mehrotra's Predictor–Corrector Interior-Point Method for Linear Programming" (SIAM Journal on Optimization, 1992) +* ``1``: Use initial point from dual least squares problem - solves a least squares problem to compute a dual solution that satisfies the dual feasibility constraints + +.. note:: the default value is ``-1`` (automatic). When set to automatic, the behavior is equivalent to option ``0``. Absolute Primal Tolerance ^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -275,7 +286,7 @@ The primal feasibility condition is computed as follows:: primal_feasiblity < absolute_primal_tolerance + relative_primal_tolerance * l2_norm(b) -Note: the default value is ``1e-4``. +.. note:: the default value is ``1e-4``. Absolute Dual Tolerance ^^^^^^^^^^^^^^^^^^^^^^^ @@ -288,7 +299,7 @@ The dual feasibility condition is computed as follows:: dual_feasiblity < absolute_dual_tolerance + relative_dual_tolerance * l2_norm(c) -Note: the default value is ``1e-4``. +.. note:: the default value is ``1e-4``. Relative Dual Tolerance ^^^^^^^^^^^^^^^^^^^^^^^ @@ -299,7 +310,7 @@ The dual feasibility condition is computed as follows:: dual_feasiblity < absolute_dual_tolerance + relative_dual_tolerance * l2_norm(c) -Note: the default value is ``1e-4``. +.. note:: the default value is ``1e-4``. Absolute Gap Tolerance @@ -312,7 +323,7 @@ The duality gap is computed as follows:: duality_gap < absolute_gap_tolerance + relative_gap_tolerance * (|primal_objective| + |dual_objective|) -Note: the default value is ``1e-4``. +.. note:: the default value is ``1e-4``. Relative Gap Tolerance @@ -324,7 +335,7 @@ The duality gap is computed as follows:: duality_gap < absolute_gap_tolerance + relative_gap_tolerance * (|primal_objective| + |dual_objective|) -Note: the default value is ``1e-4``. +.. note:: the default value is ``1e-4``. Mixed Integer Linear Programming @@ -340,7 +351,7 @@ Heuristics only bound is improved via the GPU. 
When set to false, both the GPU and CPU are used and the dual bound is improved on the CPU. -Note: the default value is false. +.. note:: the default value is false. Scaling ^^^^^^^ @@ -348,7 +359,7 @@ Scaling ``CUOPT_MIP_SCALING`` controls if scaling should be applied to the MIP problem. When true scaling is applied, when false, no scaling is applied. -Note: the defaulte value is true. +.. note:: the defaulte value is true. Absolute Tolerance @@ -356,14 +367,14 @@ Absolute Tolerance ``CUOPT_MIP_ABSOLUTE_TOLERANCE`` controls the MIP absolute tolerance. -Note: the default value is ``1e-6``. +.. note:: the default value is ``1e-6``. Relative Tolerance ^^^^^^^^^^^^^^^^^^ ``CUOPT_MIP_RELATIVE_TOLERANCE`` controls the MIP relative tolerance. -Note: the default value is ``1e-12``. +.. note:: the default value is ``1e-12``. Integrality Tolerance @@ -372,7 +383,7 @@ Integrality Tolerance ``CUOPT_INTEGRALITY_TOLERANCE`` controls the MIP integrality tolerance. A variable is considered to be integral, if it is within the integrality tolerance of an integer. -Note: the default value is ``1e-5``. +.. note:: the default value is ``1e-5``. Absolute MIP Gap ^^^^^^^^^^^^^^^^ @@ -387,7 +398,7 @@ when minimizing or when maximizing. -Note: the default value is ``1e-10``. +.. note:: the default value is ``1e-10``. Relative MIP Gap ^^^^^^^^^^^^^^^^ @@ -399,4 +410,4 @@ Relative MIP Gap If the Best Objective and the Dual Bound are both zero the gap is zero. If the best objective value is zero, the gap is infinity. -Note: the default value is ``1e-4``. +.. note:: the default value is ``1e-4``. From 93d013a184441e98d1b8e0e9dc8487c2463c8b34 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Wed, 8 Oct 2025 22:18:07 -0500 Subject: [PATCH 36/40] add test for CUOPT_BARRIER_DUAL_INITIAL_POINT --- .../linear_programming/test_python_API.py | 27 +++++++++++++++++++ .../cuopt_server/tests/test_lp.py | 20 +++++++++----- 2 files changed, 41 insertions(+), 6 deletions(-) diff --git a/python/cuopt/cuopt/tests/linear_programming/test_python_API.py b/python/cuopt/cuopt/tests/linear_programming/test_python_API.py index bf744adf7..c94716eaa 100644 --- a/python/cuopt/cuopt/tests/linear_programming/test_python_API.py +++ b/python/cuopt/cuopt/tests/linear_programming/test_python_API.py @@ -35,6 +35,7 @@ ) from cuopt.linear_programming.solver.solver_parameters import ( CUOPT_AUGMENTED, + CUOPT_BARRIER_DUAL_INITIAL_POINT, CUOPT_CUDSS_DETERMINISTIC, CUOPT_DUALIZE, CUOPT_ELIMINATE_DENSE_COLUMNS, @@ -580,6 +581,32 @@ def test_problem_update(): CUOPT_ELIMINATE_DENSE_COLUMNS: False, }, ), + ( + "dual_initial_point_automatic", + { + CUOPT_BARRIER_DUAL_INITIAL_POINT: -1, + }, + ), + ( + "dual_initial_point_lustig", + { + CUOPT_BARRIER_DUAL_INITIAL_POINT: 0, + }, + ), + ( + "dual_initial_point_least_squares", + { + CUOPT_BARRIER_DUAL_INITIAL_POINT: 1, + }, + ), + ( + "combo3_with_dual_init", + { + CUOPT_AUGMENTED: 1, + CUOPT_BARRIER_DUAL_INITIAL_POINT: 1, + CUOPT_ELIMINATE_DENSE_COLUMNS: True, + }, + ), ], ) def test_barrier_solver_settings(test_name, settings_config): diff --git a/python/cuopt_server/cuopt_server/tests/test_lp.py b/python/cuopt_server/cuopt_server/tests/test_lp.py index c46a44b37..c0f7ee3cc 100644 --- a/python/cuopt_server/cuopt_server/tests/test_lp.py +++ b/python/cuopt_server/cuopt_server/tests/test_lp.py @@ -150,18 +150,22 @@ def test_sample_milp( # @pytest.mark.skip(reason="Skipping barrier solver options test") @pytest.mark.parametrize( - "folding, dualize, ordering, augmented, eliminate_dense, cudss_determ", + 
"folding, dualize, ordering, augmented, eliminate_dense, cudss_determ, dual_initial_point", [ # Test automatic settings (default) - (-1, -1, -1, -1, True, False), + (-1, -1, -1, -1, True, False, -1), # Test folding off, no dualization, cuDSS default ordering, ADAT system - (0, 0, 0, 0, True, False), + (0, 0, 0, 0, True, False, 0), # Test folding on, force dualization, AMD ordering, augmented system - (1, 1, 1, 1, True, True), + (1, 1, 1, 1, True, True, 1), # Test mixed settings: automatic folding, no dualize, AMD, augmented - (-1, 0, 1, 1, False, False), + (-1, 0, 1, 1, False, False, 0), # Test no folding, automatic dualize, cuDSS default, ADAT - (0, -1, 0, 0, True, True), + (0, -1, 0, 0, True, True, -1), + # Test dual initial point with Lustig-Marsten-Shanno + (-1, -1, -1, -1, True, False, 0), + # Test dual initial point with least squares + (-1, -1, -1, 1, True, False, 1), ], ) def test_barrier_solver_options( @@ -172,6 +176,7 @@ def test_barrier_solver_options( augmented, eliminate_dense, cudss_determ, + dual_initial_point, ): """ Test the barrier solver (method=3) with various configuration options: @@ -181,6 +186,7 @@ def test_barrier_solver_options( - augmented: (-1) automatic, (0) ADAT, (1) augmented system - eliminate_dense_columns: True to eliminate, False to not - cudss_deterministic: True for deterministic, False for nondeterministic + - barrier_dual_initial_point: (-1) automatic, (0) Lustig-Marsten-Shanno, (1) dual least squares """ data = get_std_data_for_lp() @@ -194,6 +200,7 @@ def test_barrier_solver_options( data["solver_config"]["augmented"] = augmented data["solver_config"]["eliminate_dense_columns"] = eliminate_dense data["solver_config"]["cudss_deterministic"] = cudss_determ + data["solver_config"]["barrier_dual_initial_point"] = dual_initial_point res = get_lp(client, data) @@ -203,6 +210,7 @@ def test_barrier_solver_options( print(f"folding={folding}, dualize={dualize}, ordering={ordering}") print(f"augmented={augmented}, eliminate_dense={eliminate_dense}") print(f"cudss_deterministic={cudss_determ}") + print(f"barrier_dual_initial_point={dual_initial_point}") print(res.json()) validate_lp_result( From 668ded72eee718e7ab97319e8fd9f3aae40b2413 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Wed, 8 Oct 2025 22:50:04 -0500 Subject: [PATCH 37/40] fix style --- .../cuopt/tests/linear_programming/test_python_API.py | 3 ++- python/cuopt_server/cuopt_server/tests/test_lp.py | 9 ++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/python/cuopt/cuopt/tests/linear_programming/test_python_API.py b/python/cuopt/cuopt/tests/linear_programming/test_python_API.py index c94716eaa..c7ef8b99b 100644 --- a/python/cuopt/cuopt/tests/linear_programming/test_python_API.py +++ b/python/cuopt/cuopt/tests/linear_programming/test_python_API.py @@ -627,7 +627,8 @@ def test_barrier_solver_settings(test_name, settings_config): Optimal objective: 1333.33 xs = 0, xl = 66.67 (corner solution where constraint 1 is binding) - Args: + Args + ---- test_name: Descriptive name for the test configuration settings_config: Dictionary of barrier solver parameters to set """ diff --git a/python/cuopt_server/cuopt_server/tests/test_lp.py b/python/cuopt_server/cuopt_server/tests/test_lp.py index c0f7ee3cc..4a01daaca 100644 --- a/python/cuopt_server/cuopt_server/tests/test_lp.py +++ b/python/cuopt_server/cuopt_server/tests/test_lp.py @@ -150,7 +150,8 @@ def test_sample_milp( # @pytest.mark.skip(reason="Skipping barrier solver options test") @pytest.mark.parametrize( - "folding, dualize, 
ordering, augmented, eliminate_dense, cudss_determ, dual_initial_point", + "folding, dualize, ordering, augmented, eliminate_dense, cudss_determ, " + "dual_initial_point", [ # Test automatic settings (default) (-1, -1, -1, -1, True, False, -1), @@ -185,8 +186,10 @@ def test_barrier_solver_options( - ordering: (-1) automatic, (0) cuDSS default, (1) AMD - augmented: (-1) automatic, (0) ADAT, (1) augmented system - eliminate_dense_columns: True to eliminate, False to not - - cudss_deterministic: True for deterministic, False for nondeterministic - - barrier_dual_initial_point: (-1) automatic, (0) Lustig-Marsten-Shanno, (1) dual least squares + - cudss_deterministic: True for deterministic, False for + nondeterministic + - barrier_dual_initial_point: (-1) automatic, (0) Lustig-Marsten-Shanno, + (1) dual least squares """ data = get_std_data_for_lp() From f50b7b1809144132105438c92ff3bcb2f6560bf7 Mon Sep 17 00:00:00 2001 From: Chris Maes Date: Thu, 9 Oct 2025 10:12:05 -0700 Subject: [PATCH 38/40] Update lp-milp-settings.rst --- docs/cuopt/source/lp-milp-settings.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/cuopt/source/lp-milp-settings.rst b/docs/cuopt/source/lp-milp-settings.rst index 52ae613f9..947c2bdf2 100644 --- a/docs/cuopt/source/lp-milp-settings.rst +++ b/docs/cuopt/source/lp-milp-settings.rst @@ -254,13 +254,13 @@ cuDSS Deterministic Mode Dual Initial Point """""""""""""""""" -``CUOPT_BARRIER_DUAL_INITIAL_POINT`` controls the method used to compute the dual initial point for the barrier solver. The choice of initial point can affect convergence speed and numerical stability. +``CUOPT_BARRIER_DUAL_INITIAL_POINT`` controls the method used to compute the dual initial point for the barrier solver. The choice of initial point will affect the number of iterations performed by barrier. * ``-1``: Automatic (default) - cuOpt selects the best method -* ``0``: Use the Lustig-Marsten-Shanno initial point - a heuristic approach based on the paper "On Implementing Mehrotra's Predictor–Corrector Interior-Point Method for Linear Programming" (SIAM Journal on Optimization, 1992) +* ``0``: Use an initial point from a heuristic approach based on the paper "On Implementing Mehrotra's Predictor–Corrector Interior-Point Method for Linear Programming" (SIAM J. Optimization, 1992) by Lustig, Marsten, Shanno. -* ``1``: Use initial point from dual least squares problem - solves a least squares problem to compute a dual solution that satisfies the dual feasibility constraints +* ``1``: Use an initial point from solving a least squares problem that minimizes the norms of the dual variables and reduced costs while satisfying the dual equality constraints. -.. note:: the default value is ``-1`` (automatic). When set to automatic, the behavior is equivalent to option ``0``. +.. note:: the default value is ``-1`` (automatic). 
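Read literally, option ``1`` corresponds to an auxiliary problem of the following shape (a sketch only, in generic LP notation with constraint matrix ``A``, objective ``c``, duals ``y`` and reduced costs ``z``; handling of variable bounds is omitted)::

    minimize     ||y||^2 + ||z||^2
    subject to   A^T y + z = c

whose solution is taken as the starting dual iterate.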
Absolute Primal Tolerance ^^^^^^^^^^^^^^^^^^^^^^^^^ From 5d568423f87c1685b8c4dcdea70488e4790a3bd4 Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Thu, 9 Oct 2025 12:51:15 -0500 Subject: [PATCH 39/40] style --- docs/cuopt/source/lp-milp-settings.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/cuopt/source/lp-milp-settings.rst b/docs/cuopt/source/lp-milp-settings.rst index 947c2bdf2..d755d5a97 100644 --- a/docs/cuopt/source/lp-milp-settings.rst +++ b/docs/cuopt/source/lp-milp-settings.rst @@ -260,7 +260,7 @@ Dual Initial Point * ``0``: Use an initial point from a heuristic approach based on the paper "On Implementing Mehrotra's Predictor–Corrector Interior-Point Method for Linear Programming" (SIAM J. Optimization, 1992) by Lustig, Martsten, Shanno. * ``1``: Use an initial point from solving a least squares problem that minimizes the norms of the dual variables and reduced costs while statisfying the dual equality constraints. -.. note:: the default value is ``-1`` (automatic). +.. note:: the default value is ``-1`` (automatic). Absolute Primal Tolerance ^^^^^^^^^^^^^^^^^^^^^^^^^ From b6a7ad51a8faee7c2b82495d5afecffaa5aacc1e Mon Sep 17 00:00:00 2001 From: Ramakrishna Prabhu Date: Thu, 9 Oct 2025 17:39:42 -0500 Subject: [PATCH 40/40] add nvjit link to list --- python/libcuopt/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/python/libcuopt/CMakeLists.txt b/python/libcuopt/CMakeLists.txt index 175e501e4..b6fbb6b2b 100644 --- a/python/libcuopt/CMakeLists.txt +++ b/python/libcuopt/CMakeLists.txt @@ -86,6 +86,7 @@ set(rpaths "$ORIGIN/../../nvidia/curand/lib" "$ORIGIN/../../nvidia/cusolver/lib" "$ORIGIN/../../nvidia/cusparse/lib" + "$ORIGIN/../../nvidia/nvjitlink/lib" ) # Add CUDA version-specific paths based on CUDA compiler version