From 96a80eddfc18739a5dbd2ab23b3c9de0d5c28be6 Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Tue, 2 Jun 2026 22:10:23 +0000
Subject: [PATCH 01/76] Add RF-DETR Triton postproc flag

---
 inference_models/docs/changelog.md                 | 12 ++++++++++++
 inference_models/inference_models/configuration.py |  9 +++++++++
 2 files changed, 21 insertions(+)

diff --git a/inference_models/docs/changelog.md b/inference_models/docs/changelog.md
index 19905bd793..5166f497ea 100644
--- a/inference_models/docs/changelog.md
+++ b/inference_models/docs/changelog.md
@@ -1,5 +1,16 @@
 # Changelog
 
+## `0.29.0`
+
+### Added
+
+- Opt-in, Triton-based RLE post-processing for RF-DETR instance segmentation.
+  Set `INFERENCE_MODELS_RFDETR_TRITON_POSTPROC_ENABLED=True` to generate COCO
+  RLE masks directly from sparse interpolated mask regions on supported CUDA
+  inputs.
+
+---
+
 ## `0.28.7`
 
 - Added YOLO26 semantic segmentation support (ONNX, TorchScript, and TensorRT backends).
@@ -13,6 +24,7 @@
 
 - Ported SAM3 to inference_models
 - There were issues with dependencies while introducing SAM3 hence versions `0.28.2` and `0.28.3`
+---
 
 ## `0.28.1`
 
diff --git a/inference_models/inference_models/configuration.py b/inference_models/inference_models/configuration.py
index 3b7fb89bce..c8673598de 100644
--- a/inference_models/inference_models/configuration.py
+++ b/inference_models/inference_models/configuration.py
@@ -289,6 +289,15 @@
     variable_name="INFERENCE_MODELS_RFDETR_DEFAULT_CONFIDENCE",
     default=INFERENCE_MODELS_DEFAULT_CONFIDENCE,
 )
+DEFAULT_INFERENCE_MODELS_RFDETR_TRITON_POSTPROC_ENABLED = False  # hard default: the Triton post-processing path is off
+_LEGACY_RFDETR_TRITON_POSTPROC = get_boolean_from_env(  # value of the legacy, unprefixed variable name
+    variable_name="RFDETR_TRITON_POSTPROC",
+    default=DEFAULT_INFERENCE_MODELS_RFDETR_TRITON_POSTPROC_ENABLED,
+)
+INFERENCE_MODELS_RFDETR_TRITON_POSTPROC_ENABLED = get_boolean_from_env(
+    variable_name="INFERENCE_MODELS_RFDETR_TRITON_POSTPROC_ENABLED",
+    default=_LEGACY_RFDETR_TRITON_POSTPROC,  # legacy value is the default; presumably used when the new variable is unset — confirm in get_boolean_from_env
+)
 INFERENCE_MODELS_ROBOFLOW_INSTANT_DEFAULT_CONFIDENCE = get_float_from_env(
     variable_name="INFERENCE_MODELS_ROBOFLOW_INSTANT_DEFAULT_CONFIDENCE",
     default=0.99,

From 83b6f69b419f7b822df4b95d620164371fe7e9f7 Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Tue, 2 Jun 2026 22:10:33 +0000
Subject: [PATCH 02/76] Add batched instance RLE alignment helper

---
 .../models/common/roboflow/post_processing.py | 122 +++++++++++++
 .../common/roboflow/test_post_processing.py   | 162 ++++++++++++++++++
 2 files changed, 284 insertions(+)

diff --git a/inference_models/inference_models/models/common/roboflow/post_processing.py b/inference_models/inference_models/models/common/roboflow/post_processing.py
index a92d4242f8..e820d6a950 100644
--- a/inference_models/inference_models/models/common/roboflow/post_processing.py
+++ b/inference_models/inference_models/models/common/roboflow/post_processing.py
@@ -506,6 +506,128 @@ def align_instance_segmentation_results(
     return image_bboxes, masks
 
 
+def align_instance_segmentation_results_to_rle_masks_batch(  # aligns all boxes and masks of one image at once; returns (boxes, one RLE per mask)
+    image_bboxes: torch.Tensor,  # modified in place; columns 0..3 are shifted, scaled and clamped as x, y, x, y
+    masks: torch.Tensor,  # unpacked below as (n, mask_height, mask_width)
+    padding: Tuple[int, int, int, int],  # unpacked below as (left, top, right, bottom)
+    scale_width: float,
+    scale_height: float,
+    original_size: ImageDimensions,
+    size_after_pre_processing: ImageDimensions,
+    inference_size: ImageDimensions,
+    static_crop_offset: StaticCropOffset,
+    binarization_threshold: float = 0.0,
+) -> Tuple[torch.Tensor, List[dict]]:
+    if image_bboxes.shape[0] == 0:  # no detections: nothing to align or encode
+        return image_bboxes, []
+
+    pad_left, pad_top, pad_right, pad_bottom = padding
+    offsets = torch.tensor(
+        [pad_left, pad_top, pad_left, pad_top],
+        device=image_bboxes.device,
+    )
+    image_bboxes[:, :4].sub_(offsets)  # remove the left/top padding from the box coordinates (in place)
+    scale = torch.as_tensor(
+        [scale_width, scale_height, scale_width, scale_height],
+        dtype=image_bboxes.dtype,
+        device=image_bboxes.device,
+    )
+    image_bboxes[:, :4].div_(scale)  # divide x by scale_width and y by scale_height (in place)
+    n, mh, mw = masks.shape
+    mask_h_scale = mh / inference_size.height  # mask rows per inference-size row
+    mask_w_scale = mw / inference_size.width  # mask columns per inference-size column
+    mask_pad_top, mask_pad_bottom, mask_pad_left, mask_pad_right = (  # padding expressed in mask pixels
+        round(mask_h_scale * pad_top),
+        round(mask_h_scale * pad_bottom),
+        round(mask_w_scale * pad_left),
+        round(mask_w_scale * pad_right),
+    )
+    if (  # any negative padding: zero-pad that side of the masks instead of cropping it
+        mask_pad_top < 0
+        or mask_pad_bottom < 0
+        or mask_pad_left < 0
+        or mask_pad_right < 0
+    ):
+        masks = torch.nn.functional.pad(
+            masks,
+            (  # pad amounts for the last two dims, in the order left, right, top, bottom
+                abs(min(mask_pad_left, 0)),
+                abs(min(mask_pad_right, 0)),
+                abs(min(mask_pad_top, 0)),
+                abs(min(mask_pad_bottom, 0)),
+            ),
+            "constant",
+            0,
+        )
+        padded_mask_offset_top = max(mask_pad_top, 0)  # sides with positive padding are still cropped below
+        padded_mask_offset_bottom = max(mask_pad_bottom, 0)
+        padded_mask_offset_left = max(mask_pad_left, 0)
+        padded_mask_offset_right = max(mask_pad_right, 0)
+        masks = masks[
+            :,
+            padded_mask_offset_top : masks.shape[1] - padded_mask_offset_bottom,
+            padded_mask_offset_left : masks.shape[2] - padded_mask_offset_right,
+        ]
+    else:  # all paddings are non-negative: crop them off directly
+        masks = masks[
+            :, mask_pad_top : mh - mask_pad_bottom, mask_pad_left : mw - mask_pad_right
+        ]
+    masks = (  # resize to the pre-processing size, then binarize
+        torch.nn.functional.interpolate(
+            masks[:, None],  # add a channel dimension for interpolate
+            size=(
+                size_after_pre_processing.height,
+                size_after_pre_processing.width,
+            ),
+            mode="bilinear",
+            align_corners=False,
+            antialias=True,
+        )
+        .squeeze(1)
+        .gt_(binarization_threshold)  # in-place comparison against the threshold
+        .to(dtype=torch.bool)
+    )
+    if static_crop_offset.offset_x > 0 or static_crop_offset.offset_y > 0:  # static crop: paste masks into an original-size canvas and shift the boxes
+        mask_canvas = torch.zeros(
+            (
+                masks.shape[0],
+                original_size.height,
+                original_size.width,
+            ),
+            dtype=torch.bool,
+            device=masks.device,
+        )
+        mask_canvas[
+            :,
+            static_crop_offset.offset_y : static_crop_offset.offset_y + masks.shape[1],
+            static_crop_offset.offset_x : static_crop_offset.offset_x + masks.shape[2],
+        ] = masks
+        static_crop_offsets = torch.as_tensor(
+            [
+                static_crop_offset.offset_x,
+                static_crop_offset.offset_y,
+                static_crop_offset.offset_x,
+                static_crop_offset.offset_y,
+            ],
+            dtype=image_bboxes.dtype,
+            device=image_bboxes.device,
+        )
+        image_bboxes[:, :4].add_(static_crop_offsets)  # move boxes from crop coordinates to original-image coordinates
+        masks = mask_canvas
+    xyxy_max = torch.as_tensor(
+        [
+            original_size.width,
+            original_size.height,
+            original_size.width,
+            original_size.height,
+        ],
+        dtype=image_bboxes.dtype,
+        device=image_bboxes.device,
+    )
+    image_bboxes[:, :4].clamp_(min=torch.zeros_like(xyxy_max), max=xyxy_max)  # keep boxes within [0, width] x [0, height] of the original image
+    return image_bboxes, [torch_mask_to_coco_rle(mask) for mask in masks]  # one torch_mask_to_coco_rle result per mask, in input order
+
+
 def align_instance_segmentation_results_to_rle_masks(
     image_bboxes: torch.Tensor,
     masks: torch.Tensor,
diff --git a/inference_models/tests/unit_tests/models/common/roboflow/test_post_processing.py b/inference_models/tests/unit_tests/models/common/roboflow/test_post_processing.py
index f85404c75b..885f67f15d 100644
--- a/inference_models/tests/unit_tests/models/common/roboflow/test_post_processing.py
+++ b/inference_models/tests/unit_tests/models/common/roboflow/test_post_processing.py
@@ -5,8 +5,10 @@
   - NMS helpers: per-class `conf_thresh` tensor path
 """
 
+import numpy as np
 import pytest
 import torch
+from pycocotools import mask as mask_utils
 
 from inference_models.configuration import INFERENCE_MODELS_DEFAULT_CONFIDENCE
 from inference_models.entities import ImageDimensions
@@ -17,6 +19,8 @@
 from inference_models.models.common.roboflow.post_processing import (
     ConfidenceFilter,
     align_instance_segmentation_results,
+    align_instance_segmentation_results_to_rle_masks,
+    align_instance_segmentation_results_to_rle_masks_batch,
     post_process_nms_fused_model_output,
     rescale_image_detections,
     rescale_key_points_detections,
@@ -40,6 +44,40 @@ def _od_output(box_class_conf):
     return out
 
 
+def _decode_rles(rles, height: int, width: int) -> np.ndarray:  # decode a list of COCO RLEs into a boolean array with one leading slice per RLE
+    if not rles:
+        return np.empty((0, height, width), dtype=bool)  # empty input still yields a 3-D array
+    decoded = mask_utils.decode(rles)
+    if decoded.ndim == 2:
+        decoded = decoded[:, :, None]  # a 2-D result gets a trailing mask axis so the transpose below works
+    return decoded.transpose(2, 0, 1).astype(bool)  # move the trailing mask axis to the front
+
+
+def _rle_alignment_inputs():  # shared fixture: three boxes and three 8x10 float masks
+    bboxes = torch.tensor(
+        [
+            [1.0, 2.0, 9.0, 7.0],
+            [-2.0, -1.0, 5.0, 4.0],  # negative coordinates
+            [4.0, 1.0, 14.0, 12.0],  # coordinates larger than the 8x10 mask grid
+        ],
+        dtype=torch.float32,
+    )
+    masks = torch.full((3, 8, 10), -1.0, dtype=torch.float32)  # background is -1.0 everywhere
+    masks[0, 2:6, 3:8] = 2.0  # one positive rectangle per mask
+    masks[1, 1:4, 1:5] = 1.0
+    masks[2, 4:7, 5:9] = 3.0
+    return bboxes, masks
+
+
+def _static_crop(offset_x: int, offset_y: int, width: int, height: int):  # shorthand constructor for StaticCropOffset
+    return StaticCropOffset(
+        offset_x=offset_x,
+        offset_y=offset_y,
+        crop_width=width,  # width/height map onto the crop_* fields
+        crop_height=height,
+    )
+
+
 class TestRunNmsForObjectDetection:
     def test_scalar_keeps_all_above_threshold(self) -> None:
         # Three well-separated boxes, three classes, conf 0.7/0.5/0.3.
@@ -443,3 +481,127 @@ def test_clips_box_coords(self) -> None:
         assert out_bboxes[0, 1].item() == pytest.approx(20.0)
         assert out_bboxes[0, 2].item() == pytest.approx(600.0)
         assert out_bboxes[0, 3].item() == pytest.approx(400.0)
+
+
+class TestAlignInstanceSegmentationResultsToRleMasksBatch:  # checks the batch helper against align_instance_segmentation_results_to_rle_masks
+
+    @pytest.mark.parametrize(
+        "case",
+        [
+            {  # case 1: no padding, no crop, threshold 0.0
+                "padding": (0, 0, 0, 0),
+                "original_size": ImageDimensions(height=8, width=10),
+                "size_after_pre_processing": ImageDimensions(height=8, width=10),
+                "inference_size": ImageDimensions(height=8, width=10),
+                "static_crop_offset": _static_crop(0, 0, 10, 8),
+                "binarization_threshold": 0.0,
+            },
+            {  # case 2: positive padding on three sides
+                "padding": (1, 1, 1, 0),
+                "original_size": ImageDimensions(height=8, width=10),
+                "size_after_pre_processing": ImageDimensions(height=8, width=10),
+                "inference_size": ImageDimensions(height=8, width=10),
+                "static_crop_offset": _static_crop(0, 0, 10, 8),
+                "binarization_threshold": 0.0,
+            },
+            {  # case 3: negative left/right padding
+                "padding": (-1, 0, -1, 0),
+                "original_size": ImageDimensions(height=8, width=10),
+                "size_after_pre_processing": ImageDimensions(height=8, width=10),
+                "inference_size": ImageDimensions(height=8, width=10),
+                "static_crop_offset": _static_crop(0, 0, 10, 8),
+                "binarization_threshold": 0.0,
+            },
+            {  # case 4: non-zero static crop offset inside a larger original image
+                "padding": (0, 0, 0, 0),
+                "original_size": ImageDimensions(height=11, width=13),
+                "size_after_pre_processing": ImageDimensions(height=8, width=10),
+                "inference_size": ImageDimensions(height=8, width=10),
+                "static_crop_offset": _static_crop(2, 1, 10, 8),
+                "binarization_threshold": 0.0,
+            },
+            {  # case 5: non-default binarization threshold
+                "padding": (0, 0, 0, 0),
+                "original_size": ImageDimensions(height=8, width=10),
+                "size_after_pre_processing": ImageDimensions(height=8, width=10),
+                "inference_size": ImageDimensions(height=8, width=10),
+                "static_crop_offset": _static_crop(0, 0, 10, 8),
+                "binarization_threshold": 0.5,
+            },
+        ],
+    )
+    def test_batch_matches_generator_path(self, case: dict) -> None:
+        bboxes, masks = _rle_alignment_inputs()
+        batch_boxes = bboxes.clone()  # separate clones: each call gets its own box tensor
+        generator_boxes = bboxes.clone()
+
+        actual_boxes, actual_rles = (
+            align_instance_segmentation_results_to_rle_masks_batch(
+                image_bboxes=batch_boxes,
+                masks=masks.clone(),
+                scale_width=1.0,
+                scale_height=1.0,
+                **case,
+            )
+        )
+        expected_pairs = list(  # the reference function is iterated as (bbox, rle) pairs
+            align_instance_segmentation_results_to_rle_masks(
+                image_bboxes=generator_boxes,
+                masks=masks.clone(),
+                scale_width=1.0,
+                scale_height=1.0,
+                **case,
+            )
+        )
+        expected_boxes = torch.stack([bbox for bbox, _ in expected_pairs])
+        expected_rles = [rle for _, rle in expected_pairs]
+
+        torch.testing.assert_close(actual_boxes, expected_boxes, rtol=0, atol=0)  # exact equality required
+        torch.testing.assert_close(batch_boxes, expected_boxes, rtol=0, atol=0)  # the input tensor itself must hold the aligned boxes
+        np.testing.assert_array_equal(  # compare decoded masks rather than raw RLE dicts
+            _decode_rles(
+                actual_rles,
+                case["original_size"].height,
+                case["original_size"].width,
+            ),
+            _decode_rles(
+                expected_rles,
+                case["original_size"].height,
+                case["original_size"].width,
+            ),
+        )
+
+    def test_empty_batch_matches_generator_path(self) -> None:  # zero detections: both paths must produce nothing
+        case = {
+            "padding": (0, 0, 0, 0),
+            "original_size": ImageDimensions(height=8, width=10),
+            "size_after_pre_processing": ImageDimensions(height=8, width=10),
+            "inference_size": ImageDimensions(height=8, width=10),
+            "static_crop_offset": _static_crop(0, 0, 10, 8),
+            "binarization_threshold": 0.0,
+        }
+        bboxes = torch.empty((0, 4), dtype=torch.float32)
+        masks = torch.empty((0, 8, 10), dtype=torch.float32)
+
+        actual_boxes, actual_rles = (
+            align_instance_segmentation_results_to_rle_masks_batch(
+                image_bboxes=bboxes.clone(),
+                masks=masks.clone(),
+                scale_width=1.0,
+                scale_height=1.0,
+                **case,
+            )
+        )
+        expected_pairs = list(
+            align_instance_segmentation_results_to_rle_masks(
+                image_bboxes=bboxes.clone(),
+                masks=masks.clone(),
+                scale_width=1.0,
+                scale_height=1.0,
+                **case,
+            )
+        )
+
+        assert actual_boxes.shape == (0, 4)
+        assert actual_rles == []
+        assert expected_pairs == []

From 6f9d7ca4eaede0526b94802c7c105e137d9b7b41 Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Tue, 2 Jun 2026 22:10:39 +0000
Subject: [PATCH 03/76] Add RF-DETR Triton sparse RLE postprocess

---
 .../models/rfdetr/triton_postprocess.py       | 1135 +++++++++++++++++
 1 file changed, 1135 insertions(+)
 create mode 100644 inference_models/inference_models/models/rfdetr/triton_postprocess.py

diff --git a/inference_models/inference_models/models/rfdetr/triton_postprocess.py b/inference_models/inference_models/models/rfdetr/triton_postprocess.py
new file mode 100644
index 0000000000..0851cdf824
--- /dev/null
+++ b/inference_models/inference_models/models/rfdetr/triton_postprocess.py
@@ -0,0 +1,1135 @@
+from collections import OrderedDict
+from threading import Lock
+from typing import List, Optional, Tuple, Union
+
+import numpy as np
+import torch
+import torch.nn.functional as F
+from pycocotools import mask as mask_utils
+
+from inference_models.logger import LOGGER
+from inference_models.models.base.instance_segmentation import InstanceDetections
+from inference_models.models.base.types import InstancesRLEMasks
+from inference_models.models.common.roboflow.model_packages import PreProcessingMetadata
+from inference_models.models.rfdetr.class_remapping import ClassesReMapping
+
+try:
+    import triton
+    import triton.language as tl
+except ImportError:  # pragma: no cover - depends on optional GPU package
+    triton = None
+    tl = None
+
+
+_HEADER_SIZE = 16  # columns per metadata row; also passed to the kernels as METADATA_STRIDE
+_BLOCK_ROI_H = 512  # passed to the sparse RLE kernel as BLOCK_ROI_H
+_MAX_EXACT_FLAT_INDEX = 1 << 24  # output_height * output_width must stay below this; presumably the float32 exact-integer limit — confirm
+_SPARSE_MAX_ROI_WIDTH = 512  # kernel MAX_ROI_WIDTH; also sizes the second launch-grid dimension
+_SPARSE_BLOCK_COLS = 8  # kernel BLOCK_COLS; divisor for the second launch-grid dimension
+_SPARSE_MAX_TOTAL_RUNS = 8192  # run-record capacity of the first pass (the buffer has one extra header row)
+_SPARSE_MAX_CLASSES_PER_QUERY = 4  # metadata rows allocated per query in the top-k retry pass
+_SPARSE_TOPK_MAX_TOTAL_RUNS = _SPARSE_MAX_TOTAL_RUNS * _SPARSE_MAX_CLASSES_PER_QUERY  # run-record capacity of the retry pass
+_MAX_INTERPOLATION_WEIGHT_CACHE_ENTRIES = 16  # eviction bound for the cache below
+_INTERPOLATION_WEIGHT_CACHE = OrderedDict()  # key -> (indices, values); insertion order tracks recency for eviction
+_INTERPOLATION_WEIGHT_CACHE_LOCK = Lock()  # held around every read and write of the cache
+
+
+def _get_interpolation_weights(  # builds (and caches) a two-slot interpolation table for one axis
+    src_size: int,
+    output_size: int,
+    device: torch.device,
+    axis: str,  # "height" takes the first branch below; any other value takes the second
+) -> Tuple[torch.Tensor, torch.Tensor]:  # (int32 source indices, float32 weights), each of shape (output_size, 2)
+    device_key = _interpolation_cache_key(src_size, output_size, device, axis)
+    with _INTERPOLATION_WEIGHT_CACHE_LOCK:
+        cached = _INTERPOLATION_WEIGHT_CACHE.get(device_key)
+        if cached is not None:
+            _INTERPOLATION_WEIGHT_CACHE.move_to_end(device_key)  # mark as most recently used
+            return cached
+
+    if axis == "height":  # interpolate an identity basis to read off every source index's weight for every output index
+        basis = torch.eye(src_size, device=device).reshape(src_size, 1, src_size, 1)
+        weights = F.interpolate(
+            basis,
+            size=(output_size, 1),
+            mode="bilinear",
+            align_corners=False,
+            antialias=True,
+        )[:, 0, :, 0].T.contiguous()  # transposed to (output_size, src_size)
+    else:
+        basis = torch.eye(src_size, device=device).reshape(src_size, 1, 1, src_size)
+        weights = F.interpolate(
+            basis,
+            size=(1, output_size),
+            mode="bilinear",
+            align_corners=False,
+            antialias=True,
+        )[:, 0, 0, :].T.contiguous()  # transposed to (output_size, src_size)
+
+    nonzero = weights != 0
+    if int(nonzero.sum(dim=1).max().item()) > 2:  # NOTE(review): antialiased downscaling (output_size < src_size) appears to need more than 2 taps and would raise here; the support check does not reject it — confirm
+        raise ValueError("Expected antialiased bilinear interpolation to use 2 taps")
+    indices = torch.zeros((output_size, 2), dtype=torch.int32, device=device)
+    values = torch.zeros((output_size, 2), dtype=torch.float32, device=device)
+    for output_index in range(output_size):  # pack each row's non-zero taps into the two slots; unused slots keep index 0 and weight 0
+        source_indices = nonzero[output_index].nonzero(as_tuple=True)[0]
+        indices[output_index, : source_indices.numel()] = source_indices.to(
+            dtype=torch.int32
+        )
+        values[output_index, : source_indices.numel()] = weights[
+            output_index, source_indices
+        ]
+
+    cached_value = (indices, values)
+    with _INTERPOLATION_WEIGHT_CACHE_LOCK:  # the table was computed outside the lock, so look the key up again before storing
+        cached = _INTERPOLATION_WEIGHT_CACHE.get(device_key)
+        if cached is not None:  # the key was stored in the meantime: return the stored value instead
+            _INTERPOLATION_WEIGHT_CACHE.move_to_end(device_key)
+            return cached
+        _INTERPOLATION_WEIGHT_CACHE[device_key] = cached_value
+        while (  # evict the oldest entries until the cache is within its bound
+            len(_INTERPOLATION_WEIGHT_CACHE) > _MAX_INTERPOLATION_WEIGHT_CACHE_ENTRIES
+        ):
+            _INTERPOLATION_WEIGHT_CACHE.popitem(last=False)
+    return cached_value
+
+
+def _interpolation_cache_key(  # builds the hashable key used by the interpolation weight cache
+    src_size: int,
+    output_size: int,
+    device: torch.device,
+    axis: str,
+) -> Tuple[str, int, int, int, int, str]:
+    return (
+        device.type,
+        -1 if device.index is None else device.index,  # -1 stands in for a device without an explicit index
+        src_size,
+        output_size,
+        torch.cuda.current_device() if device.type == "cuda" else -1,  # NOTE(review): presumably disambiguates an index-less "cuda" device — confirm
+        axis,
+    )
+
+
+def post_process_single_instance_segmentation_result_to_rle_masks_triton(  # Triton path for one image; returns None when the inputs are unsupported or a pass fails
+    image_bboxes: torch.Tensor,  # validated below as (num_queries, 4)
+    image_scores: torch.Tensor,  # validated below as (num_queries, num_classes)
+    image_masks: torch.Tensor,  # validated below as 3-D with num_queries leading entries
+    image_meta: PreProcessingMetadata,
+    threshold: Union[float, torch.Tensor],  # tensor thresholds are rejected by the support check
+    classes_re_mapping: Optional[ClassesReMapping],  # required: None is rejected by the support check
+) -> Optional[InstanceDetections]:
+    unsupported_reason = _unsupported_triton_postprocess_reason(
+        image_bboxes=image_bboxes,
+        image_scores=image_scores,
+        image_masks=image_masks,
+        image_meta=image_meta,
+        threshold=threshold,
+        classes_re_mapping=classes_re_mapping,
+    )
+    if unsupported_reason is not None:  # unsupported input: log the reason code and return None
+        LOGGER.debug(
+            "RF-DETR Triton postprocess path is unsupported: %s",
+            unsupported_reason,
+        )
+        return None
+
+    image_scores = image_scores.contiguous()
+    image_bboxes = image_bboxes.contiguous()
+    image_masks = image_masks.contiguous()
+    class_mapping = classes_re_mapping.class_mapping.contiguous()
+    num_queries, num_classes = image_scores.shape
+    mask_height, mask_width = image_masks.shape[-2:]
+    output_height = image_meta.original_size.height  # masks are produced at the original image size
+    output_width = image_meta.original_size.width
+    confidence_threshold = float(threshold)
+
+    y_idx, y_weight = _get_interpolation_weights(  # per-axis two-slot interpolation tables (cached)
+        src_size=mask_height,
+        output_size=output_height,
+        device=image_masks.device,
+        axis="height",
+    )
+    x_idx, x_weight = _get_interpolation_weights(
+        src_size=mask_width,
+        output_size=output_width,
+        device=image_masks.device,
+        axis="width",
+    )
+
+    metadata = torch.empty(  # first pass: one metadata row per query
+        (num_queries, _HEADER_SIZE),
+        dtype=torch.float32,
+        device=image_scores.device,
+    )
+    records = torch.empty(
+        (_SPARSE_MAX_TOTAL_RUNS + 1, 3),  # row 0 is read back as a header; the remaining rows are run records of 3 ints
+        dtype=torch.int32,
+        device=image_scores.device,
+    )
+    _select_best_query_metadata_kernel[(num_queries,)](  # one program per query
+        image_scores,
+        image_bboxes,
+        class_mapping,
+        metadata,
+        records,
+        confidence_threshold,
+        num_queries,
+        num_classes,
+        class_mapping.shape[0],
+        output_height,
+        output_width,
+        BLOCK_CLASSES=triton.next_power_of_2(num_classes),
+        METADATA_STRIDE=_HEADER_SIZE,
+        FLAG_MULTICLASS=True,
+    )
+    _sparse_atomic_rle_from_metadata_kernel[  # 2-D grid: metadata rows x column blocks
+        (num_queries, triton.cdiv(_SPARSE_MAX_ROI_WIDTH, _SPARSE_BLOCK_COLS))
+    ](
+        image_masks,
+        y_idx,
+        y_weight,
+        x_idx,
+        x_weight,
+        metadata,
+        records,
+        num_queries,
+        mask_height,
+        mask_width,
+        output_height,
+        output_width,
+        image_masks.stride(0),
+        image_masks.stride(1),
+        image_masks.stride(2),
+        BLOCK_MASK=triton.next_power_of_2(mask_height * mask_width),
+        BLOCK_OUT_H=triton.next_power_of_2(output_height),
+        BLOCK_OUT_W=triton.next_power_of_2(output_width),
+        BLOCK_ROI_H=_BLOCK_ROI_H,
+        MAX_ROI_WIDTH=_SPARSE_MAX_ROI_WIDTH,
+        MAX_TOTAL_RUNS=_SPARSE_MAX_TOTAL_RUNS,
+        METADATA_STRIDE=_HEADER_SIZE,
+        BLOCK_COLS=_SPARSE_BLOCK_COLS,
+    )
+
+    metadata_host = metadata.cpu().numpy()  # host copy of the metadata for the NumPy-side decoding
+    result = _instance_detections_from_sparse_records(
+        metadata_host=metadata_host,
+        records=records,
+        max_total_runs=_SPARSE_MAX_TOTAL_RUNS,
+        height=output_height,
+        width=output_width,
+    )
+    if result is not None:  # first pass succeeded
+        return result
+    if not _should_retry_sparse_topk_metadata(  # first pass failed in a way the top-k pass is not attempted for
+        metadata_host=metadata_host,
+        records=records,
+        max_total_runs=_SPARSE_MAX_TOTAL_RUNS,
+    ):
+        return None
+
+    topk_metadata_rows = num_queries * _SPARSE_MAX_CLASSES_PER_QUERY  # retry pass: several metadata rows per query
+    metadata = torch.empty(
+        (topk_metadata_rows, _HEADER_SIZE),
+        dtype=torch.float32,
+        device=image_scores.device,
+    )
+    records = torch.empty(
+        (_SPARSE_TOPK_MAX_TOTAL_RUNS + 1, 3),  # same layout as the first pass, with a larger capacity
+        dtype=torch.int32,
+        device=image_scores.device,
+    )
+    _select_topk_query_class_metadata_kernel[(num_queries,)](
+        image_scores,
+        image_bboxes,
+        class_mapping,
+        metadata,
+        metadata,  # NOTE(review): the same buffer is passed for two kernel parameters, with FLAG_WRITE_QUERY_METADATA=False — confirm this is intended
+        records,
+        confidence_threshold,
+        num_queries,
+        num_classes,
+        class_mapping.shape[0],
+        output_height,
+        output_width,
+        BLOCK_CLASSES=triton.next_power_of_2(num_classes),
+        METADATA_STRIDE=_HEADER_SIZE,
+        MAX_CLASSES_PER_QUERY=_SPARSE_MAX_CLASSES_PER_QUERY,
+        FLAG_WRITE_QUERY_METADATA=False,
+        FLAG_OVERFLOW_CLASSES=True,
+    )
+    _sparse_atomic_rle_from_metadata_kernel[  # same RLE kernel, now launched over the top-k metadata rows
+        (
+            topk_metadata_rows,
+            triton.cdiv(_SPARSE_MAX_ROI_WIDTH, _SPARSE_BLOCK_COLS),
+        )
+    ](
+        image_masks,
+        y_idx,
+        y_weight,
+        x_idx,
+        x_weight,
+        metadata,
+        records,
+        topk_metadata_rows,
+        mask_height,
+        mask_width,
+        output_height,
+        output_width,
+        image_masks.stride(0),
+        image_masks.stride(1),
+        image_masks.stride(2),
+        BLOCK_MASK=triton.next_power_of_2(mask_height * mask_width),
+        BLOCK_OUT_H=triton.next_power_of_2(output_height),
+        BLOCK_OUT_W=triton.next_power_of_2(output_width),
+        BLOCK_ROI_H=_BLOCK_ROI_H,
+        MAX_ROI_WIDTH=_SPARSE_MAX_ROI_WIDTH,
+        MAX_TOTAL_RUNS=_SPARSE_TOPK_MAX_TOTAL_RUNS,
+        METADATA_STRIDE=_HEADER_SIZE,
+        BLOCK_COLS=_SPARSE_BLOCK_COLS,
+    )
+    metadata_host = metadata.cpu().numpy()
+    return _instance_detections_from_sparse_records(  # may still be None if the retry pass also fails its checks
+        metadata_host=metadata_host,
+        records=records,
+        max_total_runs=_SPARSE_TOPK_MAX_TOTAL_RUNS,
+        height=output_height,
+        width=output_width,
+        max_detections=num_queries,  # cap the retry output at num_queries detections
+    )
+
+
+def _instance_detections_from_sparse_records(  # turns kernel metadata and run records into InstanceDetections; None signals a failed pass
+    metadata_host: np.ndarray,  # host copy of the metadata rows; columns 0, 1, 2, 3:7, 8 and 10 are read here
+    records: torch.Tensor,  # row 0 is a header, the following rows are (metadata row, start, end) runs
+    max_total_runs: int,
+    height: int,
+    width: int,
+    max_detections: Optional[int] = None,  # optional cap applied after sorting
+) -> Optional[InstanceDetections]:
+    active_ranks = np.flatnonzero(metadata_host[:, 0] > 0.5)  # column 0 above 0.5 marks an active row
+    if active_ranks.size == 0:  # no active rows: return an empty result rather than None
+        return InstanceDetections(
+            xyxy=torch.empty((0, 4), dtype=torch.int32),
+            confidence=torch.empty((0,), dtype=torch.float32),
+            class_id=torch.empty((0,), dtype=torch.int32),
+            mask=InstancesRLEMasks.from_coco_rle_masks(
+                image_size=(height, width),
+                masks=[],
+            ),
+        )
+    if np.any(metadata_host[active_ranks, 8] > 0.5):  # column 8 set on any active row aborts; its meaning is defined by the kernels — TODO confirm
+        return None
+    records_host = records.cpu().numpy()
+    total_runs = int(records_host[0, 0])  # header field 0: number of run records written
+    if int(records_host[0, 1]) != 0 or total_runs < 0 or total_runs > max_total_runs:  # non-zero header field 1 or an out-of-range count aborts
+        return None
+
+    order = np.lexsort(  # lexsort uses the last key first: descending column 2, ties by descending column 10
+        (
+            -metadata_host[active_ranks, 10],
+            -metadata_host[active_ranks, 2],
+        )
+    )
+    active_ranks = active_ranks[order]
+    if max_detections is not None:
+        active_ranks = active_ranks[:max_detections]
+    records_host = records_host[1 : total_runs + 1] if total_runs else None  # drop the header row; None when no runs were written
+    boxes = torch.from_numpy(metadata_host[active_ranks, 3:7].copy()).round().int()  # columns 3:7 hold the box, rounded to int
+    confidence = torch.from_numpy(metadata_host[active_ranks, 2].copy())  # column 2 holds the confidence
+    class_id = torch.from_numpy(metadata_host[active_ranks, 1].copy()).int()  # column 1 holds the class id
+
+    rle_masks = []
+    for rank in active_ranks.tolist():
+        if records_host is None:
+            rank_records = np.empty((0, 3), dtype=np.int32)
+        else:
+            rank_records = records_host[records_host[:, 0] == rank]  # runs whose first field equals this metadata row index
+        if rank_records.size:
+            starts_array = rank_records[:, 1].astype(np.int64, copy=False)
+            ends_array = rank_records[:, 2].astype(np.int64, copy=False)
+            order = np.argsort(starts_array, kind="stable")  # runs are sorted by start before encoding
+            starts_array = starts_array[order]
+            ends_array = ends_array[order]
+        else:  # no runs for this row: encoded below as an all-background mask
+            starts_array = np.empty((0,), dtype=np.int64)
+            ends_array = np.empty((0,), dtype=np.int64)
+        counts = _counts_from_runs(
+            starts=starts_array,
+            ends=ends_array,
+            height=height,
+            width=width,
+        )
+        rle_masks.append(_rle_from_counts(counts=counts, height=height, width=width))
+
+    instances_masks = InstancesRLEMasks.from_coco_rle_masks(
+        image_size=(height, width),
+        masks=rle_masks,
+    )
+    return InstanceDetections(
+        xyxy=boxes,
+        confidence=confidence,
+        class_id=class_id,
+        mask=instances_masks,
+    )
+
+
+def _should_retry_sparse_topk_metadata(  # decides whether the top-k pass should run after a failed first pass
+    metadata_host: np.ndarray,
+    records: torch.Tensor,
+    max_total_runs: int,
+) -> bool:
+    active_ranks = np.flatnonzero(metadata_host[:, 0] > 0.5)  # column 0 above 0.5 marks an active row
+    if active_ranks.size == 0 or np.any(metadata_host[active_ranks, 8] > 0.5):  # no active rows, or column 8 set: do not retry
+        return False
+    records_host = records.cpu().numpy()
+    total_runs = int(records_host[0, 0])
+    if int(records_host[0, 1]) == 0 or total_runs < 0 or total_runs > max_total_runs:  # retry only when header field 1 is non-zero and the run count is in range
+        return False
+    return True
+
+
+def _supports_triton_postprocess_path(  # boolean form of _unsupported_triton_postprocess_reason
+    image_bboxes: torch.Tensor,
+    image_scores: torch.Tensor,
+    image_masks: torch.Tensor,
+    image_meta: PreProcessingMetadata,
+    threshold: Union[float, torch.Tensor],
+    classes_re_mapping: Optional[ClassesReMapping],
+) -> bool:
+    return (
+        _unsupported_triton_postprocess_reason(
+            image_bboxes=image_bboxes,
+            image_scores=image_scores,
+            image_masks=image_masks,
+            image_meta=image_meta,
+            threshold=threshold,
+            classes_re_mapping=classes_re_mapping,
+        )
+        is None  # no reason code means the inputs are supported
+    )
+
+
+def _unsupported_triton_postprocess_reason(  # returns a short reason code for the first failed check, or None when all checks pass
+    image_bboxes: torch.Tensor,
+    image_scores: torch.Tensor,
+    image_masks: torch.Tensor,
+    image_meta: PreProcessingMetadata,
+    threshold: Union[float, torch.Tensor],
+    classes_re_mapping: Optional[ClassesReMapping],
+) -> Optional[str]:
+    if triton is None:  # the optional triton import at module top failed
+        return "triton_unavailable"
+    if classes_re_mapping is None:
+        return "class_remapping_required"
+    if isinstance(threshold, torch.Tensor):  # only scalar thresholds are accepted
+        return "tensor_threshold_unsupported"
+    if image_scores.ndim != 2 or image_bboxes.ndim != 2 or image_masks.ndim != 3:
+        return "invalid_tensor_rank"
+    num_queries, num_classes = image_scores.shape
+    if image_bboxes.shape != (num_queries, 4) or image_masks.shape[0] != num_queries:  # boxes and masks must line up with the score rows
+        return "shape_mismatch"
+    if classes_re_mapping.class_mapping.shape[0] < num_classes:  # the mapping must cover every score column
+        return "class_mapping_too_small"
+    mask_height, mask_width = image_masks.shape[-2:]
+    output_height = image_meta.original_size.height
+    output_width = image_meta.original_size.width
+    if (  # size limits of this path; output area must also stay below _MAX_EXACT_FLAT_INDEX
+        num_queries * num_classes > 16384
+        or mask_height * mask_width > 8192
+        or output_height <= 0
+        or output_width <= 0
+        or output_height > 4096
+        or output_width > 4096
+        or output_height * output_width >= _MAX_EXACT_FLAT_INDEX
+    ):
+        return "input_size_exceeds_triton_limits"
+    if (  # any non-zero padding is rejected
+        image_meta.pad_left != 0
+        or image_meta.pad_top != 0
+        or image_meta.pad_right != 0
+        or image_meta.pad_bottom != 0
+    ):
+        return "padding_unsupported"
+    if (  # any non-zero static crop offset is rejected
+        image_meta.static_crop_offset.offset_x != 0
+        or image_meta.static_crop_offset.offset_y != 0
+    ):
+        return "static_crop_unsupported"
+    if (  # the pre-processed size must equal the original size
+        image_meta.size_after_pre_processing.height != output_height
+        or image_meta.size_after_pre_processing.width != output_width
+    ):
+        return "resize_metadata_unsupported"
+    if image_scores.device.type != "cuda":
+        return "cuda_device_required"
+    if (  # all tensors, including the class mapping, must share the scores' device
+        image_bboxes.device != image_scores.device
+        or image_masks.device != image_scores.device
+        or classes_re_mapping.class_mapping.device != image_scores.device
+    ):
+        return "device_mismatch"
+    return None
+
+
+def _counts_from_runs(  # converts sorted [start, end) runs into alternating gap/run-length counts covering height * width cells
+    starts: np.ndarray,  # run starts; the gap arithmetic below assumes ascending, non-overlapping runs — callers sort by start
+    ends: np.ndarray,  # exclusive run ends (run length is ends - starts)
+    height: int,
+    width: int,
+) -> List[int]:
+    total = height * width
+    lengths = ends - starts
+    valid = lengths > 0
+    if starts.size and not np.all(valid):  # drop empty or inverted runs
+        starts = starts[valid]
+        ends = ends[valid]
+        lengths = lengths[valid]
+
+    if starts.size:
+        run_count = starts.size
+        gaps = starts.astype(np.int64, copy=True)  # first gap is starts[0]; copy so the input is not modified
+        gaps[1:] -= ends[:-1]  # later gaps: distance from the previous run's end
+        tail = total - int(ends[-1])  # cells remaining after the last run
+        if tail > 0:
+            counts = np.empty(run_count * 2 + 1, dtype=np.int64)
+            counts[-1] = tail  # trailing gap is appended only when non-empty
+        else:
+            counts = np.empty(run_count * 2, dtype=np.int64)
+        counts[: run_count * 2 : 2] = gaps  # even slots: gaps
+        counts[1 : run_count * 2 : 2] = lengths  # odd slots: run lengths
+        counts = counts.tolist()
+    else:  # no runs: a single gap spanning every cell
+        counts = [total]
+    return counts
+
+
+def _rle_from_counts(counts: List[int], height: int, width: int) -> dict:
+    return mask_utils.frPyObjects(
+        {"counts": counts, "size": [height, width]}, height, width
+    )
+
+
+if triton is not None:
+
+    @triton.jit
+    def _select_best_query_metadata_kernel(
+        scores,
+        bboxes,
+        class_mapping,
+        metadata,
+        records,
+        threshold: tl.constexpr,
+        num_queries: tl.constexpr,
+        num_classes: tl.constexpr,
+        class_mapping_size: tl.constexpr,
+        output_height: tl.constexpr,
+        output_width: tl.constexpr,
+        BLOCK_CLASSES: tl.constexpr,
+        METADATA_STRIDE: tl.constexpr,
+        FLAG_MULTICLASS: tl.constexpr,
+    ):
+        """Select the best mapped class of one query and write its metadata row.
+
+        Each program (``tl.program_id(0)``) handles one query and writes one
+        row of ``METADATA_STRIDE`` values starting at
+        ``metadata + program_id * METADATA_STRIDE``:
+
+        * 0: 1.0 when the detection is valid (mapped class >= 0 and score
+          strictly above ``threshold``), otherwise 0.0
+        * 1: mapped class id read from ``class_mapping`` (-1 when none)
+        * 2: selected score (0.0 when invalid)
+        * 3..6: ``x1, y1, x2, y2`` from ``(cx, cy, w, h)`` scaled by the output
+          size and clamped to ``[0, output_width]`` / ``[0, output_height]``
+        * 9: query index
+        * 10: ``query_index * num_classes + selected_class``
+        * 7, 8, 11..15: zero-initialised here
+
+        ``records[0]`` and ``records[1]`` are reset to 0 by program 0. When
+        ``FLAG_MULTICLASS`` is set, ``records[1]`` is set to 1 by any query
+        with more than one class above ``threshold``.
+
+        ``num_queries`` is accepted but not read in the body.
+        """
+        rank = tl.program_id(0)
+        meta_base = rank * METADATA_STRIDE
+        # NOTE(review): program 0 resets the shared records while other programs
+        # may set records[1] below; nothing in this kernel orders those stores,
+        # so the reset could overwrite a flag raised by another program.
+        if rank == 0:
+            tl.store(records + 0, 0)
+            tl.store(records + 1, 0)
+
+        class_offsets = tl.arange(0, BLOCK_CLASSES)
+        class_active = class_offsets < num_classes
+        # Lanes outside the class range / mapping table read as "unmapped" (-1).
+        mapped_classes = tl.load(
+            class_mapping + class_offsets,
+            mask=class_active & (class_offsets < class_mapping_size),
+            other=-1,
+        ).to(tl.int32)
+        class_scores = tl.load(
+            scores + rank * num_classes + class_offsets,
+            mask=class_active,
+            other=-1.0,
+        )
+        valid_classes = class_active & (mapped_classes >= 0)
+        passing_classes = valid_classes & (class_scores > threshold)
+        passing_class_count = tl.sum(tl.where(passing_classes, 1, 0), axis=0)
+        if FLAG_MULTICLASS and passing_class_count > 1:
+            tl.store(records + 1, 1)
+        selected_score = tl.max(tl.where(valid_classes, class_scores, -1.0), axis=0)
+        # Arg-max by equality: on ties the highest class offset wins; stays -1
+        # when no class is valid.
+        selected_class = tl.max(
+            tl.where(
+                valid_classes & (class_scores == selected_score),
+                class_offsets,
+                -1,
+            ),
+            axis=0,
+        ).to(tl.int32)
+        mapped_class = tl.load(
+            class_mapping + selected_class,
+            mask=(selected_class >= 0) & (selected_class < class_mapping_size),
+            other=-1,
+        ).to(tl.int32)
+        is_valid_detection = (mapped_class >= 0) & (selected_score > threshold)
+        query_index = rank
+        selected_index = rank * num_classes + selected_class
+
+        tl.store(
+            metadata + meta_base + 0,
+            tl.where(is_valid_detection, 1.0, 0.0),
+        )
+        tl.store(metadata + meta_base + 1, mapped_class.to(tl.float32))
+        tl.store(
+            metadata + meta_base + 2,
+            tl.where(is_valid_detection, selected_score, 0.0),
+        )
+        tl.store(metadata + meta_base + 7, 0.0)
+        tl.store(metadata + meta_base + 8, 0.0)
+        tl.store(metadata + meta_base + 9, query_index.to(tl.float32))
+        tl.store(metadata + meta_base + 10, selected_index.to(tl.float32))
+        tl.store(metadata + meta_base + 11, 0.0)
+        tl.store(metadata + meta_base + 12, 0.0)
+        tl.store(metadata + meta_base + 13, 0.0)
+        tl.store(metadata + meta_base + 14, 0.0)
+        tl.store(metadata + meta_base + 15, 0.0)
+
+        # Box loads are masked by validity, so invalid rows end up with an
+        # all-zero box. Boxes are presumably normalised (cx, cy, w, h) - they
+        # are scaled by the output size below; confirm against the caller.
+        bbox_base = query_index * 4
+        cx = tl.load(bboxes + bbox_base, mask=is_valid_detection, other=0.0)
+        cy = tl.load(bboxes + bbox_base + 1, mask=is_valid_detection, other=0.0)
+        width = tl.load(bboxes + bbox_base + 2, mask=is_valid_detection, other=0.0)
+        height = tl.load(
+            bboxes + bbox_base + 3,
+            mask=is_valid_detection,
+            other=0.0,
+        )
+        x1 = tl.maximum(
+            0.0,
+            tl.minimum((cx - 0.5 * width) * output_width, output_width),
+        )
+        y1 = tl.maximum(
+            0.0,
+            tl.minimum((cy - 0.5 * height) * output_height, output_height),
+        )
+        x2 = tl.maximum(
+            0.0,
+            tl.minimum((cx + 0.5 * width) * output_width, output_width),
+        )
+        y2 = tl.maximum(
+            0.0,
+            tl.minimum((cy + 0.5 * height) * output_height, output_height),
+        )
+        tl.store(metadata + meta_base + 3, x1)
+        tl.store(metadata + meta_base + 4, y1)
+        tl.store(metadata + meta_base + 5, x2)
+        tl.store(metadata + meta_base + 6, y2)
+
+    @triton.jit
+    def _select_topk_query_class_metadata_kernel(
+        scores,
+        bboxes,
+        class_mapping,
+        metadata,
+        query_metadata,
+        records,
+        threshold: tl.constexpr,
+        num_queries: tl.constexpr,
+        num_classes: tl.constexpr,
+        class_mapping_size: tl.constexpr,
+        output_height: tl.constexpr,
+        output_width: tl.constexpr,
+        BLOCK_CLASSES: tl.constexpr,
+        METADATA_STRIDE: tl.constexpr,
+        MAX_CLASSES_PER_QUERY: tl.constexpr,
+        FLAG_WRITE_QUERY_METADATA: tl.constexpr,
+        FLAG_OVERFLOW_CLASSES: tl.constexpr,
+    ):
+        query_index = tl.program_id(0)
+        if query_index == 0:
+            tl.store(records + 0, 0)
+            tl.store(records + 1, 0)
+
+        class_offsets = tl.arange(0, BLOCK_CLASSES)
+        class_active = class_offsets < num_classes
+        mapped_classes = tl.load(
+            class_mapping + class_offsets,
+            mask=class_active & (class_offsets < class_mapping_size),
+            other=-1,
+        ).to(tl.int32)
+        class_scores = tl.load(
+            scores + query_index * num_classes + class_offsets,
+            mask=class_active,
+            other=-1.0,
+        )
+        passing_classes = (
+            class_active & (mapped_classes >= 0) & (class_scores > threshold)
+        )
+        passing_class_count = tl.sum(tl.where(passing_classes, 1, 0), axis=0)
+        if FLAG_OVERFLOW_CLASSES and passing_class_count > MAX_CLASSES_PER_QUERY:
+            tl.store(records + 1, 1)
+
+        work_scores = tl.where(passing_classes, class_scores, -1.0)
+        for class_rank in tl.static_range(0, 4):
+            selected_score = tl.max(work_scores, axis=0)
+            selected_class = tl.max(
+                tl.where(
+                    work_scores == selected_score,
+                    class_offsets,
+                    -1,
+                ),
+                axis=0,
+            ).to(tl.int32)
+            mapped_class = tl.load(
+                class_mapping + selected_class,
+                mask=(selected_class >= 0) & (selected_class < class_mapping_size),
+                other=-1,
+            ).to(tl.int32)
+            is_valid_detection = (mapped_class >= 0) & (selected_score > threshold)
+            metadata_rank = query_index * MAX_CLASSES_PER_QUERY + class_rank
+            meta_base = metadata_rank * METADATA_STRIDE
+            selected_index = query_index * num_classes + selected_class
+
+            tl.store(
+                metadata + meta_base + 0,
+                tl.where(is_valid_detection, 1.0, 0.0),
+            )
+            tl.store(metadata + meta_base + 1, mapped_class.to(tl.float32))
+            tl.store(
+                metadata + meta_base + 2,
+                tl.where(is_valid_detection, selected_score, 0.0),
+            )
+            tl.store(metadata + meta_base + 7, 0.0)
+            tl.store(metadata + meta_base + 8, 0.0)
+            tl.store(metadata + meta_base + 9, query_index.to(tl.float32))
+            tl.store(metadata + meta_base + 10, selected_index.to(tl.float32))
+            tl.store(metadata + meta_base + 11, 0.0)
+            tl.store(metadata + meta_base + 12, 0.0)
+            tl.store(metadata + meta_base + 13, 0.0)
+            tl.store(metadata + meta_base + 14, 0.0)
+            tl.store(metadata + meta_base + 15, 0.0)
+
+            bbox_base = query_index * 4
+            cx = tl.load(bboxes + bbox_base, mask=is_valid_detection, other=0.0)
+            cy = tl.load(bboxes + bbox_base + 1, mask=is_valid_detection, other=0.0)
+            width = tl.load(
+                bboxes + bbox_base + 2,
+                mask=is_valid_detection,
+                other=0.0,
+            )
+            height = tl.load(
+                bboxes + bbox_base + 3,
+                mask=is_valid_detection,
+                other=0.0,
+            )
+            x1 = tl.maximum(
+                0.0,
+                tl.minimum((cx - 0.5 * width) * output_width, output_width),
+            )
+            y1 = tl.maximum(
+                0.0,
+                tl.minimum((cy - 0.5 * height) * output_height, output_height),
+            )
+            x2 = tl.maximum(
+                0.0,
+                tl.minimum((cx + 0.5 * width) * output_width, output_width),
+            )
+            y2 = tl.maximum(
+                0.0,
+                tl.minimum((cy + 0.5 * height) * output_height, output_height),
+            )
+            tl.store(metadata + meta_base + 3, x1)
+            tl.store(metadata + meta_base + 4, y1)
+            tl.store(metadata + meta_base + 5, x2)
+            tl.store(metadata + meta_base + 6, y2)
+            if FLAG_WRITE_QUERY_METADATA and class_rank == 0:
+                query_meta_base = query_index * METADATA_STRIDE
+                tl.store(
+                    query_metadata + query_meta_base + 0,
+                    tl.where(is_valid_detection, 1.0, 0.0),
+                )
+                tl.store(
+                    query_metadata + query_meta_base + 1, mapped_class.to(tl.float32)
+                )
+                tl.store(
+                    query_metadata + query_meta_base + 2,
+                    tl.where(is_valid_detection, selected_score, 0.0),
+                )
+                tl.store(query_metadata + query_meta_base + 3, x1)
+                tl.store(query_metadata + query_meta_base + 4, y1)
+                tl.store(query_metadata + query_meta_base + 5, x2)
+                tl.store(query_metadata + query_meta_base + 6, y2)
+                tl.store(query_metadata + query_meta_base + 7, 0.0)
+                tl.store(query_metadata + query_meta_base + 8, 0.0)
+                tl.store(
+                    query_metadata + query_meta_base + 9, query_index.to(tl.float32)
+                )
+                tl.store(
+                    query_metadata + query_meta_base + 10, selected_index.to(tl.float32)
+                )
+                tl.store(query_metadata + query_meta_base + 11, 0.0)
+                tl.store(query_metadata + query_meta_base + 12, 0.0)
+                tl.store(query_metadata + query_meta_base + 13, 0.0)
+                tl.store(query_metadata + query_meta_base + 14, 0.0)
+                tl.store(query_metadata + query_meta_base + 15, 0.0)
+            work_scores = tl.where(class_offsets == selected_class, -1.0, work_scores)
+
+    @triton.jit
+    def _sparse_atomic_rle_from_metadata_kernel(
+        masks,
+        y_idx,
+        y_weight,
+        x_idx,
+        x_weight,
+        metadata,
+        records,
+        num_queries: tl.constexpr,
+        mask_height: tl.constexpr,
+        mask_width: tl.constexpr,
+        output_height: tl.constexpr,
+        output_width: tl.constexpr,
+        mask_stride_q: tl.constexpr,
+        mask_stride_h: tl.constexpr,
+        mask_stride_w: tl.constexpr,
+        BLOCK_MASK: tl.constexpr,
+        BLOCK_OUT_H: tl.constexpr,
+        BLOCK_OUT_W: tl.constexpr,
+        BLOCK_ROI_H: tl.constexpr,
+        MAX_ROI_WIDTH: tl.constexpr,
+        MAX_TOTAL_RUNS: tl.constexpr,
+        METADATA_STRIDE: tl.constexpr,
+        BLOCK_COLS: tl.constexpr,
+    ):
+        """Emit foreground run records for one detection's interpolated mask.
+
+        Program axis 0 selects a metadata row (``rank``); program axis 1
+        selects the first column band (``tile_x``) this program handles. Rows
+        whose validity flag (slot 0) is not above 0.5, or whose source mask has
+        no value above 0.0, return without writing anything.
+
+        ``y_idx`` / ``x_idx`` and ``y_weight`` / ``x_weight`` hold two entries
+        per output coordinate (at ``2 * i`` and ``2 * i + 1``): the two source
+        taps and their weights. An output pixel is foreground when the
+        weighted sum of its four source values is above 0.0.
+
+        Outputs:
+
+        * ``metadata`` slots 11..14 of the row receive ``roi_y_start``,
+          ``roi_y_end``, ``roi_x_start`` and ``roi_x_end`` (written by the
+          ``tile_x == 0`` program only).
+        * ``records[0]`` is an atomically incremented count of reserved run
+          slots; ``records[1]`` is set to 1 when a reservation exceeds
+          ``MAX_TOTAL_RUNS``.
+        * Run slot ``s`` lives at ``records[(s + 1) * 3 : (s + 1) * 3 + 3]`` as
+          ``(rank, start, end)``, where ``start`` / ``end`` are flat indices
+          ``x * output_height + y`` and ``end`` is the first non-foreground
+          row after the run.
+
+        ``num_queries`` is accepted but not read in the body.
+        """
+        rank = tl.program_id(0)
+        tile_x = tl.program_id(1)
+        # NOTE(review): this value is reassigned at the top of the column-band
+        # loop before it is read, so the initial assignment looks unused.
+        local_x_offsets = tile_x * BLOCK_COLS + tl.arange(0, BLOCK_COLS)
+        meta_base = rank * METADATA_STRIDE
+        is_valid_detection = tl.load(metadata + meta_base + 0) > 0.5
+        query_index = tl.load(metadata + meta_base + 9).to(tl.int32)
+
+        if not is_valid_detection:
+            return
+
+        # Scan the whole low-resolution mask once to find the bounding box of
+        # its positive values.
+        mask_offsets = tl.arange(0, BLOCK_MASK)
+        mask_active = mask_offsets < (mask_height * mask_width)
+        source_y = mask_offsets // mask_width
+        source_x = mask_offsets - source_y * mask_width
+        mask_values = tl.load(
+            masks
+            + query_index * mask_stride_q
+            + source_y * mask_stride_h
+            + source_x * mask_stride_w,
+            mask=mask_active,
+            other=-1.0,
+        )
+        positive_source = mask_active & (mask_values > 0.0)
+        source_y_min = tl.min(tl.where(positive_source, source_y, mask_height), axis=0)
+        source_y_max = tl.max(tl.where(positive_source, source_y, -1), axis=0)
+        source_x_min = tl.min(tl.where(positive_source, source_x, mask_width), axis=0)
+        source_x_max = tl.max(tl.where(positive_source, source_x, -1), axis=0)
+        has_positive_source = source_y_max >= 0
+        if not has_positive_source:
+            return
+
+        # Grow the source box by one pixel on every side, clamped to the mask.
+        source_y_min = tl.maximum(source_y_min - 1, 0)
+        source_y_max = tl.minimum(source_y_max + 1, mask_height - 1)
+        source_x_min = tl.maximum(source_x_min - 1, 0)
+        source_x_max = tl.minimum(source_x_max + 1, mask_width - 1)
+
+        # Output rows are ROI candidates when either of their two source taps
+        # lies inside the source box with a non-zero weight.
+        out_y_offsets = tl.arange(0, BLOCK_OUT_H)
+        y_active = out_y_offsets < output_height
+        interp_y0 = tl.load(y_idx + out_y_offsets * 2, mask=y_active, other=-1)
+        interp_y1 = tl.load(y_idx + out_y_offsets * 2 + 1, mask=y_active, other=-1)
+        interp_y_weight0 = tl.load(
+            y_weight + out_y_offsets * 2,
+            mask=y_active,
+            other=0.0,
+        )
+        interp_y_weight1 = tl.load(
+            y_weight + out_y_offsets * 2 + 1,
+            mask=y_active,
+            other=0.0,
+        )
+        candidate_y = y_active & (
+            (
+                (interp_y0 >= source_y_min)
+                & (interp_y0 <= source_y_max)
+                & (interp_y_weight0 != 0.0)
+            )
+            | (
+                (interp_y1 >= source_y_min)
+                & (interp_y1 <= source_y_max)
+                & (interp_y_weight1 != 0.0)
+            )
+        )
+        # Half-open row range [roi_y_start, roi_y_end).
+        roi_y_start = tl.min(
+            tl.where(candidate_y, out_y_offsets, output_height), axis=0
+        )
+        roi_y_end = tl.max(tl.where(candidate_y, out_y_offsets + 1, 0), axis=0)
+
+        # Same candidate test for output columns.
+        out_x_offsets = tl.arange(0, BLOCK_OUT_W)
+        x_active = out_x_offsets < output_width
+        interp_x0 = tl.load(x_idx + out_x_offsets * 2, mask=x_active, other=-1)
+        interp_x1 = tl.load(x_idx + out_x_offsets * 2 + 1, mask=x_active, other=-1)
+        interp_x_weight0 = tl.load(
+            x_weight + out_x_offsets * 2,
+            mask=x_active,
+            other=0.0,
+        )
+        interp_x_weight1 = tl.load(
+            x_weight + out_x_offsets * 2 + 1,
+            mask=x_active,
+            other=0.0,
+        )
+        candidate_x = x_active & (
+            (
+                (interp_x0 >= source_x_min)
+                & (interp_x0 <= source_x_max)
+                & (interp_x_weight0 != 0.0)
+            )
+            | (
+                (interp_x1 >= source_x_min)
+                & (interp_x1 <= source_x_max)
+                & (interp_x_weight1 != 0.0)
+            )
+        )
+        # Half-open column range [roi_x_start, roi_x_end).
+        roi_x_start = tl.min(tl.where(candidate_x, out_x_offsets, output_width), axis=0)
+        roi_x_end = tl.max(tl.where(candidate_x, out_x_offsets + 1, 0), axis=0)
+        roi_width = roi_x_end - roi_x_start
+
+        # Every column-tile program computes the same ROI; only tile 0 records it.
+        if tile_x == 0:
+            tl.store(metadata + meta_base + 11, roi_y_start.to(tl.float32))
+            tl.store(metadata + meta_base + 12, roi_y_end.to(tl.float32))
+            tl.store(metadata + meta_base + 13, roi_x_start.to(tl.float32))
+            tl.store(metadata + meta_base + 14, roi_x_end.to(tl.float32))
+
+        if (roi_y_start >= roi_y_end) or (roi_x_start >= roi_x_end):
+            return
+
+        x_band_offset = tile_x * BLOCK_COLS
+        rows = tl.arange(0, BLOCK_ROI_H)
+        col_offsets = tl.arange(0, BLOCK_COLS)
+        mask_base = query_index * mask_stride_q
+
+        # Column bands: this program starts at its own tile and advances by
+        # MAX_ROI_WIDTH columns per iteration (presumably the launch grid
+        # covers MAX_ROI_WIDTH columns along axis 1 - confirm at the call site).
+        while x_band_offset < roi_width:
+            local_x_offsets = x_band_offset + col_offsets
+            column_active = local_x_offsets < roi_width
+            output_x = roi_x_start + local_x_offsets
+            output_x_matrix = output_x[None, :]
+            x_base = output_x_matrix * 2
+            source_x0 = tl.load(
+                x_idx + x_base,
+                mask=column_active[None, :],
+                other=0,
+            ).to(tl.int64)
+            source_x1 = tl.load(
+                x_idx + x_base + 1,
+                mask=column_active[None, :],
+                other=0,
+            ).to(tl.int64)
+            x_weight0 = tl.load(
+                x_weight + x_base,
+                mask=column_active[None, :],
+                other=0.0,
+            )
+            x_weight1 = tl.load(
+                x_weight + x_base + 1,
+                mask=column_active[None, :],
+                other=0.0,
+            )
+
+            # Per column: record slot of a run still open at the end of the
+            # previous row tile, or -1 when none is open.
+            open_slots = tl.full((BLOCK_COLS,), -1, tl.int32)
+            y_tile_start = roi_y_start
+            # "<=" so the row at roi_y_end is visited too: it is never
+            # foreground (see ``active``) and therefore closes any open run.
+            while y_tile_start <= roi_y_end:
+                row_y = y_tile_start + rows
+                output_y = row_y[:, None]
+                active = (row_y[:, None] < roi_y_end) & column_active[None, :]
+                boundary_active = (row_y[:, None] <= roi_y_end) & column_active[None, :]
+
+                # Interpolated value of the current row.
+                y_base = output_y * 2
+                source_y0 = tl.load(y_idx + y_base, mask=active, other=0).to(tl.int64)
+                source_y1 = tl.load(y_idx + y_base + 1, mask=active, other=0).to(
+                    tl.int64
+                )
+                y_weight0 = tl.load(y_weight + y_base, mask=active, other=0.0)
+                y_weight1 = tl.load(y_weight + y_base + 1, mask=active, other=0.0)
+                value00 = tl.load(
+                    masks
+                    + mask_base
+                    + source_y0 * mask_stride_h
+                    + source_x0 * mask_stride_w,
+                    mask=active,
+                    other=0.0,
+                )
+                value10 = tl.load(
+                    masks
+                    + mask_base
+                    + source_y1 * mask_stride_h
+                    + source_x0 * mask_stride_w,
+                    mask=active,
+                    other=0.0,
+                )
+                value01 = tl.load(
+                    masks
+                    + mask_base
+                    + source_y0 * mask_stride_h
+                    + source_x1 * mask_stride_w,
+                    mask=active,
+                    other=0.0,
+                )
+                value11 = tl.load(
+                    masks
+                    + mask_base
+                    + source_y1 * mask_stride_h
+                    + source_x1 * mask_stride_w,
+                    mask=active,
+                    other=0.0,
+                )
+                current_values = (
+                    value00 * y_weight0 + value10 * y_weight1
+                ) * x_weight0 + (value01 * y_weight0 + value11 * y_weight1) * x_weight1
+                current_positive = active & (current_values > 0.0)
+
+                # Interpolated value of the row directly above, recomputed
+                # rather than carried; the first ROI row has no predecessor.
+                previous_y = output_y - 1
+                previous_active = boundary_active & (row_y[:, None] > roi_y_start)
+                previous_y_base = previous_y * 2
+                prev_source_y0 = tl.load(
+                    y_idx + previous_y_base,
+                    mask=previous_active,
+                    other=0,
+                ).to(tl.int64)
+                prev_source_y1 = tl.load(
+                    y_idx + previous_y_base + 1,
+                    mask=previous_active,
+                    other=0,
+                ).to(tl.int64)
+                prev_y_weight0 = tl.load(
+                    y_weight + previous_y_base,
+                    mask=previous_active,
+                    other=0.0,
+                )
+                prev_y_weight1 = tl.load(
+                    y_weight + previous_y_base + 1,
+                    mask=previous_active,
+                    other=0.0,
+                )
+                prev_value00 = tl.load(
+                    masks
+                    + mask_base
+                    + prev_source_y0 * mask_stride_h
+                    + source_x0 * mask_stride_w,
+                    mask=previous_active,
+                    other=0.0,
+                )
+                prev_value10 = tl.load(
+                    masks
+                    + mask_base
+                    + prev_source_y1 * mask_stride_h
+                    + source_x0 * mask_stride_w,
+                    mask=previous_active,
+                    other=0.0,
+                )
+                prev_value01 = tl.load(
+                    masks
+                    + mask_base
+                    + prev_source_y0 * mask_stride_h
+                    + source_x1 * mask_stride_w,
+                    mask=previous_active,
+                    other=0.0,
+                )
+                prev_value11 = tl.load(
+                    masks
+                    + mask_base
+                    + prev_source_y1 * mask_stride_h
+                    + source_x1 * mask_stride_w,
+                    mask=previous_active,
+                    other=0.0,
+                )
+                previous_values = (
+                    prev_value00 * prev_y_weight0 + prev_value10 * prev_y_weight1
+                ) * x_weight0 + (
+                    prev_value01 * prev_y_weight0 + prev_value11 * prev_y_weight1
+                ) * x_weight1
+                previous_positive = previous_active & (previous_values > 0.0)
+                # A run starts on a background->foreground transition between
+                # vertically adjacent pixels and ends on the reverse transition.
+                is_start = current_positive & ~previous_positive
+                is_end = previous_positive & ~current_positive
+                # Cumulative sums along axis 0 number the starts/ends of each
+                # column within this tile (1-based); the per-column maximum is
+                # the column's total.
+                start_prefix = tl.cumsum(tl.where(is_start, 1, 0), 0)
+                end_prefix = tl.cumsum(tl.where(is_end, 1, 0), 0)
+                start_count = tl.max(start_prefix, axis=0)
+                end_count = tl.max(end_prefix, axis=0)
+
+                start_slots = start_prefix - 1
+                # Flat index with x as the slow axis: x * output_height + y.
+                run_flat = (output_x_matrix * output_height + output_y).to(tl.int32)
+                # Columns are handled one at a time so each can reserve its own
+                # contiguous range of record slots.
+                for col in tl.static_range(0, BLOCK_COLS):
+                    col_match = col_offsets == col
+                    col_has_starts = (
+                        tl.max(tl.where(col_match & (start_count > 0), 1, 0), axis=0)
+                        != 0
+                    )
+                    col_start_count = tl.max(
+                        tl.where(col_match, start_count, 0), axis=0
+                    ).to(tl.int32)
+                    col_end_count = tl.max(
+                        tl.where(col_match, end_count, 0), axis=0
+                    ).to(tl.int32)
+                    open_slot = tl.max(tl.where(col_match, open_slots, -1), axis=0).to(
+                        tl.int32
+                    )
+                    open_at_start = open_slot >= 0
+                    open_at_start_i = tl.where(open_at_start, 1, 0).to(tl.int32)
+                    # Reserve one record slot per run start in this column.
+                    col_base = tl.atomic_add(
+                        records + 0,
+                        col_start_count,
+                        sem="relaxed",
+                        mask=col_has_starts,
+                    ).to(tl.int32)
+                    col_base = tl.where(col_has_starts, col_base, 0)
+                    # Raise the overflow flag; out-of-range slots are dropped by
+                    # the store masks below.
+                    if col_has_starts & ((col_base + col_start_count) > MAX_TOTAL_RUNS):
+                        tl.store(records + 1, 1)
+
+                    start_record_slots = col_base + start_slots
+                    start_store = (
+                        is_start
+                        & col_match[None, :]
+                        & (start_record_slots < MAX_TOTAL_RUNS)
+                    )
+                    # Record layout: (slot + 1) * 3 -> rank, +1 -> start, +2 -> end.
+                    tl.store(
+                        records + (start_record_slots + 1) * 3,
+                        tl.full((BLOCK_ROI_H, BLOCK_COLS), rank, tl.int32),
+                        mask=start_store,
+                    )
+                    tl.store(
+                        records + (start_record_slots + 1) * 3 + 1,
+                        run_flat,
+                        mask=start_store,
+                    )
+
+                    # When a run was already open on entering this tile, the
+                    # first end closes that carried slot and later ends are
+                    # shifted by one onto the starts reserved in this tile.
+                    current_end_slots = col_base + end_prefix - 1 - open_at_start_i
+                    end_record_slots = tl.where(
+                        open_at_start & (end_prefix == 1),
+                        open_slot,
+                        current_end_slots,
+                    )
+                    end_store = (
+                        is_end
+                        & col_match[None, :]
+                        & (end_record_slots >= 0)
+                        & (end_record_slots < MAX_TOTAL_RUNS)
+                    )
+                    tl.store(
+                        records + (end_record_slots + 1) * 3 + 2,
+                        run_flat,
+                        mask=end_store,
+                    )
+
+                    # Work out which slot (if any) is still open after this
+                    # tile so the next row tile can close it.
+                    closed_current_starts = tl.maximum(
+                        col_end_count - open_at_start_i, 0
+                    )
+                    unmatched_current_starts = col_start_count - closed_current_starts
+                    open_after = (open_at_start_i + col_start_count - col_end_count) > 0
+                    next_open_slot = tl.where(
+                        open_after,
+                        tl.where(
+                            unmatched_current_starts > 0,
+                            col_base + col_start_count - 1,
+                            open_slot,
+                        ),
+                        -1,
+                    ).to(tl.int32)
+                    open_slots = tl.where(col_match, next_open_slot, open_slots)
+
+                y_tile_start += BLOCK_ROI_H
+            x_band_offset += MAX_ROI_WIDTH

From 2ddba5e73b5ad71ad7e9411df8d53ab2aad2ad01 Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Tue, 2 Jun 2026 22:10:46 +0000
Subject: [PATCH 04/76] Wire RF-DETR RLE postprocess fast path

---
 .../inference_models/models/rfdetr/common.py  | 434 ++++++++++++++----
 1 file changed, 346 insertions(+), 88 deletions(-)

diff --git a/inference_models/inference_models/models/rfdetr/common.py b/inference_models/inference_models/models/rfdetr/common.py
index a3ae26cd29..13e7186597 100644
--- a/inference_models/inference_models/models/rfdetr/common.py
+++ b/inference_models/inference_models/models/rfdetr/common.py
@@ -4,21 +4,27 @@
 from torchvision.transforms import functional
 
 from inference_models import Detections, InstanceDetections, InstancesRLEMasks
-from inference_models.entities import ImageDimensions
+from inference_models.configuration import (
+    INFERENCE_MODELS_RFDETR_TRITON_POSTPROC_ENABLED,
+)
 from inference_models.errors import CorruptedModelPackageError
 from inference_models.models.common.roboflow.model_packages import (
     PreProcessingMetadata,
-    StaticCropOffset,
 )
 from inference_models.models.common.roboflow.post_processing import (
     align_instance_segmentation_results,
-    align_instance_segmentation_results_to_rle_masks,
+    align_instance_segmentation_results_to_rle_masks_batch,
     rescale_image_detections,
 )
 from inference_models.models.rfdetr.class_remapping import ClassesReMapping
 from inference_models.models.rfdetr.post_processor import select_topk_predictions
+from inference_models.models.rfdetr.triton_postprocess import (
+    post_process_single_instance_segmentation_result_to_rle_masks_triton,
+)
 from inference_models.utils.file_system import read_json
 
+_TRITON_POSTPROC_ENABLED = INFERENCE_MODELS_RFDETR_TRITON_POSTPROC_ENABLED
+
 
 def parse_model_type(config_path: str) -> str:
     try:
@@ -45,6 +51,29 @@ def parse_model_type(config_path: str) -> str:
         ) from error
 
 
+def _pre_processing_metadata_key(image_meta: PreProcessingMetadata) -> tuple:
+    denorm_size = image_meta.nonsquare_intermediate_size or image_meta.inference_size
+    static_crop_offset = image_meta.static_crop_offset
+    return (
+        image_meta.pad_left,
+        image_meta.pad_top,
+        image_meta.pad_right,
+        image_meta.pad_bottom,
+        image_meta.scale_width,
+        image_meta.scale_height,
+        image_meta.original_size.height,
+        image_meta.original_size.width,
+        image_meta.size_after_pre_processing.height,
+        image_meta.size_after_pre_processing.width,
+        denorm_size.height,
+        denorm_size.width,
+        static_crop_offset.offset_x,
+        static_crop_offset.offset_y,
+        static_crop_offset.crop_width,
+        static_crop_offset.crop_height,
+    )
+
+
 def post_process_object_detection_results(
     bboxes: torch.Tensor,
     logits: torch.Tensor,
@@ -218,6 +247,142 @@ def post_process_instance_segmentation_results(
     return results
 
 
+def _post_process_single_instance_segmentation_result_to_rle_masks(
+    image_bboxes: torch.Tensor,
+    image_logits: torch.Tensor,
+    image_masks: torch.Tensor,
+    image_meta: PreProcessingMetadata,
+    threshold: Union[float, torch.Tensor],
+    num_classes: int,
+    classes_re_mapping: Optional[ClassesReMapping],
+) -> InstanceDetections:
+    if not _TRITON_POSTPROC_ENABLED:
+        return _post_process_single_instance_segmentation_result_to_rle_masks_classic(
+            image_bboxes=image_bboxes,
+            image_logits=image_logits,
+            image_masks=image_masks,
+            image_meta=image_meta,
+            threshold=threshold,
+            num_classes=num_classes,
+            classes_re_mapping=classes_re_mapping,
+        )
+
+    triton_result = (
+        post_process_single_instance_segmentation_result_to_rle_masks_triton(
+            image_bboxes=image_bboxes,
+            image_scores=image_logits,
+            image_masks=image_masks,
+            image_meta=image_meta,
+            threshold=threshold,
+            classes_re_mapping=classes_re_mapping,
+        )
+    )
+    if triton_result is not None:
+        return triton_result
+
+    return _post_process_single_instance_segmentation_result_to_rle_masks_classic(
+        image_bboxes=image_bboxes,
+        image_logits=image_logits,
+        image_masks=image_masks,
+        image_meta=image_meta,
+        threshold=threshold,
+        num_classes=num_classes,
+        classes_re_mapping=classes_re_mapping,
+    )
+
+
+def _post_process_single_instance_segmentation_result_to_rle_masks_classic(
+    image_bboxes: torch.Tensor,
+    image_logits: torch.Tensor,
+    image_masks: torch.Tensor,
+    image_meta: PreProcessingMetadata,
+    threshold: Union[float, torch.Tensor],
+    num_classes: int,
+    classes_re_mapping: Optional[ClassesReMapping],
+) -> InstanceDetections:
+    num_queries, num_logits_classes = image_logits.shape
+    flat_scores = image_logits.reshape(-1)
+    confidence, topk_indexes = torch.topk(flat_scores, num_queries)
+    query_indices = topk_indexes // num_logits_classes
+    top_classes = topk_indexes % num_logits_classes
+    if classes_re_mapping is not None:
+        if classes_re_mapping.class_mapping.shape[0] >= num_logits_classes:
+            top_classes = classes_re_mapping.class_mapping[top_classes]
+        else:
+            mapped_classes = torch.full_like(top_classes, -1)
+            mappable_classes = top_classes < classes_re_mapping.class_mapping.shape[0]
+            mapped_classes[mappable_classes] = classes_re_mapping.class_mapping[
+                top_classes[mappable_classes]
+            ]
+            top_classes = mapped_classes
+        remapping_mask = top_classes >= 0
+    else:
+        named = top_classes < num_classes
+        remapping_mask = named
+    confidence_mask = confidence > (
+        threshold[top_classes.clamp(min=0, max=threshold.shape[0] - 1).long()]
+        if isinstance(threshold, torch.Tensor)
+        else threshold
+    )
+    keep_mask = remapping_mask & confidence_mask
+    confidence = confidence[keep_mask]
+    top_classes = top_classes[keep_mask]
+    query_indices = query_indices[keep_mask]
+    confidence, sorted_indices = torch.sort(confidence, descending=True)
+    top_classes = top_classes[sorted_indices]
+    query_indices = query_indices[sorted_indices]
+    selected_boxes = image_bboxes[query_indices]
+    selected_masks = image_masks[query_indices]
+    cxcy = selected_boxes[:, :2]
+    wh = selected_boxes[:, 2:]
+    xy_min = cxcy - 0.5 * wh
+    xy_max = cxcy + 0.5 * wh
+    selected_boxes_xyxy_pct = torch.cat([xy_min, xy_max], dim=-1)
+    denorm_size = image_meta.nonsquare_intermediate_size or image_meta.inference_size
+    denorm_size_whwh = torch.tensor(
+        [
+            denorm_size.width,
+            denorm_size.height,
+            denorm_size.width,
+            denorm_size.height,
+        ],
+        device=image_bboxes.device,
+    )
+    padding = (
+        image_meta.pad_left,
+        image_meta.pad_top,
+        image_meta.pad_right,
+        image_meta.pad_bottom,
+    )
+    selected_boxes_xyxy = selected_boxes_xyxy_pct * denorm_size_whwh
+    aligned_boxes_tensor, rle_masks = (
+        align_instance_segmentation_results_to_rle_masks_batch(
+            image_bboxes=selected_boxes_xyxy,
+            masks=selected_masks,
+            padding=padding,
+            scale_height=image_meta.scale_height,
+            scale_width=image_meta.scale_width,
+            original_size=image_meta.original_size,
+            size_after_pre_processing=image_meta.size_after_pre_processing,
+            inference_size=denorm_size,
+            static_crop_offset=image_meta.static_crop_offset,
+        )
+    )
+    instances_masks = InstancesRLEMasks.from_coco_rle_masks(
+        image_size=(
+            image_meta.original_size.height,
+            image_meta.original_size.width,
+        ),
+        masks=rle_masks,
+    )
+    return InstanceDetections(
+        xyxy=aligned_boxes_tensor.round().int(),
+        confidence=confidence,
+        class_id=top_classes.int(),
+        mask=instances_masks,
+    )
+
+
 def post_process_instance_segmentation_results_to_rle_masks(
     bboxes: torch.Tensor,
     logits: torch.Tensor,
@@ -228,62 +393,162 @@ def post_process_instance_segmentation_results_to_rle_masks(
     classes_re_mapping: Optional[ClassesReMapping],
 ) -> List[InstanceDetections]:
     logits_sigmoid = torch.nn.functional.sigmoid(logits)
-    final_results = []
+    batch_size, num_queries, num_logits_classes = logits_sigmoid.shape
+    final_results: List[Optional[InstanceDetections]] = [None] * batch_size
     device = bboxes.device
     if isinstance(threshold, torch.Tensor):
         threshold = threshold.to(device=device, dtype=logits_sigmoid.dtype)
-    for image_bboxes, image_logits, image_masks, image_meta in zip(
-        bboxes, logits_sigmoid, masks, pre_processing_meta
-    ):
-        confidence, top_classes, image_bboxes, query_indices = select_topk_predictions(
-            logits_sigmoid=image_logits,
-            bboxes_cxcywh=image_bboxes,
-        )
-        image_masks = image_masks[query_indices]
-        if classes_re_mapping is not None:
-            remapping_mask = torch.isin(
-                top_classes, classes_re_mapping.remaining_class_ids
+    if batch_size == 1:
+        return [
+            _post_process_single_instance_segmentation_result_to_rle_masks(
+                image_bboxes=bboxes[0],
+                image_logits=logits_sigmoid[0],
+                image_masks=masks[0],
+                image_meta=pre_processing_meta[0],
+                threshold=threshold,
+                num_classes=num_classes,
+                classes_re_mapping=classes_re_mapping,
             )
-            top_classes = classes_re_mapping.class_mapping[top_classes[remapping_mask]]
-            confidence = confidence[remapping_mask]
-            image_bboxes = image_bboxes[remapping_mask]
-            image_masks = image_masks[remapping_mask]
-        else:
-            # drop DETR no-object rows
-            named = top_classes < num_classes
-            confidence = confidence[named]
-            top_classes = top_classes[named]
-            image_bboxes = image_bboxes[named]
-            image_masks = image_masks[named]
-        confidence_mask = confidence > (
-            threshold[top_classes.long()]
-            if isinstance(threshold, torch.Tensor)
-            else threshold
+        ]
+
+    flat_scores = logits_sigmoid.reshape(batch_size, -1)
+    confidence, topk_indexes = torch.topk(flat_scores, num_queries, dim=1)
+    query_indices = topk_indexes // num_logits_classes
+    top_classes = topk_indexes % num_logits_classes
+    image_bboxes = torch.gather(
+        bboxes,
+        dim=1,
+        index=query_indices[:, :, None].expand(-1, -1, bboxes.shape[-1]),
+    )
+    image_masks = torch.gather(
+        masks,
+        dim=1,
+        index=query_indices[:, :, None, None].expand(
+            -1,
+            -1,
+            masks.shape[-2],
+            masks.shape[-1],
+        ),
+    )
+
+    if classes_re_mapping is not None:
+        mapped_classes = torch.full_like(top_classes, -1)
+        mappable_classes = top_classes < classes_re_mapping.class_mapping.shape[0]
+        mapped_classes[mappable_classes] = classes_re_mapping.class_mapping[
+            top_classes[mappable_classes]
+        ]
+        class_mask = mapped_classes >= 0
+        top_classes = mapped_classes
+    else:
+        # drop DETR no-object rows
+        class_mask = top_classes < num_classes
+
+    if isinstance(threshold, torch.Tensor):
+        threshold_indices = top_classes.clamp(
+            min=0,
+            max=threshold.shape[0] - 1,
+        ).long()
+        threshold_values = threshold[threshold_indices]
+        confidence_mask = class_mask & (confidence > threshold_values)
+    else:
+        confidence_mask = class_mask & (confidence > threshold)
+    valid_counts = confidence_mask.sum(dim=1)
+    valid_sorted = torch.zeros_like(confidence_mask)
+    sorted_confidence = torch.zeros_like(confidence)
+    sorted_classes = torch.zeros_like(top_classes)
+    sorted_boxes = torch.zeros_like(image_bboxes)
+    sorted_masks = torch.empty_like(image_masks)
+    for valid_count in torch.unique(valid_counts).tolist():
+        if valid_count == 0:
+            continue
+        image_indices = (valid_counts == valid_count).nonzero(as_tuple=True)[0]
+        group_mask = confidence_mask[image_indices]
+        group_size = image_indices.shape[0]
+        group_confidence = confidence[image_indices][group_mask].reshape(
+            group_size,
+            valid_count,
         )
-        confidence = confidence[confidence_mask]
-        top_classes = top_classes[confidence_mask]
-        selected_boxes = image_bboxes[confidence_mask]
-        selected_masks = image_masks[confidence_mask]
-        confidence, sorted_indices = torch.sort(confidence, descending=True)
-        top_classes = top_classes[sorted_indices]
-        selected_boxes = selected_boxes[sorted_indices]
-        selected_masks = selected_masks[sorted_indices]
-        cxcy = selected_boxes[:, :2]
-        wh = selected_boxes[:, 2:]
-        xy_min = cxcy - 0.5 * wh
-        xy_max = cxcy + 0.5 * wh
-        selected_boxes_xyxy_pct = torch.cat([xy_min, xy_max], dim=-1)
-        denorm_size = (
-            image_meta.nonsquare_intermediate_size or image_meta.inference_size
+        group_classes = top_classes[image_indices][group_mask].reshape(
+            group_size,
+            valid_count,
         )
-        denorm_size_whwh = torch.tensor(
+        group_boxes = image_bboxes[image_indices][group_mask].reshape(
+            group_size,
+            valid_count,
+            image_bboxes.shape[-1],
+        )
+        group_masks = image_masks[image_indices][group_mask].reshape(
+            group_size,
+            valid_count,
+            image_masks.shape[-2],
+            image_masks.shape[-1],
+        )
+        group_confidence, sorted_indices = torch.sort(
+            group_confidence,
+            dim=1,
+            descending=True,
+        )
+        sorted_confidence[image_indices, :valid_count] = group_confidence
+        sorted_classes[image_indices, :valid_count] = torch.gather(
+            group_classes,
+            dim=1,
+            index=sorted_indices,
+        )
+        sorted_boxes[image_indices, :valid_count] = torch.gather(
+            group_boxes,
+            dim=1,
+            index=sorted_indices[:, :, None].expand(-1, -1, group_boxes.shape[-1]),
+        )
+        sorted_masks[image_indices, :valid_count] = torch.gather(
+            group_masks,
+            dim=1,
+            index=sorted_indices[:, :, None, None].expand(
+                -1,
+                -1,
+                group_masks.shape[-2],
+                group_masks.shape[-1],
+            ),
+        )
+        valid_sorted[image_indices, :valid_count] = True
+    confidence = sorted_confidence
+    top_classes = sorted_classes
+    selected_boxes = sorted_boxes
+    selected_masks = sorted_masks
+
+    cxcy = selected_boxes[..., :2]
+    wh = selected_boxes[..., 2:]
+    xy_min = cxcy - 0.5 * wh
+    xy_max = cxcy + 0.5 * wh
+    selected_boxes_xyxy_pct = torch.cat([xy_min, xy_max], dim=-1)
+    denorm_sizes = [
+        image_meta.nonsquare_intermediate_size or image_meta.inference_size
+        for image_meta in pre_processing_meta
+    ]
+    denorm_size_whwh = torch.tensor(
+        [
             [
                 denorm_size.width,
                 denorm_size.height,
                 denorm_size.width,
                 denorm_size.height,
-            ],
-            device=device,
+            ]
+            for denorm_size in denorm_sizes
+        ],
+        device=device,
+    )
+    selected_boxes_xyxy = selected_boxes_xyxy_pct * denorm_size_whwh[:, None, :]
+
+    metadata_groups = {}
+    for image_index, image_meta in enumerate(pre_processing_meta):
+        metadata_groups.setdefault(
+            _pre_processing_metadata_key(image_meta),
+            [],
+        ).append(image_index)
+
+    for image_indices in metadata_groups.values():
+        image_meta = pre_processing_meta[image_indices[0]]
+        denorm_size = (
+            image_meta.nonsquare_intermediate_size or image_meta.inference_size
         )
         padding = (
             image_meta.pad_left,
@@ -291,47 +556,40 @@ def post_process_instance_segmentation_results_to_rle_masks(
             image_meta.pad_right,
             image_meta.pad_bottom,
         )
-        selected_boxes_xyxy = selected_boxes_xyxy_pct * denorm_size_whwh
-        aligned_boxes, rle_masks = [], []
-        for bbox, mask in align_instance_segmentation_results_to_rle_masks(
-            image_bboxes=selected_boxes_xyxy,
-            masks=selected_masks,
-            padding=padding,
-            scale_height=image_meta.scale_height,
-            scale_width=image_meta.scale_width,
-            original_size=image_meta.original_size,
-            size_after_pre_processing=image_meta.size_after_pre_processing,
-            inference_size=denorm_size,
-            static_crop_offset=image_meta.static_crop_offset,
-        ):
-            aligned_boxes.append(bbox)
-            rle_masks.append(mask)
-        instances_masks = InstancesRLEMasks.from_coco_rle_masks(
-            image_size=(
-                image_meta.original_size.height,
-                image_meta.original_size.width,
-            ),
-            masks=rle_masks,
+        group_valid = valid_sorted[image_indices]
+        group_counts = group_valid.sum(dim=1).tolist()
+        group_boxes = selected_boxes_xyxy[image_indices][group_valid]
+        group_masks = selected_masks[image_indices][group_valid]
+        group_confidence = confidence[image_indices][group_valid]
+        group_classes = top_classes[image_indices][group_valid]
+        aligned_boxes_tensor, rle_masks = (
+            align_instance_segmentation_results_to_rle_masks_batch(
+                image_bboxes=group_boxes,
+                masks=group_masks,
+                padding=padding,
+                scale_height=image_meta.scale_height,
+                scale_width=image_meta.scale_width,
+                original_size=image_meta.original_size,
+                size_after_pre_processing=image_meta.size_after_pre_processing,
+                inference_size=denorm_size,
+                static_crop_offset=image_meta.static_crop_offset,
+            )
         )
-        if len(aligned_boxes) > 0:
-            aligned_boxes_tensor = torch.stack(aligned_boxes, dim=0)
-            final_results.append(
-                InstanceDetections(
-                    xyxy=aligned_boxes_tensor.round().int(),
-                    confidence=confidence,
-                    class_id=top_classes.int(),
-                    mask=instances_masks,
-                )
+        offset = 0
+        for image_index, count in zip(image_indices, group_counts):
+            next_offset = offset + count
+            instances_masks = InstancesRLEMasks.from_coco_rle_masks(
+                image_size=(
+                    image_meta.original_size.height,
+                    image_meta.original_size.width,
+                ),
+                masks=rle_masks[offset:next_offset],
             )
-        else:
-            final_results.append(
-                InstanceDetections(
-                    xyxy=torch.empty(
-                        (0, 4), dtype=torch.int32, device=image_bboxes.device
-                    ),
-                    class_id=top_classes.int(),
-                    confidence=confidence,
-                    mask=instances_masks,
-                )
+            final_results[image_index] = InstanceDetections(
+                xyxy=aligned_boxes_tensor[offset:next_offset].round().int(),
+                confidence=group_confidence[offset:next_offset],
+                class_id=group_classes[offset:next_offset].int(),
+                mask=instances_masks,
             )
+            offset = next_offset
     return final_results

From 0da43d098e69f36996dc16edc6e07656d1887fd5 Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Tue, 2 Jun 2026 22:10:52 +0000
Subject: [PATCH 05/76] Add RF-DETR Triton postproc tests

---
 .../models/rfdetr/test_triton_postprocess.py  | 513 ++++++++++++++++++
 1 file changed, 513 insertions(+)
 create mode 100644 inference_models/tests/unit_tests/models/rfdetr/test_triton_postprocess.py

diff --git a/inference_models/tests/unit_tests/models/rfdetr/test_triton_postprocess.py b/inference_models/tests/unit_tests/models/rfdetr/test_triton_postprocess.py
new file mode 100644
index 0000000000..9aedd69cd8
--- /dev/null
+++ b/inference_models/tests/unit_tests/models/rfdetr/test_triton_postprocess.py
@@ -0,0 +1,513 @@
+import numpy as np
+import pytest
+import torch
+
+from inference_models.entities import ImageDimensions
+from inference_models.models.common.rle_utils import coco_rle_masks_to_numpy_mask
+from inference_models.models.common.roboflow.model_packages import (
+    PreProcessingMetadata,
+    StaticCropOffset,
+)
+from inference_models.models.rfdetr import common as rfdetr_common
+from inference_models.models.rfdetr import triton_postprocess
+from inference_models.models.rfdetr.class_remapping import ClassesReMapping
+from inference_models.models.rfdetr.common import (
+    _post_process_single_instance_segmentation_result_to_rle_masks_classic,
+    post_process_instance_segmentation_results_to_rle_masks,
+)
+from inference_models.models.rfdetr.triton_postprocess import (
+    _INTERPOLATION_WEIGHT_CACHE,
+    _MAX_INTERPOLATION_WEIGHT_CACHE_ENTRIES,
+    _get_interpolation_weights,
+    _supports_triton_postprocess_path,
+    _unsupported_triton_postprocess_reason,
+    post_process_single_instance_segmentation_result_to_rle_masks_triton,
+)
+
+
+def _metadata(
+    height: int = 64,
+    width: int = 64,
+    padding: tuple = (0, 0, 0, 0),
+    static_crop_offset: tuple = (0, 0),
+    size_after_pre_processing: tuple = None,
+) -> PreProcessingMetadata:
+    size = ImageDimensions(height=height, width=width)
+    pad_left, pad_top, pad_right, pad_bottom = padding
+    offset_x, offset_y = static_crop_offset
+    preprocessed_height, preprocessed_width = size_after_pre_processing or (
+        height,
+        width,
+    )
+    preprocessed_size = ImageDimensions(
+        height=preprocessed_height,
+        width=preprocessed_width,
+    )
+    return PreProcessingMetadata(
+        pad_left=pad_left,
+        pad_top=pad_top,
+        pad_right=pad_right,
+        pad_bottom=pad_bottom,
+        original_size=size,
+        size_after_pre_processing=preprocessed_size,
+        inference_size=size,
+        scale_width=1.0,
+        scale_height=1.0,
+        static_crop_offset=StaticCropOffset(
+            offset_x=offset_x,
+            offset_y=offset_y,
+            crop_width=preprocessed_width,
+            crop_height=preprocessed_height,
+        ),
+        nonsquare_intermediate_size=None,
+    )
+
+
+def _class_mapping(device: torch.device, num_classes: int = 2) -> ClassesReMapping:
+    return ClassesReMapping(
+        remaining_class_ids=torch.arange(num_classes, dtype=torch.int64, device=device),
+        class_mapping=torch.arange(num_classes, dtype=torch.int64, device=device),
+    )
+
+
+def _single_detection_inputs(device: torch.device):
+    bboxes = torch.tensor(
+        [
+            [0.50, 0.50, 0.50, 0.50],
+            [0.25, 0.25, 0.20, 0.20],
+        ],
+        dtype=torch.float32,
+        device=device,
+    )
+    logits = torch.tensor(
+        [
+            [4.0, -4.0],
+            [-4.0, -4.0],
+        ],
+        dtype=torch.float32,
+        device=device,
+    )
+    masks = torch.full((2, 8, 8), -2.0, dtype=torch.float32, device=device)
+    masks[0, 2:6, 2:6] = 2.0
+    return bboxes, logits, masks
+
+
+def _support_kwargs(
+    num_queries: int = 2,
+    num_classes: int = 2,
+    mask_size: tuple = (8, 8),
+) -> dict:
+    device = torch.device("cpu")
+    return {
+        "image_bboxes": torch.full(
+            (num_queries, 4),
+            0.5,
+            dtype=torch.float32,
+            device=device,
+        ),
+        "image_scores": torch.full(
+            (num_queries, num_classes),
+            0.1,
+            dtype=torch.float32,
+            device=device,
+        ),
+        "image_masks": torch.zeros(
+            (num_queries, *mask_size),
+            dtype=torch.float32,
+            device=device,
+        ),
+        "image_meta": _metadata(),
+        "threshold": 0.4,
+        "classes_re_mapping": _class_mapping(device, num_classes=num_classes),
+    }
+
+
+def _assert_detections_equal(actual, expected) -> None:
+    torch.testing.assert_close(actual.xyxy.cpu(), expected.xyxy.cpu(), rtol=0, atol=0)
+    torch.testing.assert_close(
+        actual.confidence.cpu(), expected.confidence.cpu(), rtol=0, atol=0
+    )
+    torch.testing.assert_close(
+        actual.class_id.cpu(), expected.class_id.cpu(), rtol=0, atol=0
+    )
+    actual_mask = coco_rle_masks_to_numpy_mask(actual.mask)
+    expected_mask = coco_rle_masks_to_numpy_mask(expected.mask)
+    np.testing.assert_array_equal(actual_mask, expected_mask)
+
+
+def _expected_classic_result(
+    bboxes: torch.Tensor,
+    logits: torch.Tensor,
+    masks: torch.Tensor,
+    metadata: PreProcessingMetadata,
+    threshold,
+    classes_re_mapping,
+    num_classes: int = 2,
+):
+    return _post_process_single_instance_segmentation_result_to_rle_masks_classic(
+        image_bboxes=bboxes,
+        image_logits=torch.sigmoid(logits),
+        image_masks=masks,
+        image_meta=metadata,
+        threshold=threshold,
+        num_classes=num_classes,
+        classes_re_mapping=classes_re_mapping,
+    )
+
+
+def _batched_inputs(device: torch.device):
+    bboxes = torch.tensor(
+        [
+            [
+                [0.50, 0.50, 0.50, 0.50],
+                [0.25, 0.25, 0.20, 0.20],
+                [0.75, 0.75, 0.15, 0.15],
+            ],
+            [
+                [0.50, 0.50, 0.40, 0.40],
+                [0.25, 0.25, 0.20, 0.20],
+                [0.75, 0.75, 0.15, 0.15],
+            ],
+            [
+                [0.25, 0.75, 0.20, 0.20],
+                [0.75, 0.25, 0.20, 0.20],
+                [0.50, 0.50, 0.30, 0.30],
+            ],
+        ],
+        dtype=torch.float32,
+        device=device,
+    )
+    logits = torch.full((3, 3, 2), -4.0, dtype=torch.float32, device=device)
+    logits[0, 0, 0] = 4.0
+    logits[0, 1, 1] = 3.0
+    logits[2, 2, 0] = 2.0
+
+    masks = torch.full((3, 3, 8, 8), -2.0, dtype=torch.float32, device=device)
+    masks[0, 0, 2:6, 2:6] = 2.0
+    masks[0, 1, 1:3, 1:3] = 2.0
+    masks[1, 0, 3:5, 3:5] = 2.0
+    masks[2, 2, 3:6, 3:6] = 2.0
+    return bboxes, logits, masks
+
+
+def _assert_batched_results_match_classic(
+    actual,
+    bboxes: torch.Tensor,
+    logits: torch.Tensor,
+    masks: torch.Tensor,
+    metadata,
+    threshold,
+    classes_re_mapping,
+    num_classes: int = 2,
+) -> None:
+    assert len(actual) == bboxes.shape[0]
+    for image_index, actual_detections in enumerate(actual):
+        expected = _expected_classic_result(
+            bboxes=bboxes[image_index],
+            logits=logits[image_index],
+            masks=masks[image_index],
+            metadata=metadata[image_index],
+            threshold=threshold,
+            classes_re_mapping=classes_re_mapping,
+            num_classes=num_classes,
+        )
+        _assert_detections_equal(actual_detections, expected)
+
+
+def test_rfdetr_triton_postproc_flag_false_bypasses_triton(monkeypatch) -> None:
+    monkeypatch.setattr(rfdetr_common, "_TRITON_POSTPROC_ENABLED", False)
+
+    def fail_if_called(*args, **kwargs):
+        raise AssertionError("Triton postproc should be disabled")
+
+    monkeypatch.setattr(
+        rfdetr_common,
+        "post_process_single_instance_segmentation_result_to_rle_masks_triton",
+        fail_if_called,
+    )
+
+    bboxes, logits, masks = _single_detection_inputs(torch.device("cpu"))
+    results = post_process_instance_segmentation_results_to_rle_masks(
+        bboxes=bboxes.unsqueeze(0),
+        logits=logits.unsqueeze(0),
+        masks=masks.unsqueeze(0),
+        pre_processing_meta=[_metadata()],
+        threshold=0.4,
+        num_classes=2,
+        classes_re_mapping=_class_mapping(torch.device("cpu")),
+    )
+
+    assert len(results) == 1
+    assert results[0].confidence.shape == (1,)
+
+
+def test_rfdetr_triton_postproc_flag_true_uses_triton_result(monkeypatch) -> None:
+    monkeypatch.setattr(rfdetr_common, "_TRITON_POSTPROC_ENABLED", True)
+    sentinel = object()
+
+    def return_sentinel(*args, **kwargs):
+        return sentinel
+
+    monkeypatch.setattr(
+        rfdetr_common,
+        "post_process_single_instance_segmentation_result_to_rle_masks_triton",
+        return_sentinel,
+    )
+
+    bboxes, logits, masks = _single_detection_inputs(torch.device("cpu"))
+    results = post_process_instance_segmentation_results_to_rle_masks(
+        bboxes=bboxes.unsqueeze(0),
+        logits=logits.unsqueeze(0),
+        masks=masks.unsqueeze(0),
+        pre_processing_meta=[_metadata()],
+        threshold=0.4,
+        num_classes=2,
+        classes_re_mapping=_class_mapping(torch.device("cpu")),
+    )
+
+    assert results == [sentinel]
+
+
+def test_rfdetr_triton_postproc_reports_triton_unavailable(monkeypatch) -> None:
+    monkeypatch.setattr(triton_postprocess, "triton", None)
+
+    reason = _unsupported_triton_postprocess_reason(**_support_kwargs())
+
+    assert reason == "triton_unavailable"
+
+
+@pytest.mark.parametrize(
+    ("case", "expected_reason"),
+    [
+        ("no_class_mapping", "class_remapping_required"),
+        ("tensor_threshold", "tensor_threshold_unsupported"),
+        ("invalid_tensor_rank", "invalid_tensor_rank"),
+        ("shape_mismatch", "shape_mismatch"),
+        ("class_mapping_too_small", "class_mapping_too_small"),
+        ("input_size_exceeds_limits", "input_size_exceeds_triton_limits"),
+        ("padding", "padding_unsupported"),
+        ("static_crop", "static_crop_unsupported"),
+        ("resize_metadata", "resize_metadata_unsupported"),
+        ("cpu_device", "cuda_device_required"),
+    ],
+)
+def test_rfdetr_triton_postproc_unsupported_reason_matrix(
+    monkeypatch,
+    case: str,
+    expected_reason: str,
+) -> None:
+    monkeypatch.setattr(triton_postprocess, "triton", object())
+    kwargs = _support_kwargs()
+
+    if case == "no_class_mapping":
+        kwargs["classes_re_mapping"] = None
+    elif case == "tensor_threshold":
+        kwargs["threshold"] = torch.tensor([0.4, 0.4])
+    elif case == "invalid_tensor_rank":
+        kwargs["image_scores"] = kwargs["image_scores"][None]
+    elif case == "shape_mismatch":
+        kwargs["image_bboxes"] = kwargs["image_bboxes"][:1]
+    elif case == "class_mapping_too_small":
+        kwargs["classes_re_mapping"] = _class_mapping(torch.device("cpu"), 1)
+    elif case == "input_size_exceeds_limits":
+        kwargs = _support_kwargs(num_queries=129, num_classes=128)
+    elif case == "padding":
+        kwargs["image_meta"] = _metadata(padding=(1, 0, 0, 0))
+    elif case == "static_crop":
+        kwargs["image_meta"] = _metadata(static_crop_offset=(1, 0))
+    elif case == "resize_metadata":
+        kwargs["image_meta"] = _metadata(size_after_pre_processing=(32, 64))
+
+    reason = _unsupported_triton_postprocess_reason(**kwargs)
+
+    assert reason == expected_reason
+    assert not _supports_triton_postprocess_path(**kwargs)
+
+
+@pytest.mark.parametrize("case", ["no_class_mapping", "tensor_threshold", "padding"])
+def test_rfdetr_triton_postproc_unsupported_cases_fallback_to_classic(
+    monkeypatch,
+    case: str,
+) -> None:
+    monkeypatch.setattr(rfdetr_common, "_TRITON_POSTPROC_ENABLED", True)
+    calls = 0
+    real_triton_postprocess = (
+        rfdetr_common.post_process_single_instance_segmentation_result_to_rle_masks_triton
+    )
+
+    def spy_triton_postprocess(*args, **kwargs):
+        nonlocal calls
+        calls += 1
+        return real_triton_postprocess(*args, **kwargs)
+
+    monkeypatch.setattr(
+        rfdetr_common,
+        "post_process_single_instance_segmentation_result_to_rle_masks_triton",
+        spy_triton_postprocess,
+    )
+
+    device = torch.device("cpu")
+    bboxes, logits, masks = _single_detection_inputs(device)
+    metadata = _metadata()
+    threshold = 0.4
+    classes_re_mapping = _class_mapping(device)
+    if case == "no_class_mapping":
+        classes_re_mapping = None
+    elif case == "tensor_threshold":
+        threshold = torch.tensor([0.4, 0.4], dtype=torch.float32, device=device)
+    elif case == "padding":
+        metadata = _metadata(padding=(1, 0, 0, 0))
+
+    expected = _expected_classic_result(
+        bboxes=bboxes,
+        logits=logits,
+        masks=masks,
+        metadata=metadata,
+        threshold=threshold,
+        classes_re_mapping=classes_re_mapping,
+    )
+    actual = post_process_instance_segmentation_results_to_rle_masks(
+        bboxes=bboxes.unsqueeze(0),
+        logits=logits.unsqueeze(0),
+        masks=masks.unsqueeze(0),
+        pre_processing_meta=[metadata],
+        threshold=threshold,
+        num_classes=2,
+        classes_re_mapping=classes_re_mapping,
+    )[0]
+
+    assert calls == 1
+    _assert_detections_equal(actual, expected)
+
+
+def test_rfdetr_batched_rle_postprocess_matches_classic_for_mixed_counts_and_metadata(
+    monkeypatch,
+) -> None:
+    monkeypatch.setattr(rfdetr_common, "_TRITON_POSTPROC_ENABLED", False)
+    device = torch.device("cpu")
+    bboxes, logits, masks = _batched_inputs(device)
+    metadata = [
+        _metadata(),
+        _metadata(padding=(1, 0, 0, 0)),
+        _metadata(),
+    ]
+    threshold = 0.4
+    classes_re_mapping = _class_mapping(device)
+
+    actual = post_process_instance_segmentation_results_to_rle_masks(
+        bboxes=bboxes,
+        logits=logits,
+        masks=masks,
+        pre_processing_meta=metadata,
+        threshold=threshold,
+        num_classes=2,
+        classes_re_mapping=classes_re_mapping,
+    )
+
+    _assert_batched_results_match_classic(
+        actual=actual,
+        bboxes=bboxes,
+        logits=logits,
+        masks=masks,
+        metadata=metadata,
+        threshold=threshold,
+        classes_re_mapping=classes_re_mapping,
+    )
+    assert [result.confidence.shape[0] for result in actual] == [2, 0, 1]
+
+
+def test_rfdetr_batched_rle_postprocess_matches_classic_for_tensor_threshold_and_unmapped_classes(
+    monkeypatch,
+) -> None:
+    monkeypatch.setattr(rfdetr_common, "_TRITON_POSTPROC_ENABLED", False)
+    device = torch.device("cpu")
+    bboxes, logits, masks = _batched_inputs(device)
+    logits[2, 0, 1] = 5.0
+    metadata = [
+        _metadata(),
+        _metadata(padding=(1, 0, 0, 0)),
+        _metadata(),
+    ]
+    threshold = torch.tensor([0.4, 0.4], dtype=torch.float32, device=device)
+    classes_re_mapping = ClassesReMapping(
+        remaining_class_ids=torch.tensor([0], dtype=torch.int64, device=device),
+        class_mapping=torch.tensor([0, -1], dtype=torch.int64, device=device),
+    )
+
+    actual = post_process_instance_segmentation_results_to_rle_masks(
+        bboxes=bboxes,
+        logits=logits,
+        masks=masks,
+        pre_processing_meta=metadata,
+        threshold=threshold,
+        num_classes=2,
+        classes_re_mapping=classes_re_mapping,
+    )
+
+    _assert_batched_results_match_classic(
+        actual=actual,
+        bboxes=bboxes,
+        logits=logits,
+        masks=masks,
+        metadata=metadata,
+        threshold=threshold,
+        classes_re_mapping=classes_re_mapping,
+    )
+    assert [result.confidence.shape[0] for result in actual] == [1, 0, 1]
+    assert all(result.class_id.tolist() == [0] for result in (actual[0], actual[2]))
+
+
+def test_rfdetr_triton_postproc_interpolation_weight_cache_is_bounded() -> None:
+    _INTERPOLATION_WEIGHT_CACHE.clear()
+    try:
+        for output_size in range(8, 8 + _MAX_INTERPOLATION_WEIGHT_CACHE_ENTRIES + 3):
+            _get_interpolation_weights(
+                src_size=8,
+                output_size=output_size,
+                device=torch.device("cpu"),
+                axis="height",
+            )
+
+        assert (
+            len(_INTERPOLATION_WEIGHT_CACHE) <= _MAX_INTERPOLATION_WEIGHT_CACHE_ENTRIES
+        )
+    finally:
+        _INTERPOLATION_WEIGHT_CACHE.clear()
+
+
+@pytest.mark.skipif(
+    not torch.cuda.is_available() or triton_postprocess.triton is None,
+    reason="CUDA and Triton are required",
+)
+def test_rfdetr_triton_postproc_matches_classic_rle_path() -> None:
+    cpu = torch.device("cpu")
+    cuda = torch.device("cuda")
+    bboxes_cpu, logits_cpu, masks_cpu = _single_detection_inputs(cpu)
+    scores_cpu = torch.sigmoid(logits_cpu)
+    metadata = _metadata()
+    expected = _post_process_single_instance_segmentation_result_to_rle_masks_classic(
+        image_bboxes=bboxes_cpu,
+        image_logits=scores_cpu,
+        image_masks=masks_cpu,
+        image_meta=metadata,
+        threshold=0.4,
+        num_classes=2,
+        classes_re_mapping=_class_mapping(cpu),
+    )
+    cuda_kwargs = {
+        "image_bboxes": bboxes_cpu.to(cuda),
+        "image_scores": scores_cpu.to(cuda),
+        "image_masks": masks_cpu.to(cuda),
+        "image_meta": metadata,
+        "threshold": 0.4,
+        "classes_re_mapping": _class_mapping(cuda),
+    }
+
+    assert _unsupported_triton_postprocess_reason(**cuda_kwargs) is None
+    assert _supports_triton_postprocess_path(**cuda_kwargs)
+    actual = post_process_single_instance_segmentation_result_to_rle_masks_triton(
+        **cuda_kwargs
+    )
+
+    assert actual is not None
+    _assert_detections_equal(actual, expected)

From 036d593703db522c7effff7d5ef14e6fc413436c Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Tue, 2 Jun 2026 22:22:54 +0000
Subject: [PATCH 06/76] Keep RF-DETR default RLE path unchanged

---
 .../inference_models/models/rfdetr/common.py  | 134 ++++++++++++++----
 .../models/rfdetr/test_triton_postprocess.py  |  29 ++++
 2 files changed, 133 insertions(+), 30 deletions(-)

diff --git a/inference_models/inference_models/models/rfdetr/common.py b/inference_models/inference_models/models/rfdetr/common.py
index 13e7186597..ad59b9ed76 100644
--- a/inference_models/inference_models/models/rfdetr/common.py
+++ b/inference_models/inference_models/models/rfdetr/common.py
@@ -13,6 +13,7 @@
 )
 from inference_models.models.common.roboflow.post_processing import (
     align_instance_segmentation_results,
+    align_instance_segmentation_results_to_rle_masks,
     align_instance_segmentation_results_to_rle_masks_batch,
     rescale_image_detections,
 )
@@ -355,19 +356,20 @@ def _post_process_single_instance_segmentation_result_to_rle_masks_classic(
         image_meta.pad_bottom,
     )
     selected_boxes_xyxy = selected_boxes_xyxy_pct * denorm_size_whwh
-    aligned_boxes_tensor, rle_masks = (
-        align_instance_segmentation_results_to_rle_masks_batch(
-            image_bboxes=selected_boxes_xyxy,
-            masks=selected_masks,
-            padding=padding,
-            scale_height=image_meta.scale_height,
-            scale_width=image_meta.scale_width,
-            original_size=image_meta.original_size,
-            size_after_pre_processing=image_meta.size_after_pre_processing,
-            inference_size=denorm_size,
-            static_crop_offset=image_meta.static_crop_offset,
-        )
-    )
+    aligned_boxes, rle_masks = [], []
+    for bbox, mask in align_instance_segmentation_results_to_rle_masks(
+        image_bboxes=selected_boxes_xyxy,
+        masks=selected_masks,
+        padding=padding,
+        scale_height=image_meta.scale_height,
+        scale_width=image_meta.scale_width,
+        original_size=image_meta.original_size,
+        size_after_pre_processing=image_meta.size_after_pre_processing,
+        inference_size=denorm_size,
+        static_crop_offset=image_meta.static_crop_offset,
+    ):
+        aligned_boxes.append(bbox)
+        rle_masks.append(mask)
     instances_masks = InstancesRLEMasks.from_coco_rle_masks(
         image_size=(
             image_meta.original_size.height,
@@ -375,6 +377,12 @@ def _post_process_single_instance_segmentation_result_to_rle_masks_classic(
         ),
         masks=rle_masks,
     )
+    if len(aligned_boxes) > 0:
+        aligned_boxes_tensor = torch.stack(aligned_boxes, dim=0)
+    else:
+        aligned_boxes_tensor = torch.empty(
+            (0, 4), dtype=torch.int32, device=image_bboxes.device
+        )
     return InstanceDetections(
         xyxy=aligned_boxes_tensor.round().int(),
         confidence=confidence,
@@ -383,33 +391,18 @@ def _post_process_single_instance_segmentation_result_to_rle_masks_classic(
     )
 
 
-def post_process_instance_segmentation_results_to_rle_masks(
+def _post_process_instance_segmentation_results_to_rle_masks_batched_dense(
     bboxes: torch.Tensor,
-    logits: torch.Tensor,
+    logits_sigmoid: torch.Tensor,
     masks: torch.Tensor,
     pre_processing_meta: List[PreProcessingMetadata],
     threshold: Union[float, torch.Tensor],
     num_classes: int,
     classes_re_mapping: Optional[ClassesReMapping],
 ) -> List[InstanceDetections]:
-    logits_sigmoid = torch.nn.functional.sigmoid(logits)
     batch_size, num_queries, num_logits_classes = logits_sigmoid.shape
     final_results: List[Optional[InstanceDetections]] = [None] * batch_size
     device = bboxes.device
-    if isinstance(threshold, torch.Tensor):
-        threshold = threshold.to(device=device, dtype=logits_sigmoid.dtype)
-    if batch_size == 1:
-        return [
-            _post_process_single_instance_segmentation_result_to_rle_masks(
-                image_bboxes=bboxes[0],
-                image_logits=logits_sigmoid[0],
-                image_masks=masks[0],
-                image_meta=pre_processing_meta[0],
-                threshold=threshold,
-                num_classes=num_classes,
-                classes_re_mapping=classes_re_mapping,
-            )
-        ]
 
     flat_scores = logits_sigmoid.reshape(batch_size, -1)
     confidence, topk_indexes = torch.topk(flat_scores, num_queries, dim=1)
@@ -593,3 +586,84 @@ def post_process_instance_segmentation_results_to_rle_masks(
             )
             offset = next_offset
     return final_results
+
+
+def _post_process_instance_segmentation_results_to_rle_masks_classic(
+    bboxes: torch.Tensor,
+    logits: torch.Tensor,
+    masks: torch.Tensor,
+    pre_processing_meta: List[PreProcessingMetadata],
+    threshold: Union[float, torch.Tensor],
+    num_classes: int,
+    classes_re_mapping: Optional[ClassesReMapping],
+) -> List[InstanceDetections]:
+    logits_sigmoid = torch.nn.functional.sigmoid(logits)
+    device = bboxes.device
+    if isinstance(threshold, torch.Tensor):
+        threshold = threshold.to(device=device, dtype=logits_sigmoid.dtype)
+    return [
+        _post_process_single_instance_segmentation_result_to_rle_masks_classic(
+            image_bboxes=image_bboxes,
+            image_logits=image_logits,
+            image_masks=image_masks,
+            image_meta=image_meta,
+            threshold=threshold,
+            num_classes=num_classes,
+            classes_re_mapping=classes_re_mapping,
+        )
+        for image_bboxes, image_logits, image_masks, image_meta in zip(
+            bboxes,
+            logits_sigmoid,
+            masks,
+            pre_processing_meta,
+        )
+    ]
+
+
+def post_process_instance_segmentation_results_to_rle_masks(
+    bboxes: torch.Tensor,
+    logits: torch.Tensor,
+    masks: torch.Tensor,
+    pre_processing_meta: List[PreProcessingMetadata],
+    threshold: Union[float, torch.Tensor],
+    num_classes: int,
+    classes_re_mapping: Optional[ClassesReMapping],
+) -> List[InstanceDetections]:
+    if not _TRITON_POSTPROC_ENABLED:
+        return _post_process_instance_segmentation_results_to_rle_masks_classic(
+            bboxes=bboxes,
+            logits=logits,
+            masks=masks,
+            pre_processing_meta=pre_processing_meta,
+            threshold=threshold,
+            num_classes=num_classes,
+            classes_re_mapping=classes_re_mapping,
+        )
+
+    logits_sigmoid = torch.nn.functional.sigmoid(logits)
+    batch_size = logits_sigmoid.shape[0]
+    device = bboxes.device
+    if isinstance(threshold, torch.Tensor):
+        threshold = threshold.to(device=device, dtype=logits_sigmoid.dtype)
+    if batch_size == 1:
+        return [
+            _post_process_single_instance_segmentation_result_to_rle_masks(
+                image_bboxes=bboxes[0],
+                image_logits=logits_sigmoid[0],
+                image_masks=masks[0],
+                image_meta=pre_processing_meta[0],
+                threshold=threshold,
+                num_classes=num_classes,
+                classes_re_mapping=classes_re_mapping,
+            )
+        ]
+
+    return _post_process_instance_segmentation_results_to_rle_masks_batched_dense(
+        bboxes=bboxes,
+        logits_sigmoid=logits_sigmoid,
+        masks=masks,
+        pre_processing_meta=pre_processing_meta,
+        threshold=threshold,
+        num_classes=num_classes,
+        classes_re_mapping=classes_re_mapping,
+    )
diff --git a/inference_models/tests/unit_tests/models/rfdetr/test_triton_postprocess.py b/inference_models/tests/unit_tests/models/rfdetr/test_triton_postprocess.py
index 9aedd69cd8..4cf67a4915 100644
--- a/inference_models/tests/unit_tests/models/rfdetr/test_triton_postprocess.py
+++ b/inference_models/tests/unit_tests/models/rfdetr/test_triton_postprocess.py
@@ -241,6 +241,35 @@ def fail_if_called(*args, **kwargs):
     assert results[0].confidence.shape == (1,)
 
 
+def test_rfdetr_triton_postproc_flag_false_bypasses_batched_rle_helper(
+    monkeypatch,
+) -> None:
+    monkeypatch.setattr(rfdetr_common, "_TRITON_POSTPROC_ENABLED", False)
+
+    def fail_if_called(*args, **kwargs):
+        raise AssertionError("Batched RLE helper should be disabled")
+
+    monkeypatch.setattr(
+        rfdetr_common,
+        "align_instance_segmentation_results_to_rle_masks_batch",
+        fail_if_called,
+    )
+
+    device = torch.device("cpu")
+    bboxes, logits, masks = _batched_inputs(device)
+    results = post_process_instance_segmentation_results_to_rle_masks(
+        bboxes=bboxes,
+        logits=logits,
+        masks=masks,
+        pre_processing_meta=[_metadata(), _metadata(), _metadata()],
+        threshold=0.4,
+        num_classes=2,
+        classes_re_mapping=_class_mapping(device),
+    )
+
+    assert [result.confidence.shape[0] for result in results] == [2, 0, 1]
+
+
 def test_rfdetr_triton_postproc_flag_true_uses_triton_result(monkeypatch) -> None:
     monkeypatch.setattr(rfdetr_common, "_TRITON_POSTPROC_ENABLED", True)
     sentinel = object()

From 54d125e41e38b2b9ae92801424abe9e424a6bd6b Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Tue, 2 Jun 2026 22:34:46 +0000
Subject: [PATCH 07/76] Remove RF-DETR dense batch RLE postproc path

---
 .../models/common/roboflow/post_processing.py | 122 --------
 .../inference_models/models/rfdetr/common.py  | 263 ++----------------
 .../common/roboflow/test_post_processing.py   | 162 -----------
 .../models/rfdetr/test_triton_postprocess.py  |  58 ++--
 4 files changed, 49 insertions(+), 556 deletions(-)

diff --git a/inference_models/inference_models/models/common/roboflow/post_processing.py b/inference_models/inference_models/models/common/roboflow/post_processing.py
index e820d6a950..a92d4242f8 100644
--- a/inference_models/inference_models/models/common/roboflow/post_processing.py
+++ b/inference_models/inference_models/models/common/roboflow/post_processing.py
@@ -506,128 +506,6 @@ def align_instance_segmentation_results(
     return image_bboxes, masks
 
 
-def align_instance_segmentation_results_to_rle_masks_batch(
-    image_bboxes: torch.Tensor,
-    masks: torch.Tensor,
-    padding: Tuple[int, int, int, int],
-    scale_width: float,
-    scale_height: float,
-    original_size: ImageDimensions,
-    size_after_pre_processing: ImageDimensions,
-    inference_size: ImageDimensions,
-    static_crop_offset: StaticCropOffset,
-    binarization_threshold: float = 0.0,
-) -> Tuple[torch.Tensor, List[dict]]:
-    if image_bboxes.shape[0] == 0:
-        return image_bboxes, []
-
-    pad_left, pad_top, pad_right, pad_bottom = padding
-    offsets = torch.tensor(
-        [pad_left, pad_top, pad_left, pad_top],
-        device=image_bboxes.device,
-    )
-    image_bboxes[:, :4].sub_(offsets)
-    scale = torch.as_tensor(
-        [scale_width, scale_height, scale_width, scale_height],
-        dtype=image_bboxes.dtype,
-        device=image_bboxes.device,
-    )
-    image_bboxes[:, :4].div_(scale)
-    n, mh, mw = masks.shape
-    mask_h_scale = mh / inference_size.height
-    mask_w_scale = mw / inference_size.width
-    mask_pad_top, mask_pad_bottom, mask_pad_left, mask_pad_right = (
-        round(mask_h_scale * pad_top),
-        round(mask_h_scale * pad_bottom),
-        round(mask_w_scale * pad_left),
-        round(mask_w_scale * pad_right),
-    )
-    if (
-        mask_pad_top < 0
-        or mask_pad_bottom < 0
-        or mask_pad_left < 0
-        or mask_pad_right < 0
-    ):
-        masks = torch.nn.functional.pad(
-            masks,
-            (
-                abs(min(mask_pad_left, 0)),
-                abs(min(mask_pad_right, 0)),
-                abs(min(mask_pad_top, 0)),
-                abs(min(mask_pad_bottom, 0)),
-            ),
-            "constant",
-            0,
-        )
-        padded_mask_offset_top = max(mask_pad_top, 0)
-        padded_mask_offset_bottom = max(mask_pad_bottom, 0)
-        padded_mask_offset_left = max(mask_pad_left, 0)
-        padded_mask_offset_right = max(mask_pad_right, 0)
-        masks = masks[
-            :,
-            padded_mask_offset_top : masks.shape[1] - padded_mask_offset_bottom,
-            padded_mask_offset_left : masks.shape[2] - padded_mask_offset_right,
-        ]
-    else:
-        masks = masks[
-            :, mask_pad_top : mh - mask_pad_bottom, mask_pad_left : mw - mask_pad_right
-        ]
-    masks = (
-        torch.nn.functional.interpolate(
-            masks[:, None],
-            size=(
-                size_after_pre_processing.height,
-                size_after_pre_processing.width,
-            ),
-            mode="bilinear",
-            align_corners=False,
-            antialias=True,
-        )
-        .squeeze(1)
-        .gt_(binarization_threshold)
-        .to(dtype=torch.bool)
-    )
-    if static_crop_offset.offset_x > 0 or static_crop_offset.offset_y > 0:
-        mask_canvas = torch.zeros(
-            (
-                masks.shape[0],
-                original_size.height,
-                original_size.width,
-            ),
-            dtype=torch.bool,
-            device=masks.device,
-        )
-        mask_canvas[
-            :,
-            static_crop_offset.offset_y : static_crop_offset.offset_y + masks.shape[1],
-            static_crop_offset.offset_x : static_crop_offset.offset_x + masks.shape[2],
-        ] = masks
-        static_crop_offsets = torch.as_tensor(
-            [
-                static_crop_offset.offset_x,
-                static_crop_offset.offset_y,
-                static_crop_offset.offset_x,
-                static_crop_offset.offset_y,
-            ],
-            dtype=image_bboxes.dtype,
-            device=image_bboxes.device,
-        )
-        image_bboxes[:, :4].add_(static_crop_offsets)
-        masks = mask_canvas
-    xyxy_max = torch.as_tensor(
-        [
-            original_size.width,
-            original_size.height,
-            original_size.width,
-            original_size.height,
-        ],
-        dtype=image_bboxes.dtype,
-        device=image_bboxes.device,
-    )
-    image_bboxes[:, :4].clamp_(min=torch.zeros_like(xyxy_max), max=xyxy_max)
-    return image_bboxes, [torch_mask_to_coco_rle(mask) for mask in masks]
-
-
 def align_instance_segmentation_results_to_rle_masks(
     image_bboxes: torch.Tensor,
     masks: torch.Tensor,
diff --git a/inference_models/inference_models/models/rfdetr/common.py b/inference_models/inference_models/models/rfdetr/common.py
index ad59b9ed76..6bc99ff54b 100644
--- a/inference_models/inference_models/models/rfdetr/common.py
+++ b/inference_models/inference_models/models/rfdetr/common.py
@@ -1,4 +1,4 @@
-from typing import List, Optional, Tuple, Union
+from typing import List, Optional, Union
 
 import torch
 from torchvision.transforms import functional
@@ -14,7 +14,6 @@
 from inference_models.models.common.roboflow.post_processing import (
     align_instance_segmentation_results,
     align_instance_segmentation_results_to_rle_masks,
-    align_instance_segmentation_results_to_rle_masks_batch,
     rescale_image_detections,
 )
 from inference_models.models.rfdetr.class_remapping import ClassesReMapping
@@ -52,29 +51,6 @@ def parse_model_type(config_path: str) -> str:
         ) from error
 
 
-def _pre_processing_metadata_key(image_meta: PreProcessingMetadata) -> tuple:
-    denorm_size = image_meta.nonsquare_intermediate_size or image_meta.inference_size
-    static_crop_offset = image_meta.static_crop_offset
-    return (
-        image_meta.pad_left,
-        image_meta.pad_top,
-        image_meta.pad_right,
-        image_meta.pad_bottom,
-        image_meta.scale_width,
-        image_meta.scale_height,
-        image_meta.original_size.height,
-        image_meta.original_size.width,
-        image_meta.size_after_pre_processing.height,
-        image_meta.size_after_pre_processing.width,
-        denorm_size.height,
-        denorm_size.width,
-        static_crop_offset.offset_x,
-        static_crop_offset.offset_y,
-        static_crop_offset.crop_width,
-        static_crop_offset.crop_height,
-    )
-
-
 def post_process_object_detection_results(
     bboxes: torch.Tensor,
     logits: torch.Tensor,
@@ -391,203 +367,6 @@ def _post_process_single_instance_segmentation_result_to_rle_masks_classic(
     )
 
 
-def _post_process_instance_segmentation_results_to_rle_masks_batched_dense(
-    bboxes: torch.Tensor,
-    logits_sigmoid: torch.Tensor,
-    masks: torch.Tensor,
-    pre_processing_meta: List[PreProcessingMetadata],
-    threshold: Union[float, torch.Tensor],
-    num_classes: int,
-    classes_re_mapping: Optional[ClassesReMapping],
-) -> List[InstanceDetections]:
-    batch_size, num_queries, num_logits_classes = logits_sigmoid.shape
-    final_results: List[Optional[InstanceDetections]] = [None] * batch_size
-    device = bboxes.device
-
-    flat_scores = logits_sigmoid.reshape(batch_size, -1)
-    confidence, topk_indexes = torch.topk(flat_scores, num_queries, dim=1)
-    query_indices = topk_indexes // num_logits_classes
-    top_classes = topk_indexes % num_logits_classes
-    image_bboxes = torch.gather(
-        bboxes,
-        dim=1,
-        index=query_indices[:, :, None].expand(-1, -1, bboxes.shape[-1]),
-    )
-    image_masks = torch.gather(
-        masks,
-        dim=1,
-        index=query_indices[:, :, None, None].expand(
-            -1,
-            -1,
-            masks.shape[-2],
-            masks.shape[-1],
-        ),
-    )
-
-    if classes_re_mapping is not None:
-        mapped_classes = torch.full_like(top_classes, -1)
-        mappable_classes = top_classes < classes_re_mapping.class_mapping.shape[0]
-        mapped_classes[mappable_classes] = classes_re_mapping.class_mapping[
-            top_classes[mappable_classes]
-        ]
-        class_mask = mapped_classes >= 0
-        top_classes = mapped_classes
-    else:
-        # drop DETR no-object rows
-        class_mask = top_classes < num_classes
-
-    if isinstance(threshold, torch.Tensor):
-        threshold_indices = top_classes.clamp(
-            min=0,
-            max=threshold.shape[0] - 1,
-        ).long()
-        threshold_values = threshold[threshold_indices]
-        confidence_mask = class_mask & (confidence > threshold_values)
-    else:
-        confidence_mask = class_mask & (confidence > threshold)
-    valid_counts = confidence_mask.sum(dim=1)
-    valid_sorted = torch.zeros_like(confidence_mask)
-    sorted_confidence = torch.zeros_like(confidence)
-    sorted_classes = torch.zeros_like(top_classes)
-    sorted_boxes = torch.zeros_like(image_bboxes)
-    sorted_masks = torch.empty_like(image_masks)
-    for valid_count in torch.unique(valid_counts).tolist():
-        if valid_count == 0:
-            continue
-        image_indices = (valid_counts == valid_count).nonzero(as_tuple=True)[0]
-        group_mask = confidence_mask[image_indices]
-        group_size = image_indices.shape[0]
-        group_confidence = confidence[image_indices][group_mask].reshape(
-            group_size,
-            valid_count,
-        )
-        group_classes = top_classes[image_indices][group_mask].reshape(
-            group_size,
-            valid_count,
-        )
-        group_boxes = image_bboxes[image_indices][group_mask].reshape(
-            group_size,
-            valid_count,
-            image_bboxes.shape[-1],
-        )
-        group_masks = image_masks[image_indices][group_mask].reshape(
-            group_size,
-            valid_count,
-            image_masks.shape[-2],
-            image_masks.shape[-1],
-        )
-        group_confidence, sorted_indices = torch.sort(
-            group_confidence,
-            dim=1,
-            descending=True,
-        )
-        sorted_confidence[image_indices, :valid_count] = group_confidence
-        sorted_classes[image_indices, :valid_count] = torch.gather(
-            group_classes,
-            dim=1,
-            index=sorted_indices,
-        )
-        sorted_boxes[image_indices, :valid_count] = torch.gather(
-            group_boxes,
-            dim=1,
-            index=sorted_indices[:, :, None].expand(-1, -1, group_boxes.shape[-1]),
-        )
-        sorted_masks[image_indices, :valid_count] = torch.gather(
-            group_masks,
-            dim=1,
-            index=sorted_indices[:, :, None, None].expand(
-                -1,
-                -1,
-                group_masks.shape[-2],
-                group_masks.shape[-1],
-            ),
-        )
-        valid_sorted[image_indices, :valid_count] = True
-    confidence = sorted_confidence
-    top_classes = sorted_classes
-    selected_boxes = sorted_boxes
-    selected_masks = sorted_masks
-
-    cxcy = selected_boxes[..., :2]
-    wh = selected_boxes[..., 2:]
-    xy_min = cxcy - 0.5 * wh
-    xy_max = cxcy + 0.5 * wh
-    selected_boxes_xyxy_pct = torch.cat([xy_min, xy_max], dim=-1)
-    denorm_sizes = [
-        image_meta.nonsquare_intermediate_size or image_meta.inference_size
-        for image_meta in pre_processing_meta
-    ]
-    denorm_size_whwh = torch.tensor(
-        [
-            [
-                denorm_size.width,
-                denorm_size.height,
-                denorm_size.width,
-                denorm_size.height,
-            ]
-            for denorm_size in denorm_sizes
-        ],
-        device=device,
-    )
-    selected_boxes_xyxy = selected_boxes_xyxy_pct * denorm_size_whwh[:, None, :]
-
-    metadata_groups = {}
-    for image_index, image_meta in enumerate(pre_processing_meta):
-        metadata_groups.setdefault(
-            _pre_processing_metadata_key(image_meta),
-            [],
-        ).append(image_index)
-
-    for image_indices in metadata_groups.values():
-        image_meta = pre_processing_meta[image_indices[0]]
-        denorm_size = (
-            image_meta.nonsquare_intermediate_size or image_meta.inference_size
-        )
-        padding = (
-            image_meta.pad_left,
-            image_meta.pad_top,
-            image_meta.pad_right,
-            image_meta.pad_bottom,
-        )
-        group_valid = valid_sorted[image_indices]
-        group_counts = group_valid.sum(dim=1).tolist()
-        group_boxes = selected_boxes_xyxy[image_indices][group_valid]
-        group_masks = selected_masks[image_indices][group_valid]
-        group_confidence = confidence[image_indices][group_valid]
-        group_classes = top_classes[image_indices][group_valid]
-        aligned_boxes_tensor, rle_masks = (
-            align_instance_segmentation_results_to_rle_masks_batch(
-                image_bboxes=group_boxes,
-                masks=group_masks,
-                padding=padding,
-                scale_height=image_meta.scale_height,
-                scale_width=image_meta.scale_width,
-                original_size=image_meta.original_size,
-                size_after_pre_processing=image_meta.size_after_pre_processing,
-                inference_size=denorm_size,
-                static_crop_offset=image_meta.static_crop_offset,
-            )
-        )
-        offset = 0
-        for image_index, count in zip(image_indices, group_counts):
-            next_offset = offset + count
-            instances_masks = InstancesRLEMasks.from_coco_rle_masks(
-                image_size=(
-                    image_meta.original_size.height,
-                    image_meta.original_size.width,
-                ),
-                masks=rle_masks[offset:next_offset],
-            )
-            final_results[image_index] = InstanceDetections(
-                xyxy=aligned_boxes_tensor[offset:next_offset].round().int(),
-                confidence=group_confidence[offset:next_offset],
-                class_id=group_classes[offset:next_offset].int(),
-                mask=instances_masks,
-            )
-            offset = next_offset
-    return final_results
-
-
 def _post_process_instance_segmentation_results_to_rle_masks_classic(
     bboxes: torch.Tensor,
     logits: torch.Tensor,
@@ -641,29 +420,23 @@ def post_process_instance_segmentation_results_to_rle_masks(
         )
 
     logits_sigmoid = torch.nn.functional.sigmoid(logits)
-    batch_size = logits_sigmoid.shape[0]
     device = bboxes.device
     if isinstance(threshold, torch.Tensor):
         threshold = threshold.to(device=device, dtype=logits_sigmoid.dtype)
-    if batch_size == 1:
-        return [
-            _post_process_single_instance_segmentation_result_to_rle_masks(
-                image_bboxes=bboxes[0],
-                image_logits=logits_sigmoid[0],
-                image_masks=masks[0],
-                image_meta=pre_processing_meta[0],
-                threshold=threshold,
-                num_classes=num_classes,
-                classes_re_mapping=classes_re_mapping,
-            )
-        ]
-
-    return _post_process_instance_segmentation_results_to_rle_masks_batched_dense(
-        bboxes=bboxes,
-        logits_sigmoid=logits_sigmoid,
-        masks=masks,
-        pre_processing_meta=pre_processing_meta,
-        threshold=threshold,
-        num_classes=num_classes,
-        classes_re_mapping=classes_re_mapping,
-    )
+    return [
+        _post_process_single_instance_segmentation_result_to_rle_masks(
+            image_bboxes=image_bboxes,
+            image_logits=image_logits,
+            image_masks=image_masks,
+            image_meta=image_meta,
+            threshold=threshold,
+            num_classes=num_classes,
+            classes_re_mapping=classes_re_mapping,
+        )
+        for image_bboxes, image_logits, image_masks, image_meta in zip(
+            bboxes,
+            logits_sigmoid,
+            masks,
+            pre_processing_meta,
+        )
+    ]
diff --git a/inference_models/tests/unit_tests/models/common/roboflow/test_post_processing.py b/inference_models/tests/unit_tests/models/common/roboflow/test_post_processing.py
index 885f67f15d..f85404c75b 100644
--- a/inference_models/tests/unit_tests/models/common/roboflow/test_post_processing.py
+++ b/inference_models/tests/unit_tests/models/common/roboflow/test_post_processing.py
@@ -5,10 +5,8 @@
   - NMS helpers: per-class `conf_thresh` tensor path
 """
 
-import numpy as np
 import pytest
 import torch
-from pycocotools import mask as mask_utils
 
 from inference_models.configuration import INFERENCE_MODELS_DEFAULT_CONFIDENCE
 from inference_models.entities import ImageDimensions
@@ -19,8 +17,6 @@
 from inference_models.models.common.roboflow.post_processing import (
     ConfidenceFilter,
     align_instance_segmentation_results,
-    align_instance_segmentation_results_to_rle_masks,
-    align_instance_segmentation_results_to_rle_masks_batch,
     post_process_nms_fused_model_output,
     rescale_image_detections,
     rescale_key_points_detections,
@@ -44,40 +40,6 @@ def _od_output(box_class_conf):
     return out
 
 
-def _decode_rles(rles, height: int, width: int) -> np.ndarray:
-    if not rles:
-        return np.empty((0, height, width), dtype=bool)
-    decoded = mask_utils.decode(rles)
-    if decoded.ndim == 2:
-        decoded = decoded[:, :, None]
-    return decoded.transpose(2, 0, 1).astype(bool)
-
-
-def _rle_alignment_inputs():
-    bboxes = torch.tensor(
-        [
-            [1.0, 2.0, 9.0, 7.0],
-            [-2.0, -1.0, 5.0, 4.0],
-            [4.0, 1.0, 14.0, 12.0],
-        ],
-        dtype=torch.float32,
-    )
-    masks = torch.full((3, 8, 10), -1.0, dtype=torch.float32)
-    masks[0, 2:6, 3:8] = 2.0
-    masks[1, 1:4, 1:5] = 1.0
-    masks[2, 4:7, 5:9] = 3.0
-    return bboxes, masks
-
-
-def _static_crop(offset_x: int, offset_y: int, width: int, height: int):
-    return StaticCropOffset(
-        offset_x=offset_x,
-        offset_y=offset_y,
-        crop_width=width,
-        crop_height=height,
-    )
-
-
 class TestRunNmsForObjectDetection:
     def test_scalar_keeps_all_above_threshold(self) -> None:
         # Three well-separated boxes, three classes, conf 0.7/0.5/0.3.
@@ -481,127 +443,3 @@ def test_clips_box_coords(self) -> None:
         assert out_bboxes[0, 1].item() == pytest.approx(20.0)
         assert out_bboxes[0, 2].item() == pytest.approx(600.0)
         assert out_bboxes[0, 3].item() == pytest.approx(400.0)
-
-
-class TestAlignInstanceSegmentationResultsToRleMasksBatch:
-
-    @pytest.mark.parametrize(
-        "case",
-        [
-            {
-                "padding": (0, 0, 0, 0),
-                "original_size": ImageDimensions(height=8, width=10),
-                "size_after_pre_processing": ImageDimensions(height=8, width=10),
-                "inference_size": ImageDimensions(height=8, width=10),
-                "static_crop_offset": _static_crop(0, 0, 10, 8),
-                "binarization_threshold": 0.0,
-            },
-            {
-                "padding": (1, 1, 1, 0),
-                "original_size": ImageDimensions(height=8, width=10),
-                "size_after_pre_processing": ImageDimensions(height=8, width=10),
-                "inference_size": ImageDimensions(height=8, width=10),
-                "static_crop_offset": _static_crop(0, 0, 10, 8),
-                "binarization_threshold": 0.0,
-            },
-            {
-                "padding": (-1, 0, -1, 0),
-                "original_size": ImageDimensions(height=8, width=10),
-                "size_after_pre_processing": ImageDimensions(height=8, width=10),
-                "inference_size": ImageDimensions(height=8, width=10),
-                "static_crop_offset": _static_crop(0, 0, 10, 8),
-                "binarization_threshold": 0.0,
-            },
-            {
-                "padding": (0, 0, 0, 0),
-                "original_size": ImageDimensions(height=11, width=13),
-                "size_after_pre_processing": ImageDimensions(height=8, width=10),
-                "inference_size": ImageDimensions(height=8, width=10),
-                "static_crop_offset": _static_crop(2, 1, 10, 8),
-                "binarization_threshold": 0.0,
-            },
-            {
-                "padding": (0, 0, 0, 0),
-                "original_size": ImageDimensions(height=8, width=10),
-                "size_after_pre_processing": ImageDimensions(height=8, width=10),
-                "inference_size": ImageDimensions(height=8, width=10),
-                "static_crop_offset": _static_crop(0, 0, 10, 8),
-                "binarization_threshold": 0.5,
-            },
-        ],
-    )
-    def test_batch_matches_generator_path(self, case: dict) -> None:
-        bboxes, masks = _rle_alignment_inputs()
-        batch_boxes = bboxes.clone()
-        generator_boxes = bboxes.clone()
-
-        actual_boxes, actual_rles = (
-            align_instance_segmentation_results_to_rle_masks_batch(
-                image_bboxes=batch_boxes,
-                masks=masks.clone(),
-                scale_width=1.0,
-                scale_height=1.0,
-                **case,
-            )
-        )
-        expected_pairs = list(
-            align_instance_segmentation_results_to_rle_masks(
-                image_bboxes=generator_boxes,
-                masks=masks.clone(),
-                scale_width=1.0,
-                scale_height=1.0,
-                **case,
-            )
-        )
-        expected_boxes = torch.stack([bbox for bbox, _ in expected_pairs])
-        expected_rles = [rle for _, rle in expected_pairs]
-
-        torch.testing.assert_close(actual_boxes, expected_boxes, rtol=0, atol=0)
-        torch.testing.assert_close(batch_boxes, expected_boxes, rtol=0, atol=0)
-        np.testing.assert_array_equal(
-            _decode_rles(
-                actual_rles,
-                case["original_size"].height,
-                case["original_size"].width,
-            ),
-            _decode_rles(
-                expected_rles,
-                case["original_size"].height,
-                case["original_size"].width,
-            ),
-        )
-
-    def test_empty_batch_matches_generator_path(self) -> None:
-        case = {
-            "padding": (0, 0, 0, 0),
-            "original_size": ImageDimensions(height=8, width=10),
-            "size_after_pre_processing": ImageDimensions(height=8, width=10),
-            "inference_size": ImageDimensions(height=8, width=10),
-            "static_crop_offset": _static_crop(0, 0, 10, 8),
-            "binarization_threshold": 0.0,
-        }
-        bboxes = torch.empty((0, 4), dtype=torch.float32)
-        masks = torch.empty((0, 8, 10), dtype=torch.float32)
-
-        actual_boxes, actual_rles = (
-            align_instance_segmentation_results_to_rle_masks_batch(
-                image_bboxes=bboxes.clone(),
-                masks=masks.clone(),
-                scale_width=1.0,
-                scale_height=1.0,
-                **case,
-            )
-        )
-        expected_pairs = list(
-            align_instance_segmentation_results_to_rle_masks(
-                image_bboxes=bboxes.clone(),
-                masks=masks.clone(),
-                scale_width=1.0,
-                scale_height=1.0,
-                **case,
-            )
-        )
-
-        assert actual_boxes.shape == (0, 4)
-        assert actual_rles == []
-        assert expected_pairs == []
diff --git a/inference_models/tests/unit_tests/models/rfdetr/test_triton_postprocess.py b/inference_models/tests/unit_tests/models/rfdetr/test_triton_postprocess.py
index 4cf67a4915..b46ebde104 100644
--- a/inference_models/tests/unit_tests/models/rfdetr/test_triton_postprocess.py
+++ b/inference_models/tests/unit_tests/models/rfdetr/test_triton_postprocess.py
@@ -241,41 +241,43 @@ def fail_if_called(*args, **kwargs):
     assert results[0].confidence.shape == (1,)
 
 
-def test_rfdetr_triton_postproc_flag_false_bypasses_batched_rle_helper(
-    monkeypatch,
-) -> None:
-    monkeypatch.setattr(rfdetr_common, "_TRITON_POSTPROC_ENABLED", False)
+def test_rfdetr_triton_postproc_flag_true_uses_triton_result(monkeypatch) -> None:
+    monkeypatch.setattr(rfdetr_common, "_TRITON_POSTPROC_ENABLED", True)
+    sentinel = object()
 
-    def fail_if_called(*args, **kwargs):
-        raise AssertionError("Batched RLE helper should be disabled")
+    def return_sentinel(*args, **kwargs):
+        return sentinel
 
     monkeypatch.setattr(
         rfdetr_common,
-        "align_instance_segmentation_results_to_rle_masks_batch",
-        fail_if_called,
+        "post_process_single_instance_segmentation_result_to_rle_masks_triton",
+        return_sentinel,
     )
 
-    device = torch.device("cpu")
-    bboxes, logits, masks = _batched_inputs(device)
+    bboxes, logits, masks = _single_detection_inputs(torch.device("cpu"))
     results = post_process_instance_segmentation_results_to_rle_masks(
-        bboxes=bboxes,
-        logits=logits,
-        masks=masks,
-        pre_processing_meta=[_metadata(), _metadata(), _metadata()],
+        bboxes=bboxes.unsqueeze(0),
+        logits=logits.unsqueeze(0),
+        masks=masks.unsqueeze(0),
+        pre_processing_meta=[_metadata()],
         threshold=0.4,
         num_classes=2,
-        classes_re_mapping=_class_mapping(device),
+        classes_re_mapping=_class_mapping(torch.device("cpu")),
     )
 
-    assert [result.confidence.shape[0] for result in results] == [2, 0, 1]
+    assert results == [sentinel]
 
 
-def test_rfdetr_triton_postproc_flag_true_uses_triton_result(monkeypatch) -> None:
+def test_rfdetr_triton_postproc_flag_true_uses_triton_per_image_for_batches(
+    monkeypatch,
+) -> None:
     monkeypatch.setattr(rfdetr_common, "_TRITON_POSTPROC_ENABLED", True)
-    sentinel = object()
+    sentinels = [object(), object(), object()]
+    calls = []
 
-    def return_sentinel(*args, **kwargs):
-        return sentinel
+    def return_sentinel(**kwargs):
+        calls.append(kwargs["image_bboxes"].shape)
+        return sentinels[len(calls) - 1]
 
     monkeypatch.setattr(
         rfdetr_common,
@@ -283,18 +285,20 @@ def return_sentinel(*args, **kwargs):
         return_sentinel,
     )
 
-    bboxes, logits, masks = _single_detection_inputs(torch.device("cpu"))
+    device = torch.device("cpu")
+    bboxes, logits, masks = _batched_inputs(device)
     results = post_process_instance_segmentation_results_to_rle_masks(
-        bboxes=bboxes.unsqueeze(0),
-        logits=logits.unsqueeze(0),
-        masks=masks.unsqueeze(0),
-        pre_processing_meta=[_metadata()],
+        bboxes=bboxes,
+        logits=logits,
+        masks=masks,
+        pre_processing_meta=[_metadata(), _metadata(), _metadata()],
         threshold=0.4,
         num_classes=2,
-        classes_re_mapping=_class_mapping(torch.device("cpu")),
+        classes_re_mapping=_class_mapping(device),
     )
 
-    assert results == [sentinel]
+    assert results == sentinels
+    assert calls == [torch.Size([3, 4]), torch.Size([3, 4]), torch.Size([3, 4])]
 
 
 def test_rfdetr_triton_postproc_reports_triton_unavailable(monkeypatch) -> None:

From 43ee7e65410cc4e18d59f3d2528f8f0010082a76 Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Tue, 2 Jun 2026 22:38:17 +0000
Subject: [PATCH 08/76] Remove legacy RF-DETR Triton postproc env flag

---
 inference_models/inference_models/configuration.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/inference_models/inference_models/configuration.py b/inference_models/inference_models/configuration.py
index c8673598de..9350627ce1 100644
--- a/inference_models/inference_models/configuration.py
+++ b/inference_models/inference_models/configuration.py
@@ -290,13 +290,9 @@
     default=INFERENCE_MODELS_DEFAULT_CONFIDENCE,
 )
 DEFAULT_INFERENCE_MODELS_RFDETR_TRITON_POSTPROC_ENABLED = False
-_LEGACY_RFDETR_TRITON_POSTPROC = get_boolean_from_env(
-    variable_name="RFDETR_TRITON_POSTPROC",
-    default=DEFAULT_INFERENCE_MODELS_RFDETR_TRITON_POSTPROC_ENABLED,
-)
 INFERENCE_MODELS_RFDETR_TRITON_POSTPROC_ENABLED = get_boolean_from_env(
     variable_name="INFERENCE_MODELS_RFDETR_TRITON_POSTPROC_ENABLED",
-    default=_LEGACY_RFDETR_TRITON_POSTPROC,
+    default=DEFAULT_INFERENCE_MODELS_RFDETR_TRITON_POSTPROC_ENABLED,
 )
 INFERENCE_MODELS_ROBOFLOW_INSTANT_DEFAULT_CONFIDENCE = get_float_from_env(
     variable_name="INFERENCE_MODELS_ROBOFLOW_INSTANT_DEFAULT_CONFIDENCE",

From 2a4bf73fdc89c39f1c67b8757ad43637894a0081 Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Tue, 2 Jun 2026 22:55:53 +0000
Subject: [PATCH 09/76] Cover RF-DETR Triton top-k postproc retry

---
 .../models/rfdetr/test_triton_postprocess.py  | 40 +++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/inference_models/tests/unit_tests/models/rfdetr/test_triton_postprocess.py b/inference_models/tests/unit_tests/models/rfdetr/test_triton_postprocess.py
index b46ebde104..5c3362be43 100644
--- a/inference_models/tests/unit_tests/models/rfdetr/test_triton_postprocess.py
+++ b/inference_models/tests/unit_tests/models/rfdetr/test_triton_postprocess.py
@@ -544,3 +544,43 @@ def test_rfdetr_triton_postproc_matches_classic_rle_path() -> None:
 
     assert actual is not None
     _assert_detections_equal(actual, expected)
+
+
+@pytest.mark.skipif(
+    not torch.cuda.is_available() or triton_postprocess.triton is None,
+    reason="CUDA and Triton are required",
+)
+def test_rfdetr_triton_postproc_topk_retry_matches_classic_rle_path() -> None:
+    cpu = torch.device("cpu")
+    cuda = torch.device("cuda")
+    bboxes_cpu, logits_cpu, masks_cpu = _single_detection_inputs(cpu)
+    logits_cpu[0, 0] = 5.0
+    logits_cpu[0, 1] = 4.0
+    scores_cpu = torch.sigmoid(logits_cpu)
+    metadata = _metadata()
+    expected = _post_process_single_instance_segmentation_result_to_rle_masks_classic(
+        image_bboxes=bboxes_cpu,
+        image_logits=scores_cpu,
+        image_masks=masks_cpu,
+        image_meta=metadata,
+        threshold=0.4,
+        num_classes=2,
+        classes_re_mapping=_class_mapping(cpu),
+    )
+    cuda_kwargs = {
+        "image_bboxes": bboxes_cpu.to(cuda),
+        "image_scores": scores_cpu.to(cuda),
+        "image_masks": masks_cpu.to(cuda),
+        "image_meta": metadata,
+        "threshold": 0.4,
+        "classes_re_mapping": _class_mapping(cuda),
+    }
+
+    assert expected.confidence.shape == (2,)
+    assert _unsupported_triton_postprocess_reason(**cuda_kwargs) is None
+    actual = post_process_single_instance_segmentation_result_to_rle_masks_triton(
+        **cuda_kwargs
+    )
+
+    assert actual is not None
+    _assert_detections_equal(actual, expected)

From 0869d789021a38d428eee940209f4f84dc329810 Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Tue, 2 Jun 2026 23:04:54 +0000
Subject: [PATCH 10/76] Keep RF-DETR RLE reference postprocess names

---
 .../inference_models/models/rfdetr/common.py  | 67 +++----------------
 .../models/rfdetr/test_triton_postprocess.py  | 30 ++++-----
 2 files changed, 24 insertions(+), 73 deletions(-)

diff --git a/inference_models/inference_models/models/rfdetr/common.py b/inference_models/inference_models/models/rfdetr/common.py
index 6bc99ff54b..d9735465f0 100644
--- a/inference_models/inference_models/models/rfdetr/common.py
+++ b/inference_models/inference_models/models/rfdetr/common.py
@@ -224,7 +224,7 @@ def post_process_instance_segmentation_results(
     return results
 
 
-def _post_process_single_instance_segmentation_result_to_rle_masks(
+def _post_process_single_instance_segmentation_result_to_rle_masks_with_triton(
     image_bboxes: torch.Tensor,
     image_logits: torch.Tensor,
     image_masks: torch.Tensor,
@@ -233,17 +233,6 @@ def _post_process_single_instance_segmentation_result_to_rle_masks(
     num_classes: int,
     classes_re_mapping: Optional[ClassesReMapping],
 ) -> InstanceDetections:
-    if not _TRITON_POSTPROC_ENABLED:
-        return _post_process_single_instance_segmentation_result_to_rle_masks_classic(
-            image_bboxes=image_bboxes,
-            image_logits=image_logits,
-            image_masks=image_masks,
-            image_meta=image_meta,
-            threshold=threshold,
-            num_classes=num_classes,
-            classes_re_mapping=classes_re_mapping,
-        )
-
     triton_result = (
         post_process_single_instance_segmentation_result_to_rle_masks_triton(
             image_bboxes=image_bboxes,
@@ -257,7 +246,7 @@ def _post_process_single_instance_segmentation_result_to_rle_masks(
     if triton_result is not None:
         return triton_result
 
-    return _post_process_single_instance_segmentation_result_to_rle_masks_classic(
+    return _post_process_single_instance_segmentation_result_to_rle_masks(
         image_bboxes=image_bboxes,
         image_logits=image_logits,
         image_masks=image_masks,
@@ -268,7 +257,7 @@ def _post_process_single_instance_segmentation_result_to_rle_masks(
     )
 
 
-def _post_process_single_instance_segmentation_result_to_rle_masks_classic(
+def _post_process_single_instance_segmentation_result_to_rle_masks(
     image_bboxes: torch.Tensor,
     image_logits: torch.Tensor,
     image_masks: torch.Tensor,
@@ -367,38 +356,6 @@ def _post_process_single_instance_segmentation_result_to_rle_masks_classic(
     )
 
 
-def _post_process_instance_segmentation_results_to_rle_masks_classic(
-    bboxes: torch.Tensor,
-    logits: torch.Tensor,
-    masks: torch.Tensor,
-    pre_processing_meta: List[PreProcessingMetadata],
-    threshold: Union[float, torch.Tensor],
-    num_classes: int,
-    classes_re_mapping: Optional[ClassesReMapping],
-) -> List[InstanceDetections]:
-    logits_sigmoid = torch.nn.functional.sigmoid(logits)
-    device = bboxes.device
-    if isinstance(threshold, torch.Tensor):
-        threshold = threshold.to(device=device, dtype=logits_sigmoid.dtype)
-    return [
-        _post_process_single_instance_segmentation_result_to_rle_masks_classic(
-            image_bboxes=image_bboxes,
-            image_logits=image_logits,
-            image_masks=image_masks,
-            image_meta=image_meta,
-            threshold=threshold,
-            num_classes=num_classes,
-            classes_re_mapping=classes_re_mapping,
-        )
-        for image_bboxes, image_logits, image_masks, image_meta in zip(
-            bboxes,
-            logits_sigmoid,
-            masks,
-            pre_processing_meta,
-        )
-    ]
-
-
 def post_process_instance_segmentation_results_to_rle_masks(
     bboxes: torch.Tensor,
     logits: torch.Tensor,
@@ -408,23 +365,17 @@ def post_process_instance_segmentation_results_to_rle_masks(
     num_classes: int,
     classes_re_mapping: Optional[ClassesReMapping],
 ) -> List[InstanceDetections]:
-    if not _TRITON_POSTPROC_ENABLED:
-        return _post_process_instance_segmentation_results_to_rle_masks_classic(
-            bboxes=bboxes,
-            logits=logits,
-            masks=masks,
-            pre_processing_meta=pre_processing_meta,
-            threshold=threshold,
-            num_classes=num_classes,
-            classes_re_mapping=classes_re_mapping,
-        )
-
     logits_sigmoid = torch.nn.functional.sigmoid(logits)
     device = bboxes.device
     if isinstance(threshold, torch.Tensor):
         threshold = threshold.to(device=device, dtype=logits_sigmoid.dtype)
+    post_process_single = (
+        _post_process_single_instance_segmentation_result_to_rle_masks_with_triton
+        if _TRITON_POSTPROC_ENABLED
+        else _post_process_single_instance_segmentation_result_to_rle_masks
+    )
     return [
-        _post_process_single_instance_segmentation_result_to_rle_masks(
+        post_process_single(
             image_bboxes=image_bboxes,
             image_logits=image_logits,
             image_masks=image_masks,
diff --git a/inference_models/tests/unit_tests/models/rfdetr/test_triton_postprocess.py b/inference_models/tests/unit_tests/models/rfdetr/test_triton_postprocess.py
index 5c3362be43..282a5c0ae7 100644
--- a/inference_models/tests/unit_tests/models/rfdetr/test_triton_postprocess.py
+++ b/inference_models/tests/unit_tests/models/rfdetr/test_triton_postprocess.py
@@ -12,7 +12,7 @@
 from inference_models.models.rfdetr import triton_postprocess
 from inference_models.models.rfdetr.class_remapping import ClassesReMapping
 from inference_models.models.rfdetr.common import (
-    _post_process_single_instance_segmentation_result_to_rle_masks_classic,
+    _post_process_single_instance_segmentation_result_to_rle_masks,
     post_process_instance_segmentation_results_to_rle_masks,
 )
 from inference_models.models.rfdetr.triton_postprocess import (
@@ -135,7 +135,7 @@ def _assert_detections_equal(actual, expected) -> None:
     np.testing.assert_array_equal(actual_mask, expected_mask)
 
 
-def _expected_classic_result(
+def _expected_result(
     bboxes: torch.Tensor,
     logits: torch.Tensor,
     masks: torch.Tensor,
@@ -144,7 +144,7 @@ def _expected_classic_result(
     classes_re_mapping,
     num_classes: int = 2,
 ):
-    return _post_process_single_instance_segmentation_result_to_rle_masks_classic(
+    return _post_process_single_instance_segmentation_result_to_rle_masks(
         image_bboxes=bboxes,
         image_logits=torch.sigmoid(logits),
         image_masks=masks,
@@ -190,7 +190,7 @@ def _batched_inputs(device: torch.device):
     return bboxes, logits, masks
 
 
-def _assert_batched_results_match_classic(
+def _assert_batched_results_match_reference(
     actual,
     bboxes: torch.Tensor,
     logits: torch.Tensor,
@@ -202,7 +202,7 @@ def _assert_batched_results_match_classic(
 ) -> None:
     assert len(actual) == bboxes.shape[0]
     for image_index, actual_detections in enumerate(actual):
-        expected = _expected_classic_result(
+        expected = _expected_result(
             bboxes=bboxes[image_index],
             logits=logits[image_index],
             masks=masks[image_index],
@@ -358,7 +358,7 @@ def test_rfdetr_triton_postproc_unsupported_reason_matrix(
 
 
 @pytest.mark.parametrize("case", ["no_class_mapping", "tensor_threshold", "padding"])
-def test_rfdetr_triton_postproc_unsupported_cases_fallback_to_classic(
+def test_rfdetr_triton_postproc_unsupported_cases_use_reference_path(
     monkeypatch,
     case: str,
 ) -> None:
@@ -391,7 +391,7 @@ def spy_triton_postprocess(*args, **kwargs):
     elif case == "padding":
         metadata = _metadata(padding=(1, 0, 0, 0))
 
-    expected = _expected_classic_result(
+    expected = _expected_result(
         bboxes=bboxes,
         logits=logits,
         masks=masks,
@@ -413,7 +413,7 @@ def spy_triton_postprocess(*args, **kwargs):
     _assert_detections_equal(actual, expected)
 
 
-def test_rfdetr_batched_rle_postprocess_matches_classic_for_mixed_counts_and_metadata(
+def test_rfdetr_batched_rle_postprocess_matches_reference_for_mixed_counts_and_metadata(
     monkeypatch,
 ) -> None:
     monkeypatch.setattr(rfdetr_common, "_TRITON_POSTPROC_ENABLED", False)
@@ -437,7 +437,7 @@ def test_rfdetr_batched_rle_postprocess_matches_classic_for_mixed_counts_and_met
         classes_re_mapping=classes_re_mapping,
     )
 
-    _assert_batched_results_match_classic(
+    _assert_batched_results_match_reference(
         actual=actual,
         bboxes=bboxes,
         logits=logits,
@@ -449,7 +449,7 @@ def test_rfdetr_batched_rle_postprocess_matches_classic_for_mixed_counts_and_met
     assert [result.confidence.shape[0] for result in actual] == [2, 0, 1]
 
 
-def test_rfdetr_batched_rle_postprocess_matches_classic_for_tensor_threshold_and_unmapped_classes(
+def test_rfdetr_batched_rle_postprocess_matches_reference_for_tensor_threshold_and_unmapped_classes(
     monkeypatch,
 ) -> None:
     monkeypatch.setattr(rfdetr_common, "_TRITON_POSTPROC_ENABLED", False)
@@ -477,7 +477,7 @@ def test_rfdetr_batched_rle_postprocess_matches_classic_for_tensor_threshold_and
         classes_re_mapping=classes_re_mapping,
     )
 
-    _assert_batched_results_match_classic(
+    _assert_batched_results_match_reference(
         actual=actual,
         bboxes=bboxes,
         logits=logits,
@@ -512,13 +512,13 @@ def test_rfdetr_triton_postproc_interpolation_weight_cache_is_bounded() -> None:
     not torch.cuda.is_available() or triton_postprocess.triton is None,
     reason="CUDA and Triton are required",
 )
-def test_rfdetr_triton_postproc_matches_classic_rle_path() -> None:
+def test_rfdetr_triton_postproc_matches_reference_rle_path() -> None:
     cpu = torch.device("cpu")
     cuda = torch.device("cuda")
     bboxes_cpu, logits_cpu, masks_cpu = _single_detection_inputs(cpu)
     scores_cpu = torch.sigmoid(logits_cpu)
     metadata = _metadata()
-    expected = _post_process_single_instance_segmentation_result_to_rle_masks_classic(
+    expected = _post_process_single_instance_segmentation_result_to_rle_masks(
         image_bboxes=bboxes_cpu,
         image_logits=scores_cpu,
         image_masks=masks_cpu,
@@ -550,7 +550,7 @@ def test_rfdetr_triton_postproc_matches_classic_rle_path() -> None:
     not torch.cuda.is_available() or triton_postprocess.triton is None,
     reason="CUDA and Triton are required",
 )
-def test_rfdetr_triton_postproc_topk_retry_matches_classic_rle_path() -> None:
+def test_rfdetr_triton_postproc_topk_retry_matches_reference_rle_path() -> None:
     cpu = torch.device("cpu")
     cuda = torch.device("cuda")
     bboxes_cpu, logits_cpu, masks_cpu = _single_detection_inputs(cpu)
@@ -558,7 +558,7 @@ def test_rfdetr_triton_postproc_topk_retry_matches_classic_rle_path() -> None:
     logits_cpu[0, 1] = 4.0
     scores_cpu = torch.sigmoid(logits_cpu)
     metadata = _metadata()
-    expected = _post_process_single_instance_segmentation_result_to_rle_masks_classic(
+    expected = _post_process_single_instance_segmentation_result_to_rle_masks(
         image_bboxes=bboxes_cpu,
         image_logits=scores_cpu,
         image_masks=masks_cpu,

From 6d3ccb851f0d8f22ccb9aee77464b95abd126de2 Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Tue, 2 Jun 2026 23:29:34 +0000
Subject: [PATCH 11/76] Document RF-DETR Triton postprocess internals

---
 .../models/rfdetr/triton_postprocess.py       | 113 ++++++++++++++++++
 1 file changed, 113 insertions(+)

diff --git a/inference_models/inference_models/models/rfdetr/triton_postprocess.py b/inference_models/inference_models/models/rfdetr/triton_postprocess.py
index 0851cdf824..975ecfcb37 100644
--- a/inference_models/inference_models/models/rfdetr/triton_postprocess.py
+++ b/inference_models/inference_models/models/rfdetr/triton_postprocess.py
@@ -1,3 +1,25 @@
+"""Sparse Triton RF-DETR instance-segmentation post-processing.
+
+The normal PyTorch path upsamples every selected mask to image resolution and
+then immediately converts that dense boolean tensor to COCO RLE. For 1080p
+frames that dense intermediate is the expensive part. This module keeps the
+same RF-DETR selection semantics, but asks Triton to interpolate only the
+active mask region and emit sparse RLE run records directly.
+
+The CUDA side writes two buffers:
+
+* ``metadata``: one fixed-width row per output detection candidate containing
+  active flag, mapped class id, score, clipped xyxy box, source query id, sort
+  key, and debug ROI bounds.
+* ``records``: a flat list of ``(rank, run_start, run_end)`` triples in COCO's
+  column-major order. ``records[0, 0]`` is the run count and ``records[0, 1]``
+  is an overflow / retry flag.
+
+CPU code then performs only the small ordered assembly step: copy metadata and
+run records back, sort detections by score, convert each candidate's runs into
+compressed COCO RLE counts, and wrap them in ``InstanceDetections``.
+"""
+
 from collections import OrderedDict
 from threading import Lock
 from typing import List, Optional, Tuple, Union
@@ -22,8 +44,15 @@
 
 
 _HEADER_SIZE = 16
+# One Triton program scans this many output rows per column tile. Keeping this
+# bounded avoids materializing a full HxW mask while still amortizing per-tile
+# interpolation setup.
 _BLOCK_ROI_H = 512
+# RLE flat positions are stored as int32 and converted exactly through fp32
+# metadata fields. Keep H*W below the fp32 exact-integer range.
 _MAX_EXACT_FLAT_INDEX = 1 << 24
+# The sparse RLE kernel processes columns in bands. The common case has small
+# active mask bounds, but the while loop below can advance through wider ROIs.
 _SPARSE_MAX_ROI_WIDTH = 512
 _SPARSE_BLOCK_COLS = 8
 _SPARSE_MAX_TOTAL_RUNS = 8192
@@ -40,6 +69,14 @@ def _get_interpolation_weights(
     device: torch.device,
     axis: str,
 ) -> Tuple[torch.Tensor, torch.Tensor]:
+    """Return sparse two-tap bilinear interpolation tables for one axis.
+
+    The Triton RLE kernel needs to reproduce ``torchvision.functional.resize``
+    with bilinear antialiasing, but it cannot call PyTorch's resize from inside
+    a kernel. We build the interpolation matrix once by resizing an identity
+    basis, keep only the non-zero source index and weight pairs for each output
+    coordinate, and cache those small tables per device/shape/axis.
+    """
     device_key = _interpolation_cache_key(src_size, output_size, device, axis)
     with _INTERPOLATION_WEIGHT_CACHE_LOCK:
         cached = _INTERPOLATION_WEIGHT_CACHE.get(device_key)
@@ -47,6 +84,9 @@ def _get_interpolation_weights(
             _INTERPOLATION_WEIGHT_CACHE.move_to_end(device_key)
             return cached
 
+    # Resize an identity basis so PyTorch gives us exactly the interpolation
+    # coefficients used by the reference path. This keeps the Triton path tied
+    # to PyTorch semantics instead of maintaining a second resize formula.
     if axis == "height":
         basis = torch.eye(src_size, device=device).reshape(src_size, 1, src_size, 1)
         weights = F.interpolate(
@@ -100,6 +140,12 @@ def _interpolation_cache_key(
     device: torch.device,
     axis: str,
 ) -> Tuple[str, int, int, int, int, str]:
+    """Build the LRU key for interpolation tables.
+
+    CUDA tensors may have ``device.index is None`` when they refer to the
+    current device, so the key also includes ``torch.cuda.current_device()`` to
+    avoid reusing tables across devices in multi-GPU processes.
+    """
     return (
         device.type,
         -1 if device.index is None else device.index,
@@ -118,6 +164,17 @@ def post_process_single_instance_segmentation_result_to_rle_masks_triton(
     threshold: Union[float, torch.Tensor],
     classes_re_mapping: Optional[ClassesReMapping],
 ) -> Optional[InstanceDetections]:
+    """Run the sparse Triton RF-DETR RLE postprocess path for one image.
+
+    Returns an ``InstanceDetections`` object when the input shape and metadata
+    are supported. Returns ``None`` when the caller should use the reference
+    PyTorch/RLE implementation instead.
+
+    The fast path first emits one candidate per query. If any query has more
+    than one class above threshold, the first pass asks for a retry and the
+    second pass emits up to ``_SPARSE_MAX_CLASSES_PER_QUERY`` query-class
+    candidates per query.
+    """
     unsupported_reason = _unsupported_triton_postprocess_reason(
         image_bboxes=image_bboxes,
         image_scores=image_scores,
@@ -143,6 +200,8 @@ def post_process_single_instance_segmentation_result_to_rle_masks_triton(
     output_width = image_meta.original_size.width
     confidence_threshold = float(threshold)
 
+    # Precompute resize tables outside the hot kernel. The tables are tiny
+    # compared with the full-resolution masks and can be reused across frames.
     y_idx, y_weight = _get_interpolation_weights(
         src_size=mask_height,
         output_size=output_height,
@@ -156,6 +215,8 @@ def post_process_single_instance_segmentation_result_to_rle_masks_triton(
         axis="width",
     )
 
+    # First pass: keep the common case small by selecting only the best class
+    # for each query and emitting sparse RLE runs for those query masks.
     metadata = torch.empty(
         (num_queries, _HEADER_SIZE),
         dtype=torch.float32,
@@ -227,6 +288,10 @@ def post_process_single_instance_segmentation_result_to_rle_masks_triton(
     ):
         return None
 
+    # Retry only when the first pass detected multiple passing classes for a
+    # query. This preserves RF-DETR's flat top-k query-class semantics without
+    # paying the expanded metadata/RLE cost on the usual one-class-per-query
+    # path.
     topk_metadata_rows = num_queries * _SPARSE_MAX_CLASSES_PER_QUERY
     metadata = torch.empty(
         (topk_metadata_rows, _HEADER_SIZE),
@@ -306,6 +371,15 @@ def _instance_detections_from_sparse_records(
     width: int,
     max_detections: Optional[int] = None,
 ) -> Optional[InstanceDetections]:
+    """Convert sparse device records into ``InstanceDetections``.
+
+    ``metadata_host`` is already on CPU because it is small and needed to decide
+    ordering and retry/fallback. ``records`` may still live on CUDA; this helper
+    copies it only after the metadata indicates at least one active candidate.
+
+    ``None`` means the sparse device result is incomplete or overflowed and the
+    caller should retry or fall back to the reference implementation.
+    """
     active_ranks = np.flatnonzero(metadata_host[:, 0] > 0.5)
     if active_ranks.size == 0:
         return InstanceDetections(
@@ -324,6 +398,9 @@ def _instance_detections_from_sparse_records(
     if int(records_host[0, 1]) != 0 or total_runs < 0 or total_runs > max_total_runs:
         return None
 
+    # Match RF-DETR's descending score order. ``metadata[:, 10]`` is the flat
+    # query-class index and gives a deterministic secondary order for equal
+    # scores without touching the mask records.
     order = np.lexsort(
         (
             -metadata_host[active_ranks, 10],
@@ -347,6 +424,8 @@ def _instance_detections_from_sparse_records(
         if rank_records.size:
             starts_array = rank_records[:, 1].astype(np.int64, copy=False)
             ends_array = rank_records[:, 2].astype(np.int64, copy=False)
+            # Atomic writes from different column tiles are not globally
+            # ordered, so sort runs before converting them into COCO counts.
             order = np.argsort(starts_array, kind="stable")
             starts_array = starts_array[order]
             ends_array = ends_array[order]
@@ -378,6 +457,7 @@ def _should_retry_sparse_topk_metadata(
     records: torch.Tensor,
     max_total_runs: int,
 ) -> bool:
+    """Return whether first-pass sparse metadata needs query-class expansion."""
     active_ranks = np.flatnonzero(metadata_host[:, 0] > 0.5)
     if active_ranks.size == 0 or np.any(metadata_host[active_ranks, 8] > 0.5):
         return False
@@ -396,6 +476,7 @@ def _supports_triton_postprocess_path(
     threshold: Union[float, torch.Tensor],
     classes_re_mapping: Optional[ClassesReMapping],
 ) -> bool:
+    """Return ``True`` when the sparse Triton path can represent this input."""
     return (
         _unsupported_triton_postprocess_reason(
             image_bboxes=image_bboxes,
@@ -417,6 +498,7 @@ def _unsupported_triton_postprocess_reason(
     threshold: Union[float, torch.Tensor],
     classes_re_mapping: Optional[ClassesReMapping],
 ) -> Optional[str]:
+    """Explain why the Triton path should not run, or ``None`` when supported."""
     if triton is None:
         return "triton_unavailable"
     if classes_re_mapping is None:
@@ -477,6 +559,7 @@ def _counts_from_runs(
     height: int,
     width: int,
 ) -> List[int]:
+    """Build uncompressed COCO RLE counts from sorted column-major runs."""
     total = height * width
     lengths = ends - starts
     valid = lengths > 0
@@ -487,6 +570,9 @@ def _counts_from_runs(
 
     if starts.size:
         run_count = starts.size
+        # COCO counts alternate background gaps and foreground lengths. Starts
+        # are absolute flat positions; subtract the prior end in-place to get
+        # each background gap.
         gaps = starts.astype(np.int64, copy=True)
         gaps[1:] -= ends[:-1]
         tail = total - int(ends[-1])
@@ -504,6 +590,7 @@ def _counts_from_runs(
 
 
 def _rle_from_counts(counts: List[int], height: int, width: int) -> dict:
+    """Compress uncompressed COCO RLE counts with pycocotools."""
     return mask_utils.frPyObjects(
         {"counts": counts, "size": [height, width]}, height, width
     )
@@ -528,6 +615,7 @@ def _select_best_query_metadata_kernel(
         METADATA_STRIDE: tl.constexpr,
         FLAG_MULTICLASS: tl.constexpr,
     ):
+        """Select the highest-scoring mapped class for one query."""
         rank = tl.program_id(0)
         meta_base = rank * METADATA_STRIDE
         if rank == 0:
@@ -551,6 +639,9 @@ def _select_best_query_metadata_kernel(
         passing_class_count = tl.sum(tl.where(passing_classes, 1, 0), axis=0)
         if FLAG_MULTICLASS and passing_class_count > 1:
             tl.store(records + 1, 1)
+        # Select over valid mapped classes, not just passing classes. The
+        # threshold is applied after selection so inactive metadata rows still
+        # carry a stable class/score shape.
         selected_score = tl.max(tl.where(valid_classes, class_scores, -1.0), axis=0)
         selected_class = tl.max(
             tl.where(
@@ -569,6 +660,8 @@ def _select_best_query_metadata_kernel(
         query_index = rank
         selected_index = rank * num_classes + selected_class
 
+        # Metadata is float32 because Python copies it back as one compact
+        # array; integer-like fields stay below the exact fp32 integer range.
         tl.store(
             metadata + meta_base + 0,
             tl.where(is_valid_detection, 1.0, 0.0),
@@ -638,6 +731,7 @@ def _select_topk_query_class_metadata_kernel(
         FLAG_WRITE_QUERY_METADATA: tl.constexpr,
         FLAG_OVERFLOW_CLASSES: tl.constexpr,
     ):
+        """Emit up to four passing query-class candidates for one query."""
         query_index = tl.program_id(0)
         if query_index == 0:
             tl.store(records + 0, 0)
@@ -664,6 +758,8 @@ def _select_topk_query_class_metadata_kernel(
 
         work_scores = tl.where(passing_classes, class_scores, -1.0)
         for class_rank in tl.static_range(0, 4):
+            # Repeated max-and-mask avoids sorting all classes and keeps the
+            # register footprint bounded by the configured class block.
             selected_score = tl.max(work_scores, axis=0)
             selected_class = tl.max(
                 tl.where(
@@ -736,6 +832,9 @@ def _select_topk_query_class_metadata_kernel(
             tl.store(metadata + meta_base + 5, x2)
             tl.store(metadata + meta_base + 6, y2)
             if FLAG_WRITE_QUERY_METADATA and class_rank == 0:
+                # The pipeline path wants best-query metadata for the RLE
+                # kernel while retaining expanded class metadata for CPU
+                # finalization.
                 query_meta_base = query_index * METADATA_STRIDE
                 tl.store(
                     query_metadata + query_meta_base + 0,
@@ -793,6 +892,7 @@ def _sparse_atomic_rle_from_metadata_kernel(
         METADATA_STRIDE: tl.constexpr,
         BLOCK_COLS: tl.constexpr,
     ):
+        """Interpolate sparse mask ROIs and emit column-major RLE runs."""
         rank = tl.program_id(0)
         tile_x = tl.program_id(1)
         local_x_offsets = tile_x * BLOCK_COLS + tl.arange(0, BLOCK_COLS)
@@ -816,6 +916,9 @@ def _sparse_atomic_rle_from_metadata_kernel(
             other=-1.0,
         )
         positive_source = mask_active & (mask_values > 0.0)
+        # Any output pixel depending only on non-positive source pixels cannot
+        # cross the >0 threshold, so derive the minimal candidate ROI from the
+        # positive source support plus a one-pixel interpolation halo.
         source_y_min = tl.min(tl.where(positive_source, source_y, mask_height), axis=0)
         source_y_max = tl.max(tl.where(positive_source, source_y, -1), axis=0)
         source_x_min = tl.min(tl.where(positive_source, source_x, mask_width), axis=0)
@@ -891,6 +994,8 @@ def _sparse_atomic_rle_from_metadata_kernel(
         roi_width = roi_x_end - roi_x_start
 
         if tile_x == 0:
+            # ROI bounds are diagnostic/fallback metadata; one tile writes them
+            # to avoid redundant stores from every column group.
             tl.store(metadata + meta_base + 11, roi_y_start.to(tl.float32))
             tl.store(metadata + meta_base + 12, roi_y_end.to(tl.float32))
             tl.store(metadata + meta_base + 13, roi_x_start.to(tl.float32))
@@ -931,6 +1036,8 @@ def _sparse_atomic_rle_from_metadata_kernel(
                 other=0.0,
             )
 
+            # Open slots carry a run that began in a prior row tile but has not
+            # ended yet. The slot stores the record index whose end is pending.
             open_slots = tl.full((BLOCK_COLS,), -1, tl.int32)
             y_tile_start = roi_y_start
             while y_tile_start <= roi_y_end:
@@ -983,6 +1090,9 @@ def _sparse_atomic_rle_from_metadata_kernel(
                 ) * x_weight0 + (value01 * y_weight0 + value11 * y_weight1) * x_weight1
                 current_positive = active & (current_values > 0.0)
 
+                # Starts/ends are transitions along a COCO column-major scan:
+                # current positive after previous background starts a run;
+                # previous positive followed by current background ends it.
                 previous_y = output_y - 1
                 previous_active = boundary_active & (row_y[:, None] > roi_y_start)
                 previous_y_base = previous_y * 2
@@ -1070,6 +1180,9 @@ def _sparse_atomic_rle_from_metadata_kernel(
                     )
                     open_at_start = open_slot >= 0
                     open_at_start_i = tl.where(open_at_start, 1, 0).to(tl.int32)
+                    # Reserve a contiguous span for this column's new starts.
+                    # Atomic ordering between columns is irrelevant because CPU
+                    # sorts records by flat start before building COCO counts.
                     col_base = tl.atomic_add(
                         records + 0,
                         col_start_count,

From 8b74db87352439849cfcb16edcdff703792a2d4a Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Tue, 2 Jun 2026 23:35:40 +0000
Subject: [PATCH 12/76] Remove dead RF-DETR top-k query metadata path

---
 .../models/rfdetr/triton_postprocess.py       | 37 -------------------
 1 file changed, 37 deletions(-)

diff --git a/inference_models/inference_models/models/rfdetr/triton_postprocess.py b/inference_models/inference_models/models/rfdetr/triton_postprocess.py
index 975ecfcb37..c13922c8e9 100644
--- a/inference_models/inference_models/models/rfdetr/triton_postprocess.py
+++ b/inference_models/inference_models/models/rfdetr/triton_postprocess.py
@@ -308,7 +308,6 @@ def post_process_single_instance_segmentation_result_to_rle_masks_triton(
         image_bboxes,
         class_mapping,
         metadata,
-        metadata,
         records,
         confidence_threshold,
         num_queries,
@@ -319,7 +318,6 @@ def post_process_single_instance_segmentation_result_to_rle_masks_triton(
         BLOCK_CLASSES=triton.next_power_of_2(num_classes),
         METADATA_STRIDE=_HEADER_SIZE,
         MAX_CLASSES_PER_QUERY=_SPARSE_MAX_CLASSES_PER_QUERY,
-        FLAG_WRITE_QUERY_METADATA=False,
         FLAG_OVERFLOW_CLASSES=True,
     )
     _sparse_atomic_rle_from_metadata_kernel[
@@ -717,7 +715,6 @@ def _select_topk_query_class_metadata_kernel(
         bboxes,
         class_mapping,
         metadata,
-        query_metadata,
         records,
         threshold: tl.constexpr,
         num_queries: tl.constexpr,
@@ -728,7 +725,6 @@ def _select_topk_query_class_metadata_kernel(
         BLOCK_CLASSES: tl.constexpr,
         METADATA_STRIDE: tl.constexpr,
         MAX_CLASSES_PER_QUERY: tl.constexpr,
-        FLAG_WRITE_QUERY_METADATA: tl.constexpr,
         FLAG_OVERFLOW_CLASSES: tl.constexpr,
     ):
         """Emit up to four passing query-class candidates for one query."""
@@ -831,39 +827,6 @@ def _select_topk_query_class_metadata_kernel(
             tl.store(metadata + meta_base + 4, y1)
             tl.store(metadata + meta_base + 5, x2)
             tl.store(metadata + meta_base + 6, y2)
-            if FLAG_WRITE_QUERY_METADATA and class_rank == 0:
-                # The pipeline path wants best-query metadata for the RLE
-                # kernel while retaining expanded class metadata for CPU
-                # finalization.
-                query_meta_base = query_index * METADATA_STRIDE
-                tl.store(
-                    query_metadata + query_meta_base + 0,
-                    tl.where(is_valid_detection, 1.0, 0.0),
-                )
-                tl.store(
-                    query_metadata + query_meta_base + 1, mapped_class.to(tl.float32)
-                )
-                tl.store(
-                    query_metadata + query_meta_base + 2,
-                    tl.where(is_valid_detection, selected_score, 0.0),
-                )
-                tl.store(query_metadata + query_meta_base + 3, x1)
-                tl.store(query_metadata + query_meta_base + 4, y1)
-                tl.store(query_metadata + query_meta_base + 5, x2)
-                tl.store(query_metadata + query_meta_base + 6, y2)
-                tl.store(query_metadata + query_meta_base + 7, 0.0)
-                tl.store(query_metadata + query_meta_base + 8, 0.0)
-                tl.store(
-                    query_metadata + query_meta_base + 9, query_index.to(tl.float32)
-                )
-                tl.store(
-                    query_metadata + query_meta_base + 10, selected_index.to(tl.float32)
-                )
-                tl.store(query_metadata + query_meta_base + 11, 0.0)
-                tl.store(query_metadata + query_meta_base + 12, 0.0)
-                tl.store(query_metadata + query_meta_base + 13, 0.0)
-                tl.store(query_metadata + query_meta_base + 14, 0.0)
-                tl.store(query_metadata + query_meta_base + 15, 0.0)
             work_scores = tl.where(class_offsets == selected_class, -1.0, work_scores)
 
     @triton.jit

From ff3e2fb493dfe08098299f8cd196d9a5e5d3bf83 Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Tue, 2 Jun 2026 23:49:19 +0000
Subject: [PATCH 13/76] Document RF-DETR Triton kernel contracts

---
 .../models/rfdetr/triton_postprocess.py       | 137 +++++++++++++++++-
 1 file changed, 134 insertions(+), 3 deletions(-)

diff --git a/inference_models/inference_models/models/rfdetr/triton_postprocess.py b/inference_models/inference_models/models/rfdetr/triton_postprocess.py
index c13922c8e9..3e2d38a539 100644
--- a/inference_models/inference_models/models/rfdetr/triton_postprocess.py
+++ b/inference_models/inference_models/models/rfdetr/triton_postprocess.py
@@ -613,7 +613,46 @@ def _select_best_query_metadata_kernel(
         METADATA_STRIDE: tl.constexpr,
         FLAG_MULTICLASS: tl.constexpr,
     ):
-        """Select the highest-scoring mapped class for one query."""
+        """Select one query-level detection row from RF-DETR scores.
+
+        Launch grid:
+            ``(num_queries,)``. Program id 0 is also responsible for clearing
+            the two-word ``records`` header before the RLE kernel runs.
+
+        Args:
+            scores: CUDA tensor with shape ``[num_queries, num_classes]`` and
+                dtype float32. Values are sigmoid class probabilities.
+            bboxes: CUDA tensor with shape ``[num_queries, 4]`` and dtype
+                float32. Boxes are normalized ``cx, cy, width, height`` values.
+            class_mapping: CUDA int tensor with at least ``num_classes`` values.
+                ``class_mapping[class_id]`` is the public class id; negative
+                entries mark model classes that should be ignored.
+            metadata: CUDA float32 tensor with shape
+                ``[num_queries, METADATA_STRIDE]``. The kernel writes columns
+                ``0`` active flag, ``1`` mapped class id, ``2`` score, ``3:7``
+                clipped xyxy pixel box, ``8`` unsupported flag, ``9`` source
+                query id, ``10`` flat query-class sort key, and ``11:15`` zeroed
+                ROI/debug fields later filled by the RLE kernel.
+            records: CUDA int32 tensor with shape ``[MAX_TOTAL_RUNS + 1, 3]``.
+                Only ``records[0, 0]`` and ``records[0, 1]`` are touched here:
+                run count and retry/overflow flag.
+            threshold: Confidence threshold applied after the best valid mapped
+                class is selected.
+            num_queries: Number of RF-DETR object queries, matching
+                ``scores.shape[0]`` and ``bboxes.shape[0]``.
+            num_classes: Number of model class columns in ``scores``.
+            class_mapping_size: Number of entries available in
+                ``class_mapping``.
+            output_height: Original image height used to convert normalized box
+                coordinates to pixel coordinates.
+            output_width: Original image width used to convert normalized box
+                coordinates to pixel coordinates.
+            BLOCK_CLASSES: Power-of-two tile width covering ``num_classes``.
+            METADATA_STRIDE: Number of float32 fields per metadata row.
+            FLAG_MULTICLASS: When true, writes ``records[0, 1] = 1`` if more
+                than one mapped class for this query exceeds ``threshold`` so
+                the caller can rerun the top-k query-class path.
+        """
         rank = tl.program_id(0)
         meta_base = rank * METADATA_STRIDE
         if rank == 0:
@@ -727,7 +766,44 @@ def _select_topk_query_class_metadata_kernel(
         MAX_CLASSES_PER_QUERY: tl.constexpr,
         FLAG_OVERFLOW_CLASSES: tl.constexpr,
     ):
-        """Emit up to four passing query-class candidates for one query."""
+        """Emit top passing query-class metadata rows for one RF-DETR query.
+
+        Launch grid:
+            ``(num_queries,)``. Each program scans all class scores for one
+            query and writes up to ``MAX_CLASSES_PER_QUERY`` rows. The current
+            implementation uses a static loop of four iterations, so
+            ``MAX_CLASSES_PER_QUERY`` is expected to be ``4``.
+
+        Args:
+            scores: CUDA float32 tensor with shape
+                ``[num_queries, num_classes]`` containing sigmoid class scores.
+            bboxes: CUDA float32 tensor with shape ``[num_queries, 4]`` in
+                normalized ``cx, cy, width, height`` format.
+            class_mapping: CUDA int tensor with class remap entries. Negative
+                mapped ids are ignored.
+            metadata: CUDA float32 tensor with shape
+                ``[num_queries * MAX_CLASSES_PER_QUERY, METADATA_STRIDE]``.
+                Row ``query_index * MAX_CLASSES_PER_QUERY + class_rank`` holds
+                the ``class_rank``-th highest passing class for that query.
+                Columns have the same layout as
+                ``_select_best_query_metadata_kernel``.
+            records: CUDA int32 tensor with shape ``[MAX_TOTAL_RUNS + 1, 3]``.
+                Program 0 resets ``records[0, 0]`` and ``records[0, 1]`` before
+                the RLE kernel appends runs.
+            threshold: Minimum class score required for a metadata row to be
+                marked active.
+            num_queries: Number of query rows in ``scores`` and ``bboxes``.
+            num_classes: Number of class columns in ``scores``.
+            class_mapping_size: Number of valid entries in ``class_mapping``.
+            output_height: Original image height used for xyxy box conversion.
+            output_width: Original image width used for xyxy box conversion.
+            BLOCK_CLASSES: Power-of-two tile width covering all class columns.
+            METADATA_STRIDE: Number of float32 fields per metadata row.
+            MAX_CLASSES_PER_QUERY: Number of metadata rows reserved per query.
+            FLAG_OVERFLOW_CLASSES: When true, writes ``records[0, 1] = 1`` if
+                more than ``MAX_CLASSES_PER_QUERY`` classes pass threshold; the
+                caller treats that as unsupported for exact top-k parity.
+        """
         query_index = tl.program_id(0)
         if query_index == 0:
             tl.store(records + 0, 0)
@@ -855,7 +931,62 @@ def _sparse_atomic_rle_from_metadata_kernel(
         METADATA_STRIDE: tl.constexpr,
         BLOCK_COLS: tl.constexpr,
     ):
-        """Interpolate sparse mask ROIs and emit column-major RLE runs."""
+        """Interpolate active mask ROIs and emit COCO-order RLE run records.
+
+        Launch grid:
+            ``(metadata_rows, ceil(MAX_ROI_WIDTH / BLOCK_COLS))``. Program id 0
+            selects a metadata row / output detection rank. Program id 1 selects
+            the starting column tile. If a mask ROI is wider than
+            ``MAX_ROI_WIDTH``, each program advances by ``MAX_ROI_WIDTH`` in a
+            loop so large ROIs are still covered without launching a fallback.
+
+        Args:
+            masks: CUDA float32 tensor with logical shape
+                ``[num_queries, mask_height, mask_width]``. Strides are passed
+                separately because callers may hand in contiguous or view-backed
+                tensors. Values are mask logits already in the RF-DETR mask
+                space; output pixels are positive when bilinear interpolation is
+                greater than zero.
+            y_idx: CUDA int32 tensor with shape ``[output_height, 2]``. For each
+                output row, stores the two source mask rows used by the
+                reference antialiased bilinear resize.
+            y_weight: CUDA float32 tensor with shape ``[output_height, 2]``.
+                Weights matching ``y_idx``.
+            x_idx: CUDA int32 tensor with shape ``[output_width, 2]``. For each
+                output column, stores the two source mask columns used by the
+                reference resize.
+            x_weight: CUDA float32 tensor with shape ``[output_width, 2]``.
+                Weights matching ``x_idx``.
+            metadata: CUDA float32 tensor with shape
+                ``[metadata_rows, METADATA_STRIDE]``. The kernel reads column
+                ``0`` active flag and column ``9`` source query id. It writes
+                columns ``11:15`` with ``roi_y_start, roi_y_end, roi_x_start,
+                roi_x_end`` for diagnostics.
+            records: CUDA int32 tensor with shape ``[MAX_TOTAL_RUNS + 1, 3]``.
+                ``records[0, 0]`` is atomically incremented for every emitted
+                run and ``records[0, 1]`` is set when capacity is exceeded.
+                Data rows are ``(rank, start, end)`` where ``start`` and ``end``
+                are flat COCO/Fortran-order positions ``x * output_height + y``.
+            num_queries: Number of query masks in ``masks``.
+            mask_height: Height of each low-resolution RF-DETR mask.
+            mask_width: Width of each low-resolution RF-DETR mask.
+            output_height: Original image height for the output RLE mask.
+            output_width: Original image width for the output RLE mask.
+            mask_stride_q: Stride between query masks in ``masks``.
+            mask_stride_h: Row stride for ``masks``.
+            mask_stride_w: Column stride for ``masks``.
+            BLOCK_MASK: Power-of-two tile covering ``mask_height * mask_width``
+                so the kernel can find positive source support in one vector.
+            BLOCK_OUT_H: Power-of-two tile covering all output rows.
+            BLOCK_OUT_W: Power-of-two tile covering all output columns.
+            BLOCK_ROI_H: Number of output rows scanned per inner row tile.
+            MAX_ROI_WIDTH: Column band width handled per program before the
+                large-ROI loop advances to the next band.
+            MAX_TOTAL_RUNS: Maximum number of sparse runs that fit in
+                ``records`` excluding the header row.
+            METADATA_STRIDE: Number of float32 fields per metadata row.
+            BLOCK_COLS: Number of output columns scanned together.
+        """
         rank = tl.program_id(0)
         tile_x = tl.program_id(1)
         local_x_offsets = tile_x * BLOCK_COLS + tl.arange(0, BLOCK_COLS)

From 85a3215d6f8f2ddccdca0b3fa3079062d66b6ed1 Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Wed, 3 Jun 2026 00:12:32 +0000
Subject: [PATCH 14/76] Restore RF-DETR postproc benchmark harnesses

---
 .../rfdetr_coco_same_shape_parity.py          | 716 ++++++++++++++++++
 .../rfdetr_nano_seg_trt_workflow.py           | 210 +++++
 .../rfdetr_rle_postprocess_microbenchmark.py  | 622 +++++++++++++++
 3 files changed, 1548 insertions(+)
 create mode 100644 development/stream_interface/rfdetr_coco_same_shape_parity.py
 create mode 100644 development/stream_interface/rfdetr_nano_seg_trt_workflow.py
 create mode 100644 development/stream_interface/rfdetr_rle_postprocess_microbenchmark.py

diff --git a/development/stream_interface/rfdetr_coco_same_shape_parity.py b/development/stream_interface/rfdetr_coco_same_shape_parity.py
new file mode 100644
index 0000000000..2ed30eec50
--- /dev/null
+++ b/development/stream_interface/rfdetr_coco_same_shape_parity.py
@@ -0,0 +1,716 @@
+"""Compare RF-DETR instance-segmentation outputs on same-shape COCO images.
+
+This harness is used to reproduce the correctness table in the RF-DETR Triton
+postprocess PR. It runs a baseline git ref with all RF-DETR fast paths disabled
+and a candidate ref with only Triton RLE postprocess enabled, then compares
+detection counts, classes, boxes, scores, and RLE masks.
+
+Example:
+
+    env PARITY_MODEL_PATH=/path/to/rfdetr-seg-nano-orin-trt-package \
+      python development/stream_interface/rfdetr_coco_same_shape_parity.py \
+        --base-ref main \
+        --candidate-ref opt-python-postproc \
+        --height 480 \
+        --width 640 \
+        --image-count 1000
+"""
+
+import argparse
+import os
+import pickle
+import shutil
+import subprocess
+import sys
+import tempfile
+from collections import deque
+from pathlib import Path
+from typing import Deque, Optional
+
+import cv2
+import numpy as np
+
+SCRIPT_REPO_ROOT = Path(__file__).resolve().parents[2]
+SELF = Path(__file__).resolve()
+PY = sys.executable
+MODEL_ID = "rfdetr-seg-nano"
+CONFIDENCE = 0.4
+DEFAULT_BASE_OUT = "/tmp/rfdetr_coco_same_shape_base.pkl"
+DEFAULT_CANDIDATE_OUT = "/tmp/rfdetr_coco_same_shape_candidate.pkl"
+
+BASE_FLAGS_OFF = {
+    "INFERENCE_MODELS_RFDETR_TRITON_POSTPROC_ENABLED": "false",
+    "INFERENCE_MODELS_RFDETR_TRITON_PREPROC_ENABLED": "false",
+    "RFDETR_PIPELINE_DEPTH": "1",
+    "ENABLE_AUTO_CUDA_GRAPHS_FOR_TRT_BACKEND": "false",
+    "RFDETR_NSIGHT_MARKERS": "false",
+}
+CANDIDATE_FLAGS_ON = {
+    "INFERENCE_MODELS_RFDETR_TRITON_POSTPROC_ENABLED": "true",
+    "INFERENCE_MODELS_RFDETR_TRITON_PREPROC_ENABLED": "false",
+    "RFDETR_PIPELINE_DEPTH": "1",
+    "ENABLE_AUTO_CUDA_GRAPHS_FOR_TRT_BACKEND": "false",
+    "RFDETR_NSIGHT_MARKERS": "false",
+}
+TRT_PACKAGE_REQUIRED_FILES = (
+    "model_config.json",
+    "class_names.txt",
+    "inference_config.json",
+    "engine.plan",
+)
+
+
+def _repo_import_roots(repo_root: Path) -> list[Path]:
+    return [repo_root, repo_root / "inference_models"]
+
+
+def _child_pythonpath(repo_root: Path, existing_pythonpath: Optional[str]) -> str:
+    entries = [str(path) for path in _repo_import_roots(repo_root) if path.exists()]
+    if existing_pythonpath:
+        entries.append(existing_pythonpath)
+    return os.pathsep.join(entries)
+
+
+def _prioritize_local_packages(repo_root: Path) -> None:
+    for search_root in reversed(_repo_import_roots(repo_root)):
+        search_root_str = str(search_root)
+        if search_root_str in sys.path:
+            sys.path.remove(search_root_str)
+        if search_root.exists():
+            sys.path.insert(0, search_root_str)
+    for module_name in list(sys.modules):
+        if module_name == "inference" or module_name.startswith("inference."):
+            sys.modules.pop(module_name, None)
+        if module_name == "inference_models" or module_name.startswith(
+            "inference_models."
+        ):
+            sys.modules.pop(module_name, None)
+
+
+def _bootstrap_repo_root(repo_root: str) -> Path:
+    repo_path = Path(repo_root).resolve()
+    os.chdir(repo_path)
+    _prioritize_local_packages(repo_path)
+    return repo_path
+
+
+def _git_output(repo_root: Path, *args: str) -> str:
+    return subprocess.check_output(
+        ["git", *args],
+        cwd=str(repo_root),
+        text=True,
+        stderr=subprocess.DEVNULL,
+    ).strip()
+
+
+def _safe_git_output(repo_root: Path, *args: str, default: str = "<unknown>") -> str:
+    try:
+        return _git_output(repo_root, *args)
+    except subprocess.CalledProcessError:
+        return default
+
+
+def _remove_worktree(worktree_root: Path) -> None:
+    subprocess.run(
+        ["git", "worktree", "remove", "--force", str(worktree_root)],
+        cwd=str(SCRIPT_REPO_ROOT),
+        check=False,
+        stdout=subprocess.DEVNULL,
+        stderr=subprocess.DEVNULL,
+    )
+    shutil.rmtree(worktree_root, ignore_errors=True)
+
+
+def _materialize_target(ref: str) -> dict:
+    if ref.lower() in {"working-tree", "worktree", "current"}:
+        return {
+            "label": (
+                f"{_safe_git_output(SCRIPT_REPO_ROOT, 'rev-parse', '--abbrev-ref', 'HEAD')} "
+                "(working-tree)"
+            ),
+            "repo_root": SCRIPT_REPO_ROOT,
+            "cleanup": None,
+        }
+    worktree_root = Path(tempfile.mkdtemp(prefix="rfdetr-coco-parity-"))
+    subprocess.run(
+        ["git", "worktree", "add", "--detach", str(worktree_root), ref],
+        cwd=str(SCRIPT_REPO_ROOT),
+        check=True,
+    )
+    return {
+        "label": ref,
+        "repo_root": worktree_root,
+        "cleanup": lambda: _remove_worktree(worktree_root),
+    }
+
+
+def _is_trt_package(package_dir: Path) -> bool:
+    return package_dir.is_dir() and all(
+        (package_dir / filename).exists() for filename in TRT_PACKAGE_REQUIRED_FILES
+    )
+
+
+def _resolve_model_reference() -> str:
+    explicit_model_path = os.environ.get("PARITY_MODEL_PATH")
+    if explicit_model_path and _is_trt_package(Path(explicit_model_path)):
+        return str(Path(explicit_model_path).resolve())
+    for root in (SCRIPT_REPO_ROOT, Path.cwd(), Path(tempfile.gettempdir())):
+        for name in (
+            "rfdetr-seg-nano-orin-trt-package",
+            "rfdetr-seg-nano-trt-package",
+        ):
+            package = root / name
+            if _is_trt_package(package):
+                return str(package.resolve())
+    return MODEL_ID
+
+
+def _select_same_shape_images(
+    coco_dir: Path, shape: tuple[int, int], limit: int
+) -> list[str]:
+    target_h, target_w = shape
+    selected = []
+    for path in sorted(coco_dir.glob("*.jpg")):
+        image = cv2.imread(str(path), cv2.IMREAD_COLOR)
+        if image is None:
+            continue
+        h, w = image.shape[:2]
+        if (h, w) == (target_h, target_w):
+            selected.append(str(path.resolve()))
+            if len(selected) >= limit:
+                break
+    if len(selected) < limit:
+        raise RuntimeError(
+            f"Found only {len(selected)} images with shape {(target_h, target_w)}"
+        )
+    return selected
+
+
+def _normalized_rle(rle: dict) -> dict:
+    counts = rle["counts"]
+    if isinstance(counts, bytes):
+        counts = counts.decode("ascii")
+    return {"size": list(rle["size"]), "counts": counts}
+
+
+def _rle_for_coco_iou(rle: dict) -> dict:
+    counts = rle["counts"]
+    if isinstance(counts, str):
+        counts = counts.encode("ascii")
+    return {"size": list(rle["size"]), "counts": counts}
+
+
+def _rles_equal(left: dict, right: dict) -> bool:
+    left_norm = _rle_for_coco_iou(left)
+    right_norm = _rle_for_coco_iou(right)
+    return (
+        left_norm["size"] == right_norm["size"]
+        and left_norm["counts"] == right_norm["counts"]
+    )
+
+
+def _rle_iou(left: dict, right: dict) -> float:
+    from pycocotools import mask as mask_utils
+
+    return float(
+        mask_utils.iou([_rle_for_coco_iou(left)], [_rle_for_coco_iou(right)], [False])[
+            0, 0
+        ]
+    )
+
+
+def _record_from_response(image_path: str, response) -> dict:
+    """Convert one adapter response into a picklable per-image record.
+
+    Args:
+        image_path: Path of the image the response belongs to.
+        response: Object exposing ``predictions``; every prediction must
+            provide ``x``, ``y``, ``width``, ``height``, ``confidence``,
+            ``class_id`` and ``rle``.
+
+    Returns:
+        A dict with ``_kind`` set to ``"rec"``. When there are no
+        predictions the ``xyxy``/``conf``/``cls``/``rles`` entries are all
+        ``None``; otherwise they hold float32 corner boxes, float32 scores,
+        int32 class ids and a list of normalized RLE dicts.
+
+    Raises:
+        ValueError: If a prediction has no ``rle`` attribute or it is ``None``.
+    """
+    record = {
+        "_kind": "rec",
+        "image_path": image_path,
+        "xyxy": None,
+        "conf": None,
+        "cls": None,
+        "rles": None,
+    }
+    predictions = response.predictions
+    if not predictions:
+        return record
+
+    count = len(predictions)
+    boxes = np.empty((count, 4), dtype=np.float32)
+    scores = np.empty((count,), dtype=np.float32)
+    class_ids = np.empty((count,), dtype=np.int32)
+    masks = []
+    for row, prediction in enumerate(predictions):
+        # Predictions carry center/size boxes; convert them to corner form.
+        left = float(prediction.x) - float(prediction.width) / 2.0
+        top = float(prediction.y) - float(prediction.height) / 2.0
+        right = float(prediction.x) + float(prediction.width) / 2.0
+        bottom = float(prediction.y) + float(prediction.height) / 2.0
+        boxes[row] = (left, top, right, bottom)
+        scores[row] = float(prediction.confidence)
+        class_ids[row] = int(prediction.class_id)
+        mask_rle = getattr(prediction, "rle", None)
+        if mask_rle is None:
+            raise ValueError("Expected RLE predictions; got polygon response.")
+        masks.append(_normalized_rle(mask_rle))
+
+    record["xyxy"] = boxes
+    record["conf"] = scores
+    record["cls"] = class_ids
+    record["rles"] = masks
+    return record
+
+
+def _run_warmup(model, frame, warmup_frames: int) -> None:
+    """Push ``frame`` through preprocess/predict/postprocess ``warmup_frames`` times.
+
+    Results are discarded. When the model exposes ``flush`` it is called
+    once afterwards.
+    """
+    call_options = {"confidence": CONFIDENCE, "response_mask_format": "rle"}
+    remaining = warmup_frames
+    while remaining > 0:
+        preprocessed, metadata = model.preprocess(frame, **call_options)
+        prediction_handle = model.predict(preprocessed, **call_options)
+        model.postprocess(prediction_handle, metadata, **call_options)
+        remaining -= 1
+    if hasattr(model, "flush"):
+        model.flush()
+
+
+def do_run(
+    out_path: str,
+    repo_root: str,
+    label: str,
+    image_list_path: str,
+    warmup_frames: int,
+) -> None:
+    """Run the model over a list of images and stream the results to ``out_path``.
+
+    The output file is a sequence of pickled objects: one header dict, one
+    record per model response (see ``_record_from_response``) and one footer
+    dict holding the number of records written.
+
+    Args:
+        out_path: File that receives the pickled header, records and footer.
+        repo_root: Repository root handed to ``_bootstrap_repo_root``.
+        label: Free-form run label stored in the header and footer and used
+            in progress output.
+        image_list_path: Pickle file holding the list of image paths.
+        warmup_frames: Number of warmup iterations run on the first image.
+
+    Raises:
+        RuntimeError: If an image cannot be read by ``cv2.imread``.
+        ValueError: If a postprocess call that must yield a result does not
+            return exactly one response, or if responses and pending images
+            do not line up after the final flush.
+    """
+    repo_path = _bootstrap_repo_root(repo_root)
+    # setdefault: values already present in the environment win.
+    os.environ.setdefault(
+        "DISABLED_INFERENCE_MODELS_BACKENDS",
+        "torch,torch-script,onnx,hugging-face,ultralytics,custom",
+    )
+    os.environ.setdefault(
+        "ALLOW_INFERENCE_MODELS_DIRECTLY_ACCESS_LOCAL_PACKAGES", "true"
+    )
+
+    # Imported only after the repo root is bootstrapped and the env defaults are set.
+    import torch
+    from inference.core.models.inference_models_adapters import (
+        InferenceModelsInstanceSegmentationAdapter,
+    )
+
+    # NOTE(review): pickle.load executes arbitrary code from the file; only
+    # pass image lists produced by this script's driver mode.
+    with open(image_list_path, "rb") as f:
+        image_paths = pickle.load(f)
+
+    model_reference = _resolve_model_reference()
+    model = InferenceModelsInstanceSegmentationAdapter(model_reference)
+    # Both attributes are optional on the adapter; the defaults describe a
+    # non-pipelined model (depth 1, no response delay).
+    pipeline_depth = int(getattr(model, "_pipeline_depth", 1))
+    response_delay = max(0, int(getattr(model, "_response_delay", 0)))
+    signature = {
+        "git_head": _safe_git_output(repo_path, "rev-parse", "--short", "HEAD"),
+        "git_describe": _safe_git_output(
+            repo_path, "describe", "--always", "--dirty", "--broken"
+        ),
+    }
+
+    # NOTE(review): an empty image list raises IndexError here.
+    first_frame = cv2.imread(image_paths[0], cv2.IMREAD_COLOR)
+    if first_frame is None:
+        raise RuntimeError(f"Could not read image: {image_paths[0]}")
+
+    print(
+        "[run] "
+        f"label={label} repo_root={repo_path} head={signature['git_head']} "
+        f"model_reference={model_reference} pipeline_depth={pipeline_depth}",
+        flush=True,
+    )
+    _run_warmup(model=model, frame=first_frame, warmup_frames=warmup_frames)
+    if torch.cuda.is_available():
+        torch.cuda.synchronize()
+
+    header = {
+        "_kind": "header",
+        "label": label,
+        "repo_root": str(repo_path),
+        "model_reference": model_reference,
+        "confidence": CONFIDENCE,
+        "git_head": signature["git_head"],
+        "git_describe": signature["git_describe"],
+        "pipeline_depth": pipeline_depth,
+        "response_delay": response_delay,
+        "image_count": len(image_paths),
+        # Snapshot of every flag either run may toggle, as seen by this process.
+        "flags": {
+            key: os.environ.get(key)
+            for key in sorted({*BASE_FLAGS_OFF.keys(), *CANDIDATE_FLAGS_ON.keys()})
+        },
+    }
+
+    # Image paths submitted to a pipelined model whose response has not been
+    # written yet, oldest first.
+    pending: Deque[str] = deque()
+    n_records = 0
+    with open(out_path, "wb") as f:
+        pickle.dump(header, f)
+        for index, image_path in enumerate(image_paths):
+            frame = cv2.imread(image_path, cv2.IMREAD_COLOR)
+            if frame is None:
+                raise RuntimeError(f"Could not read image: {image_path}")
+            preprocessed, metadata = model.preprocess(
+                frame,
+                confidence=CONFIDENCE,
+                response_mask_format="rle",
+            )
+            prediction_handle = model.predict(
+                preprocessed,
+                confidence=CONFIDENCE,
+                response_mask_format="rle",
+            )
+            responses = model.postprocess(
+                prediction_handle,
+                metadata,
+                confidence=CONFIDENCE,
+                response_mask_format="rle",
+            )
+
+            if pipeline_depth <= 1:
+                # Non-pipelined: the response belongs to the image just submitted.
+                if len(responses) != 1:
+                    raise ValueError(f"image {index}: expected one response")
+                pickle.dump(_record_from_response(image_path, responses[0]), f)
+                n_records += 1
+            else:
+                # Pipelined: a response is only expected once more than
+                # `response_delay` images are queued, and it is attributed to
+                # the oldest queued image.
+                pending.append(image_path)
+                if len(pending) > response_delay:
+                    if len(responses) != 1:
+                        raise ValueError(f"image {index}: expected one response")
+                    response_image_path = pending.popleft()
+                    pickle.dump(
+                        _record_from_response(response_image_path, responses[0]), f
+                    )
+                    n_records += 1
+            if (index + 1) % 25 == 0:
+                print(f"  [{label}] images={index + 1} records={n_records}", flush=True)
+
+        # Drain responses still held by the model; each one is paired with
+        # the next pending image in submission order.
+        flush_responses = model.flush() if hasattr(model, "flush") else []
+        for response in flush_responses:
+            if not pending:
+                raise ValueError("flush returned response with no pending image")
+            response_image_path = pending.popleft()
+            pickle.dump(_record_from_response(response_image_path, response), f)
+            n_records += 1
+        if pending:
+            raise ValueError(f"pending images left after flush: {len(pending)}")
+        pickle.dump(
+            {
+                "_kind": "footer",
+                "label": label,
+                "n_records": n_records,
+            },
+            f,
+        )
+    print(f"[run] label={label} records={n_records} saved={out_path}", flush=True)
+
+
+def _iter_pickles(path: str):
+    """Yield every object pickled back-to-back in ``path`` until end of file.
+
+    Unpickling executes code from the file, so only read files written by
+    this script.
+    """
+    with open(path, "rb") as stream:
+        try:
+            while True:
+                yield pickle.load(stream)
+        except EOFError:
+            # End of the stream reached: stop iterating.
+            return
+
+
+def _box_iou(left, right) -> float:
+    """Return the intersection-over-union of two ``(x0, y0, x1, y1)`` boxes.
+
+    Negative extents are clamped to zero; the result is ``0.0`` when the
+    union has no area.
+    """
+
+    def _area(box) -> float:
+        box_w = max(0.0, float(box[2] - box[0]))
+        box_h = max(0.0, float(box[3] - box[1]))
+        return box_w * box_h
+
+    overlap_x0 = max(left[0], right[0])
+    overlap_y0 = max(left[1], right[1])
+    overlap_x1 = min(left[2], right[2])
+    overlap_y1 = min(left[3], right[3])
+    overlap_w = max(0.0, float(overlap_x1 - overlap_x0))
+    overlap_h = max(0.0, float(overlap_y1 - overlap_y0))
+    intersection = overlap_w * overlap_h
+    union = _area(left) + _area(right) - intersection
+    if union > 0:
+        return intersection / union
+    return 0.0
+
+
+def do_compare(base_path: str, candidate_path: str) -> None:
+    """Print a parity report for two capture files written by ``do_run``.
+
+    Both files are read as streams of pickled objects (header, per-image
+    records, footer). Records are paired in file order. For each image,
+    every candidate detection is greedily matched to the not-yet-used base
+    detection of the same class with the highest box IoU above 0.5. Box
+    IoU, absolute score delta, mask IoU and exact RLE equality are
+    accumulated over matched pairs and summarised on stdout.
+
+    Pairing stops as soon as either stream reaches its footer or ends. An
+    object is only treated as a footer when its ``_kind`` is ``"footer"``,
+    so captures of different lengths still produce a report, with the
+    differing record counts visible in it. A missing footer is reported as
+    ``n/a``.
+
+    Args:
+        base_path: Capture file of the baseline run.
+        candidate_path: Capture file of the candidate run.
+
+    Raises:
+        AssertionError: If two paired records refer to different images.
+        StopIteration: If either file contains no header object.
+    """
+    base_iter = _iter_pickles(base_path)
+    candidate_iter = _iter_pickles(candidate_path)
+    base_header = next(base_iter)
+    candidate_header = next(candidate_iter)
+
+    n_images = 0
+    tot_base = 0
+    tot_candidate = 0
+    matched = 0
+    count_mismatch = 0
+    class_disagree = 0
+    pixel_identical = 0
+    box_ious = []
+    score_deltas = []
+    mask_ious = []
+    first_mismatches = []
+    base_footer = None
+    candidate_footer = None
+
+    while True:
+        # Advance both streams explicitly (instead of zip) so that neither
+        # side silently loses an object when the other one ends first.
+        base_record = next(base_iter, None)
+        candidate_record = next(candidate_iter, None)
+        base_ended = base_record is None or base_record.get("_kind") == "footer"
+        candidate_ended = (
+            candidate_record is None or candidate_record.get("_kind") == "footer"
+        )
+        if base_ended or candidate_ended:
+            # Keep only genuine footers; a plain record on the longer side
+            # must not be mistaken for one.
+            if base_ended and base_record is not None:
+                base_footer = base_record
+            if candidate_ended and candidate_record is not None:
+                candidate_footer = candidate_record
+            break
+        if base_record["image_path"] != candidate_record["image_path"]:
+            raise AssertionError(
+                (base_record["image_path"], candidate_record["image_path"])
+            )
+        n_images += 1
+        n_base = 0 if base_record["xyxy"] is None else len(base_record["xyxy"])
+        n_candidate = (
+            0 if candidate_record["xyxy"] is None else len(candidate_record["xyxy"])
+        )
+        tot_base += n_base
+        tot_candidate += n_candidate
+        if n_base != n_candidate:
+            count_mismatch += 1
+            # Keep only the first few mismatches for the report.
+            if len(first_mismatches) < 10:
+                first_mismatches.append(
+                    (Path(base_record["image_path"]).name, n_base, n_candidate)
+                )
+        if n_base == 0 and n_candidate == 0:
+            continue
+
+        # Empty records store None; substitute empty arrays so the matching
+        # loops below can index uniformly.
+        base_boxes = base_record["xyxy"] if n_base else np.zeros((0, 4), dtype=float)
+        candidate_boxes = (
+            candidate_record["xyxy"] if n_candidate else np.zeros((0, 4), dtype=float)
+        )
+        base_scores = base_record["conf"] if n_base else np.zeros(0, dtype=float)
+        candidate_scores = (
+            candidate_record["conf"] if n_candidate else np.zeros(0, dtype=float)
+        )
+        base_classes = base_record["cls"] if n_base else np.zeros(0, dtype=np.int32)
+        candidate_classes = (
+            candidate_record["cls"] if n_candidate else np.zeros(0, dtype=np.int32)
+        )
+        base_rles = base_record["rles"] or []
+        candidate_rles = candidate_record["rles"] or []
+
+        # Base detections already claimed by an earlier candidate detection.
+        used = set()
+        for candidate_idx in range(n_candidate):
+            best_base_idx = -1
+            # Starting at 0.5 makes the strict '>' below act as the IoU threshold.
+            best_iou = 0.5
+            for base_idx in range(n_base):
+                if base_idx in used:
+                    continue
+                if int(base_classes[base_idx]) != int(candidate_classes[candidate_idx]):
+                    continue
+                box_iou = _box_iou(base_boxes[base_idx], candidate_boxes[candidate_idx])
+                if box_iou > best_iou:
+                    best_iou = box_iou
+                    best_base_idx = base_idx
+            if best_base_idx < 0:
+                continue
+
+            used.add(best_base_idx)
+            matched += 1
+            box_ious.append(best_iou)
+            score_deltas.append(
+                abs(
+                    float(base_scores[best_base_idx])
+                    - float(candidate_scores[candidate_idx])
+                )
+            )
+            # Matching above is restricted to equal classes, so this counter
+            # stays at zero; it is kept so the report layout is unchanged.
+            if int(base_classes[best_base_idx]) != int(
+                candidate_classes[candidate_idx]
+            ):
+                class_disagree += 1
+            if base_rles and candidate_rles:
+                base_rle = base_rles[best_base_idx]
+                candidate_rle = candidate_rles[candidate_idx]
+                mask_ious.append(_rle_iou(base_rle, candidate_rle))
+                if _rles_equal(base_rle, candidate_rle):
+                    pixel_identical += 1
+
+    # The longer stream still has its footer ahead; scan forward to find it.
+    if base_footer is None:
+        for obj in base_iter:
+            if obj.get("_kind") == "footer":
+                base_footer = obj
+                break
+    if candidate_footer is None:
+        for obj in candidate_iter:
+            if obj.get("_kind") == "footer":
+                candidate_footer = obj
+                break
+
+    # A capture cut short has no footer; report that instead of crashing.
+    base_images = base_footer["n_records"] if base_footer is not None else "n/a"
+    candidate_images = (
+        candidate_footer["n_records"] if candidate_footer is not None else "n/a"
+    )
+
+    print()
+    print(
+        f"==== COCO same-shape parity: {base_header['label']} vs "
+        f"{candidate_header['label']} ===="
+    )
+    print(f"  base repo                    : {base_header['git_describe']}")
+    print(f"  candidate repo               : {candidate_header['git_describe']}")
+    print(
+        f"  pipeline depth (base/cand)   : "
+        f"{base_header['pipeline_depth']} / {candidate_header['pipeline_depth']}"
+    )
+    print(f"  images base / candidate      : {base_images} / {candidate_images}")
+    print(f"  detections base / candidate  : {tot_base} / {tot_candidate}")
+    print(
+        f"  matched same-class IoU>0.5   : {matched} "
+        f"({100 * matched / max(1, tot_base):.2f}% of base)"
+    )
+    print(f"  count-mismatch images        : {count_mismatch}")
+    if first_mismatches:
+        print(f"  first count mismatches       : {first_mismatches}")
+    print(f"  class-id disagreements       : {class_disagree}")
+    if box_ious:
+        print(
+            f"  mean / min box IoU           : {np.mean(box_ious):.6f} / {np.min(box_ious):.6f}"
+        )
+    if score_deltas:
+        print(
+            f"  mean / max |delta score|     : "
+            f"{np.mean(score_deltas):.3e} / {np.max(score_deltas):.3e}"
+        )
+    if mask_ious:
+        mask_iou_array = np.array(mask_ious)
+        print(
+            f"  mean / min mask IoU          : "
+            f"{mask_iou_array.mean():.6f} / {mask_iou_array.min():.6f}"
+        )
+        print(f"  pixel-identical masks        : {pixel_identical}/{len(mask_ious)}")
+
+
+def _run_child(
+    repo_root: Path,
+    label: str,
+    out_path: str,
+    image_list_path: str,
+    flags: dict,
+    warmup_frames: int,
+) -> None:
+    """Re-invoke this script in ``run`` mode as a subprocess.
+
+    The child inherits the current environment overlaid with ``flags``, plus
+    fixed ``MPLCONFIGDIR``/``PARITY_MODEL_PATH`` values and a ``PYTHONPATH``
+    built by ``_child_pythonpath``. A non-zero exit status raises
+    ``subprocess.CalledProcessError``.
+    """
+    model_package_dir = (
+        SCRIPT_REPO_ROOT / "rfdetr-seg-nano-orin-trt-package"
+    ).resolve()
+    # Later entries win: the fixed settings override anything in `flags`.
+    child_env = {
+        **os.environ,
+        **flags,
+        "MPLCONFIGDIR": "/tmp/mpl",
+        "PARITY_MODEL_PATH": str(model_package_dir),
+    }
+    child_env["PYTHONPATH"] = _child_pythonpath(
+        repo_root, child_env.get("PYTHONPATH")
+    )
+
+    cli_options = (
+        ("--mode", "run"),
+        ("--repo-root", str(repo_root)),
+        ("--label", label),
+        ("--out", out_path),
+        ("--image-list", image_list_path),
+        ("--warmup-frames", str(warmup_frames)),
+    )
+    command = [PY, str(SELF)]
+    for option, value in cli_options:
+        command.extend((option, value))
+
+    print(
+        "\n---- child ----\n"
+        f"  label={label}\n"
+        f"  repo_root={repo_root}\n"
+        f"  out={out_path}\n"
+        f"  flags={flags}",
+        flush=True,
+    )
+    subprocess.run(command, cwd=str(SCRIPT_REPO_ROOT), env=child_env, check=True)
+
+
+def main() -> None:
+    """Command-line entry point with three modes.
+
+    * ``run``: execute ``do_run`` in this process (requires ``--out`` and
+      ``--image-list``).
+    * ``compare``: execute ``do_compare`` on two existing capture files.
+    * ``driver`` (default): select same-shape images, run the base and the
+      candidate target each in a child process, then compare the captures.
+
+    Raises:
+        ValueError: In ``run`` mode when ``--out`` or ``--image-list`` is missing.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--mode", choices=("driver", "run", "compare"), default="driver"
+    )
+    parser.add_argument("--repo-root")
+    parser.add_argument("--label")
+    parser.add_argument("--out")
+    parser.add_argument("--image-list")
+    parser.add_argument("--base", default=DEFAULT_BASE_OUT)
+    parser.add_argument("--candidate", default=DEFAULT_CANDIDATE_OUT)
+    parser.add_argument("--base-ref", default="main")
+    parser.add_argument("--candidate-ref", default="working-tree")
+    parser.add_argument("--coco-dir", default="coco/val2017")
+    parser.add_argument("--height", type=int, default=480)
+    parser.add_argument("--width", type=int, default=640)
+    parser.add_argument("--image-count", type=int, default=100)
+    parser.add_argument("--warmup-frames", type=int, default=10)
+    parser.add_argument("--keep-worktrees", action="store_true")
+    args = parser.parse_args()
+
+    if args.mode == "run":
+        if not args.out or not args.image_list:
+            raise ValueError("--out and --image-list are required in run mode")
+        do_run(
+            out_path=args.out,
+            repo_root=args.repo_root or str(SCRIPT_REPO_ROOT),
+            label=args.label or "run",
+            image_list_path=args.image_list,
+            warmup_frames=args.warmup_frames,
+        )
+        return
+    if args.mode == "compare":
+        do_compare(args.base, args.candidate)
+        return
+
+    coco_dir = (SCRIPT_REPO_ROOT / args.coco_dir).resolve()
+    image_paths = _select_same_shape_images(
+        coco_dir=coco_dir,
+        shape=(args.height, args.width),
+        limit=args.image_count,
+    )
+    # mkstemp returns an already-open descriptor together with the path;
+    # take ownership of it so it is closed rather than leaked.
+    image_list_fd, image_list_name = tempfile.mkstemp(
+        prefix="rfdetr-coco-images-", suffix=".pkl"
+    )
+    image_list_path = Path(image_list_name)
+    cleanup_callbacks = []
+    try:
+        with os.fdopen(image_list_fd, "wb") as f:
+            pickle.dump(image_paths, f)
+        print(
+            f"[driver] selected {len(image_paths)} images with shape "
+            f"{(args.height, args.width)} from {coco_dir}",
+            flush=True,
+        )
+
+        # Targets are materialized inside the try block and their cleanups
+        # registered right away, so a failure on the second target still
+        # removes the temp file and cleans up the first target.
+        base_target = _materialize_target(args.base_ref)
+        if callable(base_target["cleanup"]):
+            cleanup_callbacks.append(base_target["cleanup"])
+        candidate_target = _materialize_target(args.candidate_ref)
+        if callable(candidate_target["cleanup"]):
+            cleanup_callbacks.append(candidate_target["cleanup"])
+
+        _run_child(
+            repo_root=Path(base_target["repo_root"]),
+            label=f"{base_target['label']} flags-off",
+            out_path=args.base,
+            image_list_path=str(image_list_path),
+            flags=BASE_FLAGS_OFF,
+            warmup_frames=args.warmup_frames,
+        )
+        _run_child(
+            repo_root=Path(candidate_target["repo_root"]),
+            label=f"{candidate_target['label']} flags-on",
+            out_path=args.candidate,
+            image_list_path=str(image_list_path),
+            flags=CANDIDATE_FLAGS_ON,
+            warmup_frames=args.warmup_frames,
+        )
+    finally:
+        image_list_path.unlink(missing_ok=True)
+        if not args.keep_worktrees:
+            # Undo in reverse order of creation.
+            for cleanup in reversed(cleanup_callbacks):
+                cleanup()
+    do_compare(args.base, args.candidate)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/development/stream_interface/rfdetr_nano_seg_trt_workflow.py b/development/stream_interface/rfdetr_nano_seg_trt_workflow.py
new file mode 100644
index 0000000000..9f209af93f
--- /dev/null
+++ b/development/stream_interface/rfdetr_nano_seg_trt_workflow.py
@@ -0,0 +1,210 @@
+"""Minimal benchmark: RF-DETR instance segmentation through inference-models,
+run via InferencePipeline on a single video source.
+
+Workflow has exactly one block — the segmentation model. No annotators, no
+buffer strategies, no rate limiting.
+
+The `--backend` flag (trt | onnx | torch) is parsed before importing
+`inference` and pins the auto-loader by setting
+`DISABLED_INFERENCE_MODELS_BACKENDS` to every backend except the chosen one,
+so the benchmark numbers correspond unambiguously to a single execution path.
+
+Defaults: rfdetr-seg-nano @ confidence 0.4 on the native TRT backend.
+"""
+
+import argparse
+import importlib.util
+import json
+import os
+from pathlib import Path
+import sys
+
+# Every backend name this script knows about; all of them except the one
+# selected with --backend are written to DISABLED_INFERENCE_MODELS_BACKENDS.
+_ALL_BACKENDS = {
+    "torch",
+    "torch-script",
+    "onnx",
+    "trt",
+    "hugging-face",
+    "ultralytics",
+    "custom",
+}
+# Default value of --model_id.
+_DEFAULT_MODEL_ID = "rfdetr-seg-nano"
+# Directory name looked up first (relative to the current working directory)
+# when searching for a local TRT package.
+_PREFERRED_LOCAL_TRT_PACKAGE = "rfdetr-seg-nano-orin-trt-package"
+# Model id substituted for the default one when a local TRT package is used.
+_LOCAL_WORKFLOW_MODEL_ID = f"{_DEFAULT_MODEL_ID}/1"
+# This file lives two directories below the repository root.
+_REPO_ROOT = Path(__file__).resolve().parents[2]
+_INFERENCE_MODELS_ROOT = _REPO_ROOT / "inference_models"
+
+
+def _is_local_trt_package(path: Path) -> bool:
+    """Tell whether ``path`` is a directory holding a TRT model package.
+
+    A package must contain ``engine.plan``, ``model_config.json`` and
+    ``inference_config.json``, and its ``model_config.json`` must be a JSON
+    object whose ``backend_type`` is ``"trt"``. Any unreadable, undecodable
+    or malformed config yields ``False`` instead of an exception, so the
+    function is safe to call on arbitrary directory entries.
+    """
+    if not path.is_dir():
+        return False
+    required_files = ("engine.plan", "model_config.json", "inference_config.json")
+    if not all((path / f).is_file() for f in required_files):
+        return False
+    try:
+        model_config = json.loads((path / "model_config.json").read_text())
+    except (OSError, ValueError):
+        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
+        return False
+    # Valid JSON need not be an object (e.g. a list); only a dict has .get().
+    if not isinstance(model_config, dict):
+        return False
+    return model_config.get("backend_type") == "trt"
+
+
+def _find_local_trt_package() -> str | None:
+    """Locate a local TRT package in the current working directory.
+
+    The preferred directory name wins when it is a valid package. Otherwise
+    the working directory is scanned and a result is returned only when
+    exactly one entry qualifies; zero or several matches yield ``None``.
+    """
+    preferred = Path.cwd() / _PREFERRED_LOCAL_TRT_PACKAGE
+    if _is_local_trt_package(preferred):
+        return str(preferred.resolve())
+
+    matches = []
+    for entry in Path.cwd().iterdir():
+        if _is_local_trt_package(entry):
+            matches.append(entry.resolve())
+    matches.sort()
+    # Refuse to guess between several packages.
+    return str(matches[0]) if len(matches) == 1 else None
+
+
+def _select_backend_from_argv() -> str:
+    """Read ``--backend`` from the command line, ignoring all other arguments.
+
+    Help handling is disabled on this parser and unknown arguments are
+    tolerated, so it does not interfere with the full parser built in
+    ``main``. Defaults to ``"trt"``.
+    """
+    backend_parser = argparse.ArgumentParser(add_help=False)
+    backend_parser.add_argument(
+        "--backend", choices=("trt", "onnx", "torch"), default="trt"
+    )
+    known, _unknown = backend_parser.parse_known_args()
+    return known.backend
+
+
+# Everything below runs at import time, before the `inference` package is
+# loaded, because the backend selection is passed through environment variables.
+_BACKEND = _select_backend_from_argv()
+# A local TRT package is only searched for when the TRT backend is selected.
+_LOCAL_TRT_PACKAGE = _find_local_trt_package() if _BACKEND == "trt" else None
+if _LOCAL_TRT_PACKAGE is not None:
+    # setdefault: a value already present in the environment is kept.
+    os.environ.setdefault(
+        "ALLOW_INFERENCE_MODELS_DIRECTLY_ACCESS_LOCAL_PACKAGES", "True"
+    )
+os.environ.setdefault(
+    "ONNXRUNTIME_EXECUTION_PROVIDERS",
+    "[TensorrtExecutionProvider,CUDAExecutionProvider,CPUExecutionProvider]",
+)
+# Unconditional assignment (not setdefault): every backend except the
+# selected one is disabled, overriding any inherited value.
+os.environ["DISABLED_INFERENCE_MODELS_BACKENDS"] = ",".join(
+    sorted(_ALL_BACKENDS - {_BACKEND})
+)
+# Put the checkout's package directories at the front of the import path.
+for path in (str(_INFERENCE_MODELS_ROOT), str(_REPO_ROOT)):
+    if path not in sys.path:
+        sys.path.insert(0, path)
+# Drop any already-imported `inference` / `inference_models` modules;
+# presumably so that later imports resolve against the paths added above.
+for module_name in list(sys.modules):
+    if module_name == "inference" or module_name.startswith("inference."):
+        del sys.modules[module_name]
+    if module_name == "inference_models" or module_name.startswith("inference_models."):
+        del sys.modules[module_name]
+
+from time import perf_counter
+
+# Load the `inference` package explicitly from this checkout's directory
+# instead of relying on normal import resolution.
+_LOCAL_INFERENCE_SPEC = importlib.util.spec_from_file_location(
+    "inference",
+    _REPO_ROOT / "inference" / "__init__.py",
+    submodule_search_locations=[str(_REPO_ROOT / "inference")],
+)
+if _LOCAL_INFERENCE_SPEC is None or _LOCAL_INFERENCE_SPEC.loader is None:
+    raise RuntimeError("Could not load local inference package")
+_LOCAL_INFERENCE_MODULE = importlib.util.module_from_spec(_LOCAL_INFERENCE_SPEC)
+# Register the module before executing it so that imports of `inference`
+# made while it initializes resolve to this module object.
+sys.modules["inference"] = _LOCAL_INFERENCE_MODULE
+_LOCAL_INFERENCE_SPEC.loader.exec_module(_LOCAL_INFERENCE_MODULE)
+InferencePipeline = _LOCAL_INFERENCE_MODULE.InferencePipeline
+
+
+def _resolve_model_id(model_id: str, backend: str) -> str:
+    """Map the default model id to the local workflow id when applicable.
+
+    The substitution happens only for the ``trt`` backend, only for the
+    default model id, and only when a local TRT package was found at import
+    time; otherwise ``model_id`` is returned unchanged.
+    """
+    if backend != "trt":
+        return model_id
+    if model_id != _DEFAULT_MODEL_ID:
+        return model_id
+    if not _LOCAL_TRT_PACKAGE:
+        return model_id
+    return _LOCAL_WORKFLOW_MODEL_ID
+
+
+def _prepare_local_workflow_model_bundle(model_id: str) -> None:
+    """Expose the local TRT package under the workflow model id.
+
+    Does nothing unless a local TRT package was found and ``model_id`` is
+    the local workflow model id. Otherwise it:
+
+    1. creates a symlink named after ``model_id`` (relative to the current
+       working directory) that points at the package directory, replacing
+       a dangling symlink left over from a package that has since moved;
+    2. writes ``model_type.json`` into ``$MODEL_CACHE_DIR/<model_id>``
+       (``/tmp/cache`` when the variable is unset).
+    """
+    if _LOCAL_TRT_PACKAGE is None or model_id != _LOCAL_WORKFLOW_MODEL_ID:
+        return
+
+    model_dir = Path(model_id)
+    model_dir.parent.mkdir(parents=True, exist_ok=True)
+    target_dir = Path(_LOCAL_TRT_PACKAGE)
+    # Path.exists() follows symlinks, so it is False for a dangling link.
+    # Creating the link anyway would fail with FileExistsError, so remove
+    # the stale link first.
+    if model_dir.is_symlink() and not model_dir.exists():
+        model_dir.unlink()
+    if not model_dir.exists():
+        model_dir.symlink_to(target_dir, target_is_directory=True)
+
+    model_cache_dir = Path(os.environ.get("MODEL_CACHE_DIR", "/tmp/cache")) / model_id
+    model_cache_dir.mkdir(parents=True, exist_ok=True)
+    model_type_path = model_cache_dir / "model_type.json"
+    model_metadata = {
+        "project_task_type": "instance-segmentation",
+        "model_type": "rfdetr-seg-nano",
+    }
+    model_type_path.write_text(json.dumps(model_metadata, indent=4))
+
+
+def build_workflow(model_id: str, confidence: float) -> dict:
+    """Build the single-step workflow specification used by the benchmark.
+
+    The workflow takes one image input, runs one instance-segmentation
+    model step with a custom confidence, and exposes that step's
+    predictions as its only output.
+    """
+    image_input = {"type": "WorkflowImage", "name": "image"}
+    segmentation_step = {
+        "type": "roboflow_core/roboflow_instance_segmentation_model@v3",
+        "name": "segmentation",
+        "images": "$inputs.image",
+        "model_id": model_id,
+        "confidence_mode": "custom",
+        "custom_confidence": confidence,
+    }
+    predictions_output = {
+        "type": "JsonField",
+        "name": "predictions",
+        "selector": "$steps.segmentation.predictions",
+    }
+    return {
+        "version": "1.0",
+        "inputs": [image_input],
+        "steps": [segmentation_step],
+        "outputs": [predictions_output],
+    }
+
+
+# Number of non-None predictions seen by `sink` so far.
+FRAME_COUNT = 0
+# perf_counter() value at the first `sink` call; None until then.
+START_TIME = None
+# A progress line is printed whenever FRAME_COUNT is a multiple of this.
+PROGRESS_EVERY = 50
+
+
+def sink(predictions, _video_frames) -> None:
+    """Prediction callback: count frames and periodically print throughput.
+
+    Args:
+        predictions: A single prediction or a list of predictions; ``None``
+            entries are not counted.
+        _video_frames: Unused.
+    """
+    global FRAME_COUNT, START_TIME
+    del _video_frames
+    if not isinstance(predictions, list):
+        predictions = [predictions]
+    FRAME_COUNT += sum(p is not None for p in predictions)
+    if START_TIME is None:
+        START_TIME = perf_counter()
+    if FRAME_COUNT % PROGRESS_EVERY == 0:
+        # The clock may not have advanced since START_TIME was set (first
+        # call, coarse timers); report 0 fps instead of dividing by zero.
+        elapsed = perf_counter() - START_TIME
+        fps = FRAME_COUNT / elapsed if elapsed > 0 else 0.0
+        print(f"[progress] frames={FRAME_COUNT} fps={fps:.2f}", flush=True)
+
+
+def main() -> None:
+    """Parse the CLI, run the one-step workflow on the video and print FPS.
+
+    ``--backend`` is accepted here only so that argument parsing succeeds
+    and ``--help`` lists it; its effect was already applied at import time.
+    """
+    cli = argparse.ArgumentParser()
+    cli.add_argument("--video_reference", required=True)
+    cli.add_argument("--model_id", default=_DEFAULT_MODEL_ID)
+    cli.add_argument("--confidence", type=float, default=0.4)
+    cli.add_argument(
+        "--backend",
+        choices=("trt", "onnx", "torch"),
+        default="trt",
+        help="inference-models backend (consumed pre-import via env var).",
+    )
+    options = cli.parse_args()
+
+    resolved_model_id = _resolve_model_id(options.model_id, options.backend)
+    _prepare_local_workflow_model_bundle(resolved_model_id)
+    if resolved_model_id != options.model_id:
+        print(
+            f"[model] using local TRT package via workflow model id: {resolved_model_id}",
+            flush=True,
+        )
+
+    workflow_spec = build_workflow(resolved_model_id, options.confidence)
+    pipeline = InferencePipeline.init_with_workflow(
+        video_reference=options.video_reference,
+        workflow_specification=workflow_spec,
+        on_prediction=sink,
+    )
+    pipeline.start()
+    pipeline.join()
+
+    # START_TIME stays None when the sink was never called.
+    if START_TIME:
+        elapsed = perf_counter() - START_TIME
+    else:
+        elapsed = 0.0
+    if elapsed > 0:
+        fps = FRAME_COUNT / elapsed
+    else:
+        fps = 0.0
+    print(f"frames={FRAME_COUNT} elapsed={elapsed:.2f}s fps={fps:.2f}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/development/stream_interface/rfdetr_rle_postprocess_microbenchmark.py b/development/stream_interface/rfdetr_rle_postprocess_microbenchmark.py
new file mode 100644
index 0000000000..5b939a6994
--- /dev/null
+++ b/development/stream_interface/rfdetr_rle_postprocess_microbenchmark.py
@@ -0,0 +1,622 @@
+"""Capture/replay benchmark for RF-DETR RLE instance-segmentation postprocess.
+
+Default usage captures 100 invocations from the e2e workflow and immediately
+replays them:
+
+    python development/stream_interface/rfdetr_rle_postprocess_microbenchmark.py \
+        --video_reference vehicles_1080p.mp4
+
+Replay-only usage:
+
+    python development/stream_interface/rfdetr_rle_postprocess_microbenchmark.py \
+        --mode replay --cases-dir temp/rfdetr_rle_postprocess_cases
+"""
+
+import argparse
+import functools
+import importlib.util
+import json
+import os
+from pathlib import Path
+import pickle
+import sys
+import threading
+from time import perf_counter, time
+from typing import Any, Dict, List, Optional, Tuple, Union
+
+import torch
+
+# This file lives two directories below the repository root.
+_REPO_ROOT = Path(__file__).resolve().parents[2]
+_INFERENCE_MODELS_ROOT = _REPO_ROOT / "inference_models"
+# Sibling script that is loaded as a module to reuse its workflow helpers.
+_WORKFLOW_PATH = (
+    _REPO_ROOT / "development" / "stream_interface" / "rfdetr_nano_seg_trt_workflow.py"
+)
+# Name of the postprocess function whose calls are captured.
+_TARGET_FUNCTION = "post_process_instance_segmentation_results_to_rle_masks"
+# Written into every captured case as "schema_version".
+_SCHEMA_VERSION = 1
+
+
+def _ensure_local_import_paths() -> None:
+    """Put the checkout's package roots at the front of ``sys.path`` if absent."""
+    wanted = (str(_INFERENCE_MODELS_ROOT), str(_REPO_ROOT))
+    for entry in wanted:
+        if entry in sys.path:
+            continue
+        sys.path.insert(0, entry)
+
+
+def _load_workflow_module() -> Any:
+    """Execute the workflow script as a module and return the module object.
+
+    The module is loaded under a dedicated name and is not registered in
+    ``sys.modules`` by this function.
+
+    Raises:
+        RuntimeError: If no import spec or loader can be created for the script.
+    """
+    module_spec = importlib.util.spec_from_file_location(
+        "rfdetr_nano_seg_trt_workflow_for_microbenchmark",
+        _WORKFLOW_PATH,
+    )
+    loader = None if module_spec is None else module_spec.loader
+    if loader is None:
+        raise RuntimeError(f"Could not load workflow module from {_WORKFLOW_PATH}")
+    workflow_module = importlib.util.module_from_spec(module_spec)
+    loader.exec_module(workflow_module)
+    return workflow_module
+
+
+def _tensor_to_cpu(tensor: torch.Tensor) -> torch.Tensor:
+    """Return a detached CPU copy of ``tensor``."""
+    detached = tensor.detach()
+    on_cpu = detached.cpu()
+    # Clone so the snapshot never shares storage with the live tensor.
+    return on_cpu.clone()
+
+
+def _threshold_to_cpu(
+    threshold: Union[float, torch.Tensor],
+) -> Union[float, torch.Tensor]:
+    """Snapshot a tensor threshold on CPU; return any other value as-is."""
+    is_tensor = isinstance(threshold, torch.Tensor)
+    return _tensor_to_cpu(threshold) if is_tensor else threshold
+
+
+def _classes_re_mapping_to_cpu(classes_re_mapping: Optional[Any]) -> Optional[dict]:
+    """Snapshot the two tensors of a class re-mapping into a plain dict.
+
+    Returns ``None`` when no re-mapping is given.
+    """
+    if classes_re_mapping is None:
+        return None
+    remaining = _tensor_to_cpu(classes_re_mapping.remaining_class_ids)
+    mapping = _tensor_to_cpu(classes_re_mapping.class_mapping)
+    return {"remaining_class_ids": remaining, "class_mapping": mapping}
+
+
+def _snapshot_inputs(
+    *,
+    bboxes: torch.Tensor,
+    logits: torch.Tensor,
+    masks: torch.Tensor,
+    pre_processing_meta: List[Any],
+    threshold: Union[float, torch.Tensor],
+    num_classes: int,
+    classes_re_mapping: Optional[Any],
+) -> dict:
+    """Build a picklable snapshot of the target function's inputs.
+
+    Tensors are copied to CPU; ``pre_processing_meta`` and ``num_classes``
+    are stored by reference.
+    """
+    snapshot: dict = {}
+    snapshot["bboxes"] = _tensor_to_cpu(bboxes)
+    snapshot["logits"] = _tensor_to_cpu(logits)
+    snapshot["masks"] = _tensor_to_cpu(masks)
+    snapshot["pre_processing_meta"] = pre_processing_meta
+    snapshot["threshold"] = _threshold_to_cpu(threshold)
+    snapshot["num_classes"] = num_classes
+    snapshot["classes_re_mapping"] = _classes_re_mapping_to_cpu(classes_re_mapping)
+    return snapshot
+
+
+def _snapshot_mask(mask: Any) -> dict:
+    """Snapshot a detection mask as a tagged dict.
+
+    A tensor becomes ``{"kind": "dense", "tensor": <CPU copy>}``. Anything
+    else is read through its ``image_size`` and ``masks`` attributes and
+    becomes ``{"kind": "rle", ...}``.
+    """
+    if not isinstance(mask, torch.Tensor):
+        size = tuple(mask.image_size)
+        encoded = list(mask.masks)
+        return {"kind": "rle", "image_size": size, "masks": encoded}
+    return {"kind": "dense", "tensor": _tensor_to_cpu(mask)}
+
+
+def _snapshot_output(output: List[Any]) -> List[dict]:
+    """Snapshot every detection object of a postprocess result.
+
+    Tensors are copied to CPU and the mask is converted by
+    ``_snapshot_mask``; the two metadata attributes are stored by reference.
+    """
+    snapshots: List[dict] = []
+    for detection in output:
+        entry = {
+            "xyxy": _tensor_to_cpu(detection.xyxy),
+            "class_id": _tensor_to_cpu(detection.class_id),
+            "confidence": _tensor_to_cpu(detection.confidence),
+            "mask": _snapshot_mask(detection.mask),
+            "image_metadata": detection.image_metadata,
+            "bboxes_metadata": detection.bboxes_metadata,
+        }
+        snapshots.append(entry)
+    return snapshots
+
+
+def _bind_target_arguments(args: tuple, kwargs: dict) -> dict:
+    """Map a captured call's positional and keyword arguments to parameter names.
+
+    Positional values are assigned to the expected names in order, then
+    keyword values are applied on top. Only the expected names are
+    returned, in their declared order.
+
+    Raises:
+        RuntimeError: If any expected parameter is absent from the call.
+    """
+    expected = (
+        "bboxes",
+        "logits",
+        "masks",
+        "pre_processing_meta",
+        "threshold",
+        "num_classes",
+        "classes_re_mapping",
+    )
+    supplied = {**dict(zip(expected, args)), **kwargs}
+    missing = [name for name in expected if name not in supplied]
+    if missing:
+        raise RuntimeError(f"Cannot capture target call; missing args: {missing}")
+    ordered = {}
+    for name in expected:
+        ordered[name] = supplied[name]
+    return ordered
+
+
+def _write_pickle(path: Path, payload: dict) -> None:
+    """Pickle ``payload`` to ``path`` through a ``.tmp`` sibling file.
+
+    The data is written to ``<path>.tmp`` first and then moved into place
+    with ``os.replace``.
+    """
+    staging_path = path.with_suffix(f"{path.suffix}.tmp")
+    with staging_path.open("wb") as stream:
+        pickle.dump(payload, stream, protocol=pickle.HIGHEST_PROTOCOL)
+    os.replace(staging_path, path)
+
+
+class _CaptureState:
+    """Counts captured postprocess calls and writes each one to a case file."""
+
+    def __init__(self, cases_dir: Path, limit: int) -> None:
+        # Directory receiving the case_NNNN.pkl files.
+        self.cases_dir = cases_dir
+        # Maximum number of calls to save.
+        self.limit = limit
+        # Number of calls saved so far.
+        self.count = 0
+        # Held for the whole save, so counting and writing happen together.
+        self.lock = threading.Lock()
+
+    def maybe_save(self, inputs: dict, output: List[Any]) -> None:
+        """Save one call as the next case unless the limit is already reached."""
+        with self.lock:
+            if self.count >= self.limit:
+                return
+            index = self.count
+            case_path = self.cases_dir / f"case_{index:04d}.pkl"
+            _write_pickle(
+                case_path,
+                {
+                    "schema_version": _SCHEMA_VERSION,
+                    "case_index": index,
+                    "inputs": _snapshot_inputs(**inputs),
+                    "expected_output": _snapshot_output(output),
+                },
+            )
+            self.count += 1
+            # Report the first save, every tenth save, and the last one.
+            should_report = self.count in (1, self.limit) or self.count % 10 == 0
+            if should_report:
+                print(
+                    f"[capture] saved {self.count}/{self.limit} postprocess calls",
+                    flush=True,
+                )
+
+
+def _install_capture_hook(state: _CaptureState) -> None:
+    """Wrap the target postprocess function so its calls are recorded.
+
+    The wrapper calls the original first, then hands the bound inputs and
+    the result to ``state.maybe_save`` and returns the result unchanged. It
+    replaces the function in the shared ``common`` module and in every
+    backend module that is already imported and holds its own reference.
+    """
+    _ensure_local_import_paths()
+    from inference_models.models.rfdetr import common as rfdetr_common
+
+    wrapped_target = getattr(rfdetr_common, _TARGET_FUNCTION)
+
+    @functools.wraps(wrapped_target)
+    def capturing_wrapper(*args: Any, **kwargs: Any) -> List[Any]:
+        result = wrapped_target(*args, **kwargs)
+        bound_inputs = _bind_target_arguments(args, kwargs)
+        state.maybe_save(inputs=bound_inputs, output=result)
+        return result
+
+    setattr(rfdetr_common, _TARGET_FUNCTION, capturing_wrapper)
+
+    backend_module_names = (
+        "inference_models.models.rfdetr.rfdetr_instance_segmentation_trt",
+        "inference_models.models.rfdetr.rfdetr_instance_segmentation_onnx",
+        "inference_models.models.rfdetr.rfdetr_instance_segmentation_pytorch",
+    )
+    for backend_module_name in backend_module_names:
+        backend_module = sys.modules.get(backend_module_name)
+        # Only patch modules that are loaded and expose the function by name.
+        if backend_module is None:
+            continue
+        if not hasattr(backend_module, _TARGET_FUNCTION):
+            continue
+        setattr(backend_module, _TARGET_FUNCTION, capturing_wrapper)
+
+
+def _prepare_cases_dir(cases_dir: Path, overwrite: bool) -> None:
+    """Create ``cases_dir`` and make sure it holds no earlier capture.
+
+    With ``overwrite`` set, existing ``case_*.pkl`` files and the manifest
+    are deleted.
+
+    Raises:
+        RuntimeError: If ``overwrite`` is not set and earlier cases or a
+            manifest are present.
+    """
+    cases_dir.mkdir(parents=True, exist_ok=True)
+    stale_cases = list(cases_dir.glob("case_*.pkl"))
+    manifest_path = cases_dir / "manifest.json"
+
+    if not overwrite:
+        if stale_cases or manifest_path.exists():
+            raise RuntimeError(
+                f"{cases_dir} already contains captured cases; pass --overwrite "
+                "or choose a different --cases-dir."
+            )
+        return
+
+    for stale_case in stale_cases:
+        stale_case.unlink()
+    if manifest_path.exists():
+        manifest_path.unlink()
+
+
+def _write_manifest(cases_dir: Path, payload: dict) -> None:
+    """Write ``payload`` to ``cases_dir/manifest.json`` as JSON.
+
+    The output uses two-space indentation and sorted keys, and ends with a
+    newline.
+    """
+    manifest_path = cases_dir / "manifest.json"
+    with manifest_path.open("w") as handle:
+        json.dump(payload, handle, indent=2, sort_keys=True)
+        handle.write("\n")
+
+
+def _run_capture(args: argparse.Namespace) -> int:
+    cases_dir = args.cases_dir.resolve()
+    _prepare_cases_dir(cases_dir=cases_dir, overwrite=args.overwrite)
+
+    workflow = _load_workflow_module()
+    model_id = workflow._resolve_model_id(args.model_id, args.backend)
+    workflow._prepare_local_workflow_model_bundle(model_id)
+    if model_id != args.model_id:
+        print(
+            f"[model] using local TRT package via workflow model id: {model_id}",
+            flush=True,
+        )
+
+    state = _CaptureState(cases_dir=cases_dir, limit=args.capture_count)
+    _install_capture_hook(state=state)
+
+    frame_count = 0
+    start_time: Optional[float] = None
+    pipeline_ref: Dict[str, Any] = {}
+
+    def sink(predictions: Any, video_frames: Any) -> None:
+        nonlocal frame_count, start_time
+        del video_frames
+        if not isinstance(predictions, list):
+            predictions = [predictions]
+        frame_count += sum(p is not None for p in predictions)
+        if start_time is None:
+            start_time = perf_counter()
+        if frame_count % args.progress_every == 0:
+            elapsed = perf_counter() - start_time
+            fps = frame_count / elapsed if elapsed > 0 else 0.0
+            print(
+                f"[progress] frames={frame_count} fps={fps:.2f} "
+                f"captures={state.count}/{state.limit}",
+                flush=True,
+            )
+        if state.count >= state.limit and "pipeline" in pipeline_ref:
+            pipeline_ref["pipeline"].terminate()
+
+    pipeline = workflow.InferencePipeline.init_with_workflow(
+        video_reference=args.video_reference,
+        workflow_specification=workflow.build_workflow(model_id, args.confidence),
+        on_prediction=sink,
+    )
+    pipeline_ref["pipeline"] = pipeline
+    pipeline.start()
+    pipeline.join()
+
+    if state.count < args.capture_count:
+        raise RuntimeError(
+            f"Captured only {state.count}/{args.capture_count} invocations. "
+            "Use a longer video or lower --capture-count."
+        )
+
+    elapsed = perf_counter() - start_time if start_time else 0.0
+    _write_manifest(
+        cases_dir=cases_dir,
+        payload={
+            "schema_version": _SCHEMA_VERSION,
+            "function": (
+                "inference_models.models.rfdetr.common."
+                "post_process_instance_segmentation_results_to_rle_masks"
+            ),
+            "case_count": state.count,
+            "video_reference": args.video_reference,
+            "backend": args.backend,
+            "model_id": model_id,
+            "confidence": args.confidence,
+            "frames_seen_by_sink": frame_count,
+            "capture_elapsed_seconds": elapsed,
+            "created_at_unix": time(),
+        },
+    )
+    print(f"[capture] wrote {state.count} cases to {cases_dir}", flush=True)
+    return state.count
+
+
+def _resolve_device(device: str) -> torch.device:
+    if device == "auto":
+        return torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    resolved = torch.device(device)
+    if resolved.type == "cuda" and not torch.cuda.is_available():
+        raise RuntimeError("--device cuda requested, but CUDA is not available")
+    return resolved
+
+
+def _to_device(value: Union[float, torch.Tensor], device: torch.device) -> Any:
+    if isinstance(value, torch.Tensor):
+        return value.to(device=device).clone()
+    return value
+
+
+def _classes_re_mapping_to_device(
+    payload: Optional[dict], device: torch.device
+) -> Optional[Any]:
+    if payload is None:
+        return None
+    from inference_models.models.rfdetr.class_remapping import ClassesReMapping
+
+    return ClassesReMapping(
+        remaining_class_ids=payload["remaining_class_ids"].to(device=device).clone(),
+        class_mapping=payload["class_mapping"].to(device=device).clone(),
+    )
+
+
+def _materialize_inputs(case: dict, device: torch.device) -> dict:
+    inputs = case["inputs"]
+    return {
+        "bboxes": inputs["bboxes"].to(device=device).clone(),
+        "logits": inputs["logits"].to(device=device).clone(),
+        "masks": inputs["masks"].to(device=device).clone(),
+        "pre_processing_meta": inputs["pre_processing_meta"],
+        "threshold": _to_device(inputs["threshold"], device=device),
+        "num_classes": inputs["num_classes"],
+        "classes_re_mapping": _classes_re_mapping_to_device(
+            inputs["classes_re_mapping"],
+            device=device,
+        ),
+    }
+
+
+def _synchronize(device: torch.device) -> None:
+    if device.type == "cuda":
+        torch.cuda.synchronize(device)
+
+
+def _load_case(path: Path) -> dict:
+    with path.open("rb") as f:
+        payload = pickle.load(f)
+    if payload.get("schema_version") != _SCHEMA_VERSION:
+        raise RuntimeError(
+            f"{path} has schema_version={payload.get('schema_version')}; "
+            f"expected {_SCHEMA_VERSION}."
+        )
+    return payload
+
+
+def _assert_tensor_equal(
+    *,
+    actual: torch.Tensor,
+    expected: torch.Tensor,
+    label: str,
+    atol: float,
+    rtol: float,
+) -> None:
+    actual_cpu = actual.detach().cpu()
+    if torch.is_floating_point(actual_cpu) and (atol != 0.0 or rtol != 0.0):
+        equal = torch.allclose(actual_cpu, expected, atol=atol, rtol=rtol)
+    else:
+        equal = torch.equal(actual_cpu, expected)
+    if not equal:
+        raise AssertionError(
+            f"{label} differs: actual shape={tuple(actual_cpu.shape)} "
+            f"expected shape={tuple(expected.shape)}"
+        )
+
+
+def _assert_mask_equal(
+    *,
+    actual: Any,
+    expected: dict,
+    label: str,
+    atol: float,
+    rtol: float,
+) -> None:
+    if expected["kind"] == "dense":
+        if not isinstance(actual, torch.Tensor):
+            raise AssertionError(f"{label} expected dense tensor mask")
+        _assert_tensor_equal(
+            actual=actual,
+            expected=expected["tensor"],
+            label=f"{label}.tensor",
+            atol=atol,
+            rtol=rtol,
+        )
+        return
+    if isinstance(actual, torch.Tensor):
+        raise AssertionError(f"{label} expected RLE mask")
+    if tuple(actual.image_size) != tuple(expected["image_size"]):
+        raise AssertionError(
+            f"{label}.image_size differs: {actual.image_size} != "
+            f"{expected['image_size']}"
+        )
+    if list(actual.masks) != list(expected["masks"]):
+        raise AssertionError(f"{label}.masks differ")
+
+
+def _assert_outputs_equal(
+    *,
+    actual: List[Any],
+    expected: List[dict],
+    case_index: int,
+    atol: float,
+    rtol: float,
+) -> None:
+    if len(actual) != len(expected):
+        raise AssertionError(
+            f"case {case_index}: output length differs: "
+            f"{len(actual)} != {len(expected)}"
+        )
+    for output_index, (actual_detection, expected_detection) in enumerate(
+        zip(actual, expected)
+    ):
+        label = f"case {case_index} output {output_index}"
+        _assert_tensor_equal(
+            actual=actual_detection.xyxy,
+            expected=expected_detection["xyxy"],
+            label=f"{label}.xyxy",
+            atol=atol,
+            rtol=rtol,
+        )
+        _assert_tensor_equal(
+            actual=actual_detection.class_id,
+            expected=expected_detection["class_id"],
+            label=f"{label}.class_id",
+            atol=atol,
+            rtol=rtol,
+        )
+        _assert_tensor_equal(
+            actual=actual_detection.confidence,
+            expected=expected_detection["confidence"],
+            label=f"{label}.confidence",
+            atol=atol,
+            rtol=rtol,
+        )
+        _assert_mask_equal(
+            actual=actual_detection.mask,
+            expected=expected_detection["mask"],
+            label=f"{label}.mask",
+            atol=atol,
+            rtol=rtol,
+        )
+        if actual_detection.image_metadata != expected_detection["image_metadata"]:
+            raise AssertionError(f"{label}.image_metadata differs")
+        if actual_detection.bboxes_metadata != expected_detection["bboxes_metadata"]:
+            raise AssertionError(f"{label}.bboxes_metadata differs")
+
+
+def _run_one_replay_case(
+    *,
+    case_path: Path,
+    device: torch.device,
+    atol: float,
+    rtol: float,
+) -> float:
+    from inference_models.models.rfdetr.common import (
+        post_process_instance_segmentation_results_to_rle_masks,
+    )
+
+    case = _load_case(case_path)
+    inputs = _materialize_inputs(case=case, device=device)
+    _synchronize(device)
+    start = perf_counter()
+    actual = post_process_instance_segmentation_results_to_rle_masks(**inputs)
+    _synchronize(device)
+    elapsed = perf_counter() - start
+    _assert_outputs_equal(
+        actual=actual,
+        expected=case["expected_output"],
+        case_index=case["case_index"],
+        atol=atol,
+        rtol=rtol,
+    )
+    return elapsed
+
+
+def _summarize_timings(timings: List[float]) -> dict:
+    sorted_timings = sorted(timings)
+    total = sum(sorted_timings)
+    count = len(sorted_timings)
+
+    def percentile(p: float) -> float:
+        if count == 0:
+            return 0.0
+        index = min(count - 1, int(round((count - 1) * p)))
+        return sorted_timings[index]
+
+    return {
+        "count": count,
+        "total_seconds": total,
+        "mean_ms": (total / count) * 1000 if count else 0.0,
+        "min_ms": sorted_timings[0] * 1000 if count else 0.0,
+        "p50_ms": percentile(0.50) * 1000,
+        "p90_ms": percentile(0.90) * 1000,
+        "p99_ms": percentile(0.99) * 1000,
+        "max_ms": sorted_timings[-1] * 1000 if count else 0.0,
+    }
+
+
+def _print_timing_summary(summary: dict) -> None:
+    print(
+        "[replay] "
+        f"calls={summary['count']} "
+        f"total={summary['total_seconds']:.3f}s "
+        f"mean={summary['mean_ms']:.3f}ms "
+        f"p50={summary['p50_ms']:.3f}ms "
+        f"p90={summary['p90_ms']:.3f}ms "
+        f"p99={summary['p99_ms']:.3f}ms "
+        f"min={summary['min_ms']:.3f}ms "
+        f"max={summary['max_ms']:.3f}ms",
+        flush=True,
+    )
+
+
+def _run_replay(args: argparse.Namespace) -> dict:
+    _ensure_local_import_paths()
+    device = _resolve_device(args.device)
+    cases_dir = args.cases_dir.resolve()
+    case_paths = sorted(cases_dir.glob("case_*.pkl"))
+    if args.max_cases is not None:
+        case_paths = case_paths[: args.max_cases]
+    if not case_paths:
+        raise RuntimeError(f"No case_*.pkl files found in {cases_dir}")
+
+    print(
+        f"[replay] cases={len(case_paths)} repeats={args.repeats} "
+        f"warmup_repeats={args.warmup_repeats} device={device}",
+        flush=True,
+    )
+    for _ in range(args.warmup_repeats):
+        for case_path in case_paths:
+            _run_one_replay_case(
+                case_path=case_path,
+                device=device,
+                atol=args.atol,
+                rtol=args.rtol,
+            )
+
+    timings = []
+    for repeat_index in range(args.repeats):
+        for case_path in case_paths:
+            timings.append(
+                _run_one_replay_case(
+                    case_path=case_path,
+                    device=device,
+                    atol=args.atol,
+                    rtol=args.rtol,
+                )
+            )
+        print(
+            f"[replay] completed repeat {repeat_index + 1}/{args.repeats}",
+            flush=True,
+        )
+
+    summary = _summarize_timings(timings)
+    _print_timing_summary(summary)
+    print("[replay] all outputs matched captured e2e outputs", flush=True)
+    return summary
+
+
+def _parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--mode",
+        choices=("capture", "replay", "capture-and-replay"),
+        default="capture-and-replay",
+    )
+    parser.add_argument("--video_reference", default="vehicles_1080p.mp4")
+    parser.add_argument("--model_id", default="rfdetr-seg-nano")
+    parser.add_argument("--confidence", type=float, default=0.4)
+    parser.add_argument("--backend", choices=("trt", "onnx", "torch"), default="trt")
+    parser.add_argument(
+        "--cases-dir",
+        type=Path,
+        default=Path("temp/rfdetr_rle_postprocess_cases"),
+    )
+    parser.add_argument("--capture-count", type=int, default=100)
+    parser.add_argument("--progress-every", type=int, default=50)
+    parser.add_argument(
+        "--overwrite",
+        action=argparse.BooleanOptionalAction,
+        default=True,
+    )
+    parser.add_argument("--device", default="auto")
+    parser.add_argument("--repeats", type=int, default=1)
+    parser.add_argument("--warmup-repeats", type=int, default=0)
+    parser.add_argument("--max-cases", type=int, default=None)
+    parser.add_argument("--atol", type=float, default=0.0)
+    parser.add_argument("--rtol", type=float, default=0.0)
+    args = parser.parse_args()
+    if args.capture_count <= 0:
+        raise ValueError("--capture-count must be positive")
+    if args.repeats <= 0:
+        raise ValueError("--repeats must be positive")
+    if args.warmup_repeats < 0:
+        raise ValueError("--warmup-repeats must be non-negative")
+    if args.progress_every <= 0:
+        raise ValueError("--progress-every must be positive")
+    return args
+
+
+def main() -> None:
+    args = _parse_args()
+    if args.mode in {"capture", "capture-and-replay"}:
+        _run_capture(args=args)
+    if args.mode in {"replay", "capture-and-replay"}:
+        _run_replay(args=args)
+
+
+if __name__ == "__main__":
+    main()

From 60dea684fd0ba866fb5173718a1770ad153139d0 Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Wed, 3 Jun 2026 00:12:32 +0000
Subject: [PATCH 15/76] Warn on unsupported RF-DETR Triton postprocess path

---
 .../inference_models/models/rfdetr/triton_postprocess.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/inference_models/inference_models/models/rfdetr/triton_postprocess.py b/inference_models/inference_models/models/rfdetr/triton_postprocess.py
index 3e2d38a539..163f316bf6 100644
--- a/inference_models/inference_models/models/rfdetr/triton_postprocess.py
+++ b/inference_models/inference_models/models/rfdetr/triton_postprocess.py
@@ -20,6 +20,7 @@
 compressed COCO RLE counts, and wrap them in ``InstanceDetections``.
 """
 
+import warnings
 from collections import OrderedDict
 from threading import Lock
 from typing import List, Optional, Tuple, Union
@@ -29,7 +30,6 @@
 import torch.nn.functional as F
 from pycocotools import mask as mask_utils
 
-from inference_models.logger import LOGGER
 from inference_models.models.base.instance_segmentation import InstanceDetections
 from inference_models.models.base.types import InstancesRLEMasks
 from inference_models.models.common.roboflow.model_packages import PreProcessingMetadata
@@ -184,9 +184,10 @@ def post_process_single_instance_segmentation_result_to_rle_masks_triton(
         classes_re_mapping=classes_re_mapping,
     )
     if unsupported_reason is not None:
-        LOGGER.debug(
-            "RF-DETR Triton postprocess path is unsupported: %s",
-            unsupported_reason,
+        warnings.warn(
+            f"RF-DETR Triton postprocess path is unsupported: {unsupported_reason}",
+            RuntimeWarning,
+            stacklevel=2,
         )
         return None
 

From e64a87889ce7c96861ff4b0c94949811583523f8 Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Wed, 3 Jun 2026 03:30:01 +0000
Subject: [PATCH 16/76] Force sparse RF-DETR masks in postproc benchmark

---
 development/stream_interface/rfdetr_nano_seg_trt_workflow.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/development/stream_interface/rfdetr_nano_seg_trt_workflow.py b/development/stream_interface/rfdetr_nano_seg_trt_workflow.py
index 9f209af93f..74bd3d2907 100644
--- a/development/stream_interface/rfdetr_nano_seg_trt_workflow.py
+++ b/development/stream_interface/rfdetr_nano_seg_trt_workflow.py
@@ -143,6 +143,7 @@ def build_workflow(model_id: str, confidence: float) -> dict:
                 "model_id": model_id,
                 "confidence_mode": "custom",
                 "custom_confidence": confidence,
+                "enforce_dense_masks_in_inference_models": False,
             },
         ],
         "outputs": [

From 0558da56542379dc2e31508e913445bebc33134a Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Sat, 9 May 2026 00:30:01 +0000
Subject: [PATCH 17/76] perf(rfdetr-seg): fused Triton preproc kernel for TRT
 path
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace the per-frame PIL-bilinear-antialias + to_tensor + normalize chain
in the RF-DETR TRT instance-segmentation model with a single Triton
kernel that resizes, swaps BGR↔RGB, scales by 1/255, and applies
ImageNet normalization — writing straight into the preallocated TRT
input buffer.

Byte-exact port of PIL's separable bilinear-antialias resize
(PRECISION_BITS=22, int32 fixed-point, uint8 quantization between the
horizontal and vertical passes). The horizontal uint8 intermediate
lives in registers.

Correctness
- Preproc max abs error vs PIL: 4.77e-7 (fp32 ULP on the final
  /255+normalize step; the uint8 resize result is byte-identical).
- Full coco/val2017 detection parity (rfdetr-seg-nano, conf=0.4):
  26,721 / 26,721 matched at IoU>0.5, mean box IoU 1.0000,
  |Δscore| 0, 0 class-id disagreements, all matched masks
  pixel-identical.

Performance (vehicles_312px.mp4, 538 frames)
- Baseline (PIL path): 76.25 fps
- Triton fast path:    99.83 fps (+31%)
- Preproc microbench (1080p → 312²): 27.0 ms → 2.8 ms per frame (~10×)

Scope
- Gated on: single-image numpy uint8 HWC input, stretch/letterbox/
  center-crop/letterbox-reflect resize modes (all collapse to a single
  PIL stretch when dataset_version_resize_dimensions is None, verified
  via synthetic-package test), no static_crop/grayscale/contrast,
  3-channel, scaling_factor in {None, 255}, normalization set.
- Falls back to the existing PIL-based pre_process_network_input
  when any precondition fails.

Also adds the benchmark driver
development/stream_interface/rfdetr_nano_seg_trt_workflow.py used to
measure the above numbers.
---
 .../rfdetr_instance_segmentation_trt.py       | 223 ++++++++++
 .../models/rfdetr/triton_preprocess.py        | 383 ++++++++++++++++++
 2 files changed, 606 insertions(+)
 create mode 100644 inference_models/inference_models/models/rfdetr/triton_preprocess.py

diff --git a/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py b/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py
index e8196f8eae..afa70f9c3d 100644
--- a/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py
+++ b/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py
@@ -26,10 +26,13 @@
     use_primary_cuda_context,
 )
 from inference_models.models.common.model_packages import get_model_package_contents
+from inference_models.entities import ImageDimensions
 from inference_models.models.common.roboflow.model_packages import (
+    ColorMode,
     InferenceConfig,
     PreProcessingMetadata,
     ResizeMode,
+    StaticCropOffset,
     TRTConfig,
     parse_class_names_file,
     parse_inference_config,
@@ -52,6 +55,17 @@
     post_process_instance_segmentation_results_to_rle_masks,
 )
 from inference_models.models.rfdetr.pre_processing import pre_process_network_input
+
+try:
+    from inference_models.models.rfdetr.triton_preprocess import (
+        TRITON_AVAILABLE as _TRITON_AVAILABLE,
+        build_resample_tables,
+        triton_preprocess_rfdetr_stretch,
+    )
+except ImportError:
+    _TRITON_AVAILABLE = False
+    build_resample_tables = None
+    triton_preprocess_rfdetr_stretch = None
 from inference_models.weights_providers.entities import RecommendedParameters
 
 try:
@@ -85,6 +99,84 @@
     ) from import_error
 
 
+class _FastPathState:
+    """Per-(src_shape, target_shape) cache of GPU buffers + resample tables
+    that the Triton fast path reuses across frames."""
+
+    __slots__ = (
+        "src_h",
+        "src_w",
+        "target_h",
+        "target_w",
+        "pinned_host",
+        "src_gpu",
+        "out_buffer",
+        "tables",
+    )
+
+    def __init__(
+        self,
+        src_h: int,
+        src_w: int,
+        target_h: int,
+        target_w: int,
+        pinned_host: torch.Tensor,
+        src_gpu: torch.Tensor,
+        out_buffer: torch.Tensor,
+        tables,
+    ) -> None:
+        self.src_h = src_h
+        self.src_w = src_w
+        self.target_h = target_h
+        self.target_w = target_w
+        self.pinned_host = pinned_host
+        self.src_gpu = src_gpu
+        self.out_buffer = out_buffer
+        self.tables = tables
+
+    @classmethod
+    def build(
+        cls,
+        src_h: int,
+        src_w: int,
+        target_h: int,
+        target_w: int,
+        device: torch.device,
+    ) -> "_FastPathState":
+        pinned_host = torch.empty((src_h, src_w, 3), dtype=torch.uint8, pin_memory=True)
+        src_gpu = torch.empty((src_h, src_w, 3), dtype=torch.uint8, device=device)
+        out_buffer = torch.empty(
+            (1, 3, target_h, target_w), dtype=torch.float32, device=device
+        )
+        tables = build_resample_tables(
+            src_h=src_h,
+            src_w=src_w,
+            target_h=target_h,
+            target_w=target_w,
+            device=device,
+        )
+        return cls(
+            src_h=src_h,
+            src_w=src_w,
+            target_h=target_h,
+            target_w=target_w,
+            pinned_host=pinned_host,
+            src_gpu=src_gpu,
+            out_buffer=out_buffer,
+            tables=tables,
+        )
+
+    def is_stale(
+        self, src_h: int, src_w: int, target_h: int, target_w: int
+    ) -> bool:
+        return (
+            self.src_h != src_h
+            or self.src_w != src_w
+            or self.target_h != target_h
+            or self.target_w != target_w
+        )
+
+
 class RFDetrForInstanceSegmentationTRT(
     InstanceSegmentationModel[
         torch.Tensor,
@@ -220,6 +312,7 @@ def __init__(
         self._inference_stream = torch.cuda.Stream(device=self._device)
         self._thread_local_storage = threading.local()
         self.recommended_parameters = recommended_parameters
+        self._fast_path_state: Optional[_FastPathState] = None
 
     @property
     def class_names(self) -> List[str]:
@@ -237,6 +330,14 @@ def pre_process(
         pre_processing_overrides: Optional[PreProcessingOverrides] = None,
         **kwargs,
     ) -> Tuple[torch.Tensor, List[PreProcessingMetadata]]:
+        fast = self._try_fast_preprocess(
+            images=images,
+            input_color_format=input_color_format,
+            image_size=image_size,
+            pre_processing_overrides=pre_processing_overrides,
+        )
+        if fast is not None:
+            return fast
         with torch.cuda.stream(self._pre_process_stream):
             pre_processed_images, pre_processing_meta = pre_process_network_input(
                 images=images,
@@ -250,6 +351,128 @@ def pre_process(
         self._pre_process_stream.synchronize()
         return pre_processed_images, pre_processing_meta
 
+    def _try_fast_preprocess(
+        self,
+        images,
+        input_color_format,
+        image_size,
+        pre_processing_overrides,
+    ) -> Optional[Tuple[torch.Tensor, List[PreProcessingMetadata]]]:
+        if not _TRITON_AVAILABLE:
+            return None
+        if image_size is not None:
+            return None
+        # pre_processing_overrides can only *disable* transforms; it has no
+        # "enable" knob. The fast path never applies static_crop / grayscale /
+        # contrast regardless, so the override flags are irrelevant — we just
+        # gate on whether the image_pre_processing config itself asks for them.
+        ipp = self._inference_config.image_pre_processing
+        if (
+            (ipp.static_crop is not None and ipp.static_crop.enabled)
+            or (ipp.contrast is not None and ipp.contrast.enabled)
+            or (ipp.grayscale is not None and ipp.grayscale.enabled)
+        ):
+            return None
+
+        ni = self._inference_config.network_input
+        if ni.dataset_version_resize_dimensions is not None:
+            return None
+        if ni.input_channels != 3:
+            return None
+        if ni.scaling_factor not in (None, 255):
+            return None
+        if ni.normalization is None:
+            return None
+        # When dataset_version_resize_dimensions is None, the prod path collapses
+        # non-stretch resize modes to a single PIL stretch as well
+        # (pre_processing.py:_needs_two_step_resize), so we accept all modes here.
+        if ni.resize_mode not in (
+            ResizeMode.STRETCH_TO,
+            ResizeMode.LETTERBOX,
+            ResizeMode.CENTER_CROP,
+            ResizeMode.LETTERBOX_REFLECT_EDGES,
+        ):
+            return None
+
+        if isinstance(images, list):
+            if len(images) != 1:
+                return None
+            candidate = images[0]
+        else:
+            candidate = images
+        if not isinstance(candidate, np.ndarray):
+            return None
+        if (
+            candidate.dtype != np.uint8
+            or candidate.ndim != 3
+            or candidate.shape[2] != 3
+        ):
+            return None
+
+        caller_mode = (
+            ColorMode(input_color_format)
+            if input_color_format is not None
+            else ColorMode.BGR
+        )
+        swap_rb = caller_mode != ni.color_mode
+
+        means, stds = ni.normalization
+        means_t = (float(means[0]), float(means[1]), float(means[2]))
+        stds_t = (float(stds[0]), float(stds[1]), float(stds[2]))
+        target_h = ni.training_input_size.height
+        target_w = ni.training_input_size.width
+        orig_h, orig_w = int(candidate.shape[0]), int(candidate.shape[1])
+
+        state = self._fast_path_state
+        if state is None or state.is_stale(
+            src_h=orig_h,
+            src_w=orig_w,
+            target_h=target_h,
+            target_w=target_w,
+        ):
+            state = _FastPathState.build(
+                src_h=orig_h,
+                src_w=orig_w,
+                target_h=target_h,
+                target_w=target_w,
+                device=self._device,
+            )
+            self._fast_path_state = state
+
+        pinned_np = state.pinned_host.numpy()
+        np.copyto(pinned_np, candidate, casting="no")
+
+        with torch.cuda.stream(self._pre_process_stream):
+            state.src_gpu.copy_(state.pinned_host, non_blocking=True)
+            triton_preprocess_rfdetr_stretch(
+                src=state.src_gpu,
+                tables=state.tables,
+                target_h=target_h,
+                target_w=target_w,
+                means=means_t,
+                stds=stds_t,
+                swap_rb=swap_rb,
+                out=state.out_buffer,
+            )
+            state.out_buffer.record_stream(self._pre_process_stream)
+        self._pre_process_stream.synchronize()
+
+        meta = PreProcessingMetadata(
+            pad_left=0,
+            pad_top=0,
+            pad_right=0,
+            pad_bottom=0,
+            original_size=ImageDimensions(width=orig_w, height=orig_h),
+            size_after_pre_processing=ImageDimensions(width=orig_w, height=orig_h),
+            inference_size=ImageDimensions(width=target_w, height=target_h),
+            scale_width=target_w / orig_w,
+            scale_height=target_h / orig_h,
+            static_crop_offset=StaticCropOffset(
+                offset_x=0, offset_y=0, crop_width=orig_w, crop_height=orig_h
+            ),
+        )
+        return state.out_buffer, [meta]
+
     def forward(
         self,
         pre_processed_images: torch.Tensor,
diff --git a/inference_models/inference_models/models/rfdetr/triton_preprocess.py b/inference_models/inference_models/models/rfdetr/triton_preprocess.py
new file mode 100644
index 0000000000..0b9d7eaa8e
--- /dev/null
+++ b/inference_models/inference_models/models/rfdetr/triton_preprocess.py
@@ -0,0 +1,383 @@
+"""Fused Triton preprocessing kernel for RF-DETR.
+
+Byte-exact port of PIL's separable bilinear-antialias resize (the algorithm
+torchvision's `TF.resize(pil, ..., antialias=True)` uses on PIL inputs), with
+the subsequent `/255` + ImageNet normalize fused into the same pass.
+
+PIL's scheme (src/libImaging/Resample.c):
+
+    PRECISION_BITS = 22
+    scale       = in_size / out_size
+    filterscale = max(1.0, scale)
+    support     = 1.0 * filterscale          # triangle radius = 1
+    ksize       = ceil(support) * 2 + 1
+    center(o)   = (o + 0.5) * scale
+    xmin(o)     = int(center - support + 0.5)                  clipped to [0, in]
+    xmax(o)     = int(center + support + 0.5)                  clipped to [0, in]
+    w_f(o, k)   = triangle((k + xmin - center + 0.5) / filterscale)
+    w_f normalised to sum to 1 per output pixel
+    w_i(o, k)   = int(w_f(o, k) * (1 << PRECISION_BITS) + 0.5)  int32, half rounds up
+    out(o)      = clamp((Σ w_i(o, k) * src_u8) + (1 << (PRECISION_BITS-1)) >> PRECISION_BITS, 0, 255)
+
+Single fused kernel: the horizontal uint8 intermediate lives in registers
+rather than a DRAM scratch buffer. For each output tile we loop over
+KSIZE_Y source rows; for each contributing source row we recompute the
+horizontal convolution (int32 fixed-point, uint8 quantize) on the fly,
+multiply by the vertical weight, and accumulate. Final: uint8 quantize,
+BGR↔RGB swap, /255, ImageNet normalize, fp32 CHW store.
+
+A separable two-pass variant (horizontal then vertical, via a DRAM uint8
+intermediate) is ~0.4 fps faster end-to-end on the 312² RF-DETR workload
+because it avoids redoing KSIZE_X MACs per output row. We picked the fused
+version for simplicity (no intermediate buffer, one launch, one piece of
+math).
+"""
+
+from __future__ import annotations
+
+import math
+from typing import Optional, Tuple
+
+import numpy as np
+import torch
+
+try:
+    import triton
+    import triton.language as tl
+
+    TRITON_AVAILABLE = True
+except ImportError:  # pragma: no cover
+    triton = None
+    tl = None
+    TRITON_AVAILABLE = False
+
+
+PRECISION_BITS = 22
+
+
+def _bilinear_antialias_weights_1d_int(
+    in_size: int, out_size: int
+) -> Tuple[np.ndarray, np.ndarray, int]:
+    """PIL's precompute_coeffs + normalize_coeffs_8bpc (triangle filter), 1-D.
+
+    Returns (starts, weights, ksize): the first source index per output pixel,
+    int32 fixed-point weights shaped (out_size, ksize), and the tap count.
+    """
+    if in_size <= 0 or out_size <= 0:
+        raise ValueError(f"sizes must be positive, got {in_size} -> {out_size}")
+    scale = in_size / out_size
+    filterscale = max(1.0, scale)
+    support = filterscale
+    ksize = int(math.ceil(support)) * 2 + 1
+
+    starts = np.zeros(out_size, dtype=np.int32)
+    weights_fp = np.zeros((out_size, ksize), dtype=np.float64)
+    inv_fs = 1.0 / filterscale
+
+    for o in range(out_size):
+        center = (o + 0.5) * scale
+        xmin = max(int(center - support + 0.5), 0)
+        actual = min(int(center + support + 0.5), in_size) - xmin
+        starts[o] = xmin
+        total = 0.0
+        for k in range(actual):
+            w = max(0.0, 1.0 - abs((k + xmin - center + 0.5) * inv_fs))
+            weights_fp[o, k] = w
+            total += w
+        if total != 0.0:
+            weights_fp[o, :actual] /= total
+
+    # PIL rounds half up: (int)(0.5 + w * 2**PRECISION_BITS); np.rint is half-to-even.
+    weights_int = np.floor(weights_fp * (1 << PRECISION_BITS) + 0.5)
+    return starts, weights_int.astype(np.int32), ksize
+
+
+if TRITON_AVAILABLE:
+
+    _HALF = 1 << (PRECISION_BITS - 1)
+
+    @triton.jit
+    def _fused_resize_normalize_kernel(
+        src_ptr,
+        dst_ptr,
+        ymin_ptr,
+        xmin_ptr,
+        wy_ptr,
+        wx_ptr,
+        src_h,
+        src_w,
+        src_stride_h,
+        src_stride_w,
+        dst_stride_c,
+        dst_stride_h,
+        target_h,
+        target_w,
+        inv_std_255_r,
+        inv_std_255_g,
+        inv_std_255_b,
+        offset_r,
+        offset_g,
+        offset_b,
+        CH_R: tl.constexpr,
+        CH_G: tl.constexpr,
+        CH_B: tl.constexpr,
+        KSIZE_Y: tl.constexpr,
+        KSIZE_X: tl.constexpr,
+        PRECISION_BITS_C: tl.constexpr,
+        HALF_C: tl.constexpr,
+        BLOCK_H: tl.constexpr,
+        BLOCK_W: tl.constexpr,
+    ):
+        """One program instance per (tile_y, tile_x) tile of the target image.
+
+        In : src uint8 HWC (src_h, src_w, 3), channel stride 1, source color order.
+        Out: dst fp32 CHW (1, 3, target_h, target_w), unit stride along width,
+             network color order, (pixel/255 - mean)/std.
+        """
+        pid_y = tl.program_id(0)
+        pid_x = tl.program_id(1)
+
+        offs_y = pid_y * BLOCK_H + tl.arange(0, BLOCK_H)
+        offs_x = pid_x * BLOCK_W + tl.arange(0, BLOCK_W)
+        mask_y = offs_y < target_h
+        mask_x = offs_x < target_w
+        mask_out = mask_y[:, None] & mask_x[None, :]
+
+        ymin = tl.load(ymin_ptr + offs_y, mask=mask_y, other=0)
+        xmin = tl.load(xmin_ptr + offs_x, mask=mask_x, other=0)
+
+        # Vertical pass accumulators (int32 fixed-point) for 3 channels.
+        vacc_0 = tl.zeros((BLOCK_H, BLOCK_W), dtype=tl.int32)
+        vacc_1 = tl.zeros((BLOCK_H, BLOCK_W), dtype=tl.int32)
+        vacc_2 = tl.zeros((BLOCK_H, BLOCK_W), dtype=tl.int32)
+
+        for ky in tl.static_range(KSIZE_Y):
+            # Source row per output row; clamped so every tap stays inside the image.
+            sy = ymin + ky
+            sy_c = tl.maximum(tl.minimum(sy, src_h - 1), 0)
+            wy = tl.load(wy_ptr + offs_y * KSIZE_Y + ky, mask=mask_y, other=0)
+
+            # Horizontal pass for (output_rows_in_tile, output_cols_in_tile):
+            # for each source column in the kernel, gather src[sy_c, sx_c, :]
+            # and accumulate with wx[output_col, kx].
+            hacc_0 = tl.zeros((BLOCK_H, BLOCK_W), dtype=tl.int32)
+            hacc_1 = tl.zeros((BLOCK_H, BLOCK_W), dtype=tl.int32)
+            hacc_2 = tl.zeros((BLOCK_H, BLOCK_W), dtype=tl.int32)
+
+            for kx in tl.static_range(KSIZE_X):
+                sx = xmin + kx
+                sx_c = tl.maximum(tl.minimum(sx, src_w - 1), 0)
+                wx = tl.load(wx_ptr + offs_x * KSIZE_X + kx, mask=mask_x, other=0)
+                base = sy_c[:, None] * src_stride_h + sx_c[None, :] * src_stride_w
+                p0 = tl.load(src_ptr + base + 0, mask=mask_out, other=0).to(tl.int32)
+                p1 = tl.load(src_ptr + base + 1, mask=mask_out, other=0).to(tl.int32)
+                p2 = tl.load(src_ptr + base + 2, mask=mask_out, other=0).to(tl.int32)
+                wx_2d = wx[None, :]
+                hacc_0 += p0 * wx_2d
+                hacc_1 += p1 * wx_2d
+                hacc_2 += p2 * wx_2d
+
+            # Horizontal uint8 quantization (byte-exact to PIL's intermediate).
+            hacc_0 = (hacc_0 + HALF_C) >> PRECISION_BITS_C
+            hacc_1 = (hacc_1 + HALF_C) >> PRECISION_BITS_C
+            hacc_2 = (hacc_2 + HALF_C) >> PRECISION_BITS_C
+            hacc_0 = tl.minimum(tl.maximum(hacc_0, 0), 255)
+            hacc_1 = tl.minimum(tl.maximum(hacc_1, 0), 255)
+            hacc_2 = tl.minimum(tl.maximum(hacc_2, 0), 255)
+
+            wy_2d = wy[:, None]
+            vacc_0 += hacc_0 * wy_2d
+            vacc_1 += hacc_1 * wy_2d
+            vacc_2 += hacc_2 * wy_2d
+
+        # Vertical uint8 quantization.
+        q_0 = (vacc_0 + HALF_C) >> PRECISION_BITS_C
+        q_1 = (vacc_1 + HALF_C) >> PRECISION_BITS_C
+        q_2 = (vacc_2 + HALF_C) >> PRECISION_BITS_C
+        q_0 = tl.minimum(tl.maximum(q_0, 0), 255)
+        q_1 = tl.minimum(tl.maximum(q_1, 0), 255)
+        q_2 = tl.minimum(tl.maximum(q_2, 0), 255)
+
+        # Source-to-output channel remap (triton requires constexpr branches).
+        if CH_R == 0:
+            q_r = q_0
+        elif CH_R == 1:
+            q_r = q_1
+        else:
+            q_r = q_2
+        if CH_G == 0:
+            q_g = q_0
+        elif CH_G == 1:
+            q_g = q_1
+        else:
+            q_g = q_2
+        if CH_B == 0:
+            q_b = q_0
+        elif CH_B == 1:
+            q_b = q_1
+        else:
+            q_b = q_2
+
+        # (pixel/255 - mean)/std  ==  pixel * (1/(255*std)) + (-mean/std)
+        out_r = q_r.to(tl.float32) * inv_std_255_r + offset_r
+        out_g = q_g.to(tl.float32) * inv_std_255_g + offset_g
+        out_b = q_b.to(tl.float32) * inv_std_255_b + offset_b
+
+        out_row = offs_y[:, None] * dst_stride_h + offs_x[None, :]
+        tl.store(dst_ptr + 0 * dst_stride_c + out_row, out_r, mask=mask_out)
+        tl.store(dst_ptr + 1 * dst_stride_c + out_row, out_g, mask=mask_out)
+        tl.store(dst_ptr + 2 * dst_stride_c + out_row, out_b, mask=mask_out)
+
+
+class _ResampleTables:
+    """Per-axis PIL int32 weight tables for one (src, dst) size pair.
+
+    Built by `build_resample_tables`, read by `triton_preprocess_rfdetr_stretch`:
+
+    ymin_gpu / xmin_gpu: first source row / column for each output row / column.
+    wy_gpu / wx_gpu: flattened fixed-point weights, ksize entries per output.
+    ksize_y / ksize_x: number of taps stored per output row / column.
+    """
+
+    __slots__ = ("ymin_gpu", "xmin_gpu", "wy_gpu", "wx_gpu", "ksize_y", "ksize_x")
+
+    def __init__(
+        self,
+        ymin_gpu: torch.Tensor,
+        xmin_gpu: torch.Tensor,
+        wy_gpu: torch.Tensor,
+        wx_gpu: torch.Tensor,
+        ksize_y: int,
+        ksize_x: int,
+    ) -> None:
+        self.ymin_gpu = ymin_gpu
+        self.xmin_gpu = xmin_gpu
+        self.wy_gpu = wy_gpu
+        self.wx_gpu = wx_gpu
+        self.ksize_y = ksize_y
+        self.ksize_x = ksize_x
+
+
+def build_resample_tables(
+    src_h: int,
+    src_w: int,
+    target_h: int,
+    target_w: int,
+    device: torch.device,
+) -> _ResampleTables:
+    """Compute the per-axis weight tables on the host and copy them to `device`."""
+    ymin, wy, ksize_y = _bilinear_antialias_weights_1d_int(src_h, target_h)
+    xmin, wx, ksize_x = _bilinear_antialias_weights_1d_int(src_w, target_w)
+    # Same order as the leading _ResampleTables fields; weight matrices are
+    # flattened row-major so the kernel can index them as out_index * ksize + tap.
+    on_device = [
+        torch.from_numpy(host_array).to(device=device, non_blocking=True)
+        for host_array in (ymin, xmin, wy.ravel(), wx.ravel())
+    ]
+    return _ResampleTables(*on_device, ksize_y=ksize_y, ksize_x=ksize_x)
+
+
+def triton_preprocess_rfdetr_stretch(
+    src: torch.Tensor,
+    tables: _ResampleTables,
+    target_h: int,
+    target_w: int,
+    means: Tuple[float, float, float] = (0.485, 0.456, 0.406),
+    stds: Tuple[float, float, float] = (0.229, 0.224, 0.225),
+    swap_rb: bool = True,
+    out: Optional[torch.Tensor] = None,
+) -> torch.Tensor:
+    """Fused PIL-exact resize + color swap + normalize.
+
+    Args:
+        src: uint8 CUDA tensor, shape (H, W, 3), HWC layout.
+        tables: precomputed int32 resample tables from `build_resample_tables`.
+        target_h, target_w: output spatial dims.
+        means, stds: normalization in output channel order (R, G, B for
+            network_input.color_mode == 'rgb').
+        swap_rb: if True, source channel 0 → output B (BGR input, RGB network).
+        out: optional preallocated contiguous fp32 (1, 3, H, W) CUDA tensor.
+
+    Returns:
+        fp32 (1, 3, target_h, target_w) on the same device as `src`.
+
+    Raises:
+        RuntimeError: if triton is not installed.
+        ValueError: if `src` or `out` has an unsupported device, dtype or layout.
+    """
+    if not TRITON_AVAILABLE:
+        raise RuntimeError("triton is not installed")
+    if not src.is_cuda:
+        raise ValueError(f"expected CUDA src tensor, got device={src.device}")
+    if src.dtype != torch.uint8:
+        raise ValueError(f"expected uint8 src, got {src.dtype}")
+    if src.ndim != 3 or src.shape[2] != 3:
+        raise ValueError(f"expected HWC 3-channel, got shape={tuple(src.shape)}")
+
+    src = src.contiguous()
+    src_h, src_w = int(src.shape[0]), int(src.shape[1])
+    src_stride_h = int(src.stride(0))
+    src_stride_w = int(src.stride(1))
+
+    if out is None:
+        out = torch.empty(
+            (1, 3, target_h, target_w), dtype=torch.float32, device=src.device
+        )
+    else:
+        if tuple(out.shape) != (1, 3, target_h, target_w):
+            raise ValueError(
+                f"out has shape {tuple(out.shape)}, expected "
+                f"(1, 3, {target_h}, {target_w})"
+            )
+        if out.dtype != torch.float32 or not out.is_cuda or not out.is_contiguous():
+            raise ValueError("out must be a contiguous fp32 CUDA tensor")
+
+    dst_stride_c = target_h * target_w  # dense CHW strides; needs contiguous out
+    dst_stride_h = target_w
+
+    ch_r, ch_g, ch_b = (2, 1, 0) if swap_rb else (0, 1, 2)
+
+    inv_std_255_r = 1.0 / (255.0 * stds[0])
+    inv_std_255_g = 1.0 / (255.0 * stds[1])
+    inv_std_255_b = 1.0 / (255.0 * stds[2])
+    offset_r = -means[0] / stds[0]
+    offset_g = -means[1] / stds[1]
+    offset_b = -means[2] / stds[2]
+
+    BLOCK_H = BLOCK_W = 16
+    grid = (
+        (target_h + BLOCK_H - 1) // BLOCK_H,
+        (target_w + BLOCK_W - 1) // BLOCK_W,
+    )
+    _fused_resize_normalize_kernel[grid](
+        src,
+        out,
+        tables.ymin_gpu,
+        tables.xmin_gpu,
+        tables.wy_gpu,
+        tables.wx_gpu,
+        src_h,
+        src_w,
+        src_stride_h,
+        src_stride_w,
+        dst_stride_c,
+        dst_stride_h,
+        target_h,
+        target_w,
+        float(inv_std_255_r),
+        float(inv_std_255_g),
+        float(inv_std_255_b),
+        float(offset_r),
+        float(offset_g),
+        float(offset_b),
+        CH_R=ch_r,
+        CH_G=ch_g,
+        CH_B=ch_b,
+        KSIZE_Y=tables.ksize_y,
+        KSIZE_X=tables.ksize_x,
+        PRECISION_BITS_C=PRECISION_BITS,
+        HALF_C=_HALF,
+        BLOCK_H=BLOCK_H,
+        BLOCK_W=BLOCK_W,
+    )
+    return out

From 1b8d6c1c8ad1491b07cb36bf11460618057cdf1a Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Mon, 11 May 2026 03:20:58 +0000
Subject: [PATCH 18/76] perf(rfdetr): add kill switch for Triton preproc fast
 path
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

INFERENCE_MODELS_RFDETR_TRITON_PREPROC_ENABLED (default true). Setting
it to false short-circuits _try_fast_preprocess so every call falls
back to the PIL reference path — useful for A/B benchmarking and as an
escape hatch if the fused kernel is ever implicated in a regression.

e2e on vehicles_312px.mp4 (538 frames, rfdetr-seg-nano TRT, mean of 3):
  ON  (default): 98.57 fps
  OFF (env=false): 76.60 fps
  Δ: +28.7% / −2.90 ms/frame
---
 .../models/rfdetr/rfdetr_instance_segmentation_trt.py    | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py b/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py
index afa70f9c3d..6b2d98db11 100644
--- a/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py
+++ b/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py
@@ -55,6 +55,7 @@
     post_process_instance_segmentation_results_to_rle_masks,
 )
 from inference_models.models.rfdetr.pre_processing import pre_process_network_input
+from inference_models.utils.environment import get_boolean_from_env
 
 try:
     from inference_models.models.rfdetr.triton_preprocess import (
@@ -66,6 +67,12 @@
     _TRITON_AVAILABLE = False
     build_resample_tables = None
     triton_preprocess_rfdetr_stretch = None
+
+# Kill switch: set INFERENCE_MODELS_RFDETR_TRITON_PREPROC_ENABLED=false to force
+# the PIL reference path for every call, regardless of other predicates.
+_FAST_PATH_ENABLED = get_boolean_from_env(
+    "INFERENCE_MODELS_RFDETR_TRITON_PREPROC_ENABLED", default=True
+)
 from inference_models.weights_providers.entities import RecommendedParameters
 
 try:
@@ -358,6 +365,8 @@ def _try_fast_preprocess(
         image_size,
         pre_processing_overrides,
     ) -> Optional[Tuple[torch.Tensor, List[PreProcessingMetadata]]]:
+        if not _FAST_PATH_ENABLED:
+            return None
         if not _TRITON_AVAILABLE:
             return None
         if image_size is not None:

From f959d81f1354547eddb346bae967c9fc85c20138 Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Fri, 29 May 2026 21:06:43 +0000
Subject: [PATCH 19/76] Optimize RF-DETR Triton preprocessing

---
 .../rfdetr_preprocess_microbenchmark.py       | 747 +++++++++++++++++
 .../models/rfdetr/triton_preprocess.py        | 783 ++++++++++++++++--
 2 files changed, 1472 insertions(+), 58 deletions(-)
 create mode 100644 development/stream_interface/rfdetr_preprocess_microbenchmark.py

diff --git a/development/stream_interface/rfdetr_preprocess_microbenchmark.py b/development/stream_interface/rfdetr_preprocess_microbenchmark.py
new file mode 100644
index 0000000000..b8397bd852
--- /dev/null
+++ b/development/stream_interface/rfdetr_preprocess_microbenchmark.py
@@ -0,0 +1,747 @@
+"""Capture/replay benchmark for RF-DETR reference preprocessing.
+
+Default usage captures 100 invocations of
+``inference_models.models.rfdetr.pre_processing.pre_process_network_input`` from
+the e2e workflow and immediately replays them:
+
+    python development/stream_interface/rfdetr_preprocess_microbenchmark.py \
+        --video_reference vehicles_1080p.mp4
+
+Replay-only usage:
+
+    python development/stream_interface/rfdetr_preprocess_microbenchmark.py \
+        --mode replay --cases-dir temp/rfdetr_preprocess_cases
+
+The TRT RF-DETR model has a Triton fast path that bypasses this function. Capture
+mode forces ``INFERENCE_MODELS_RFDETR_TRITON_PREPROC_ENABLED=false`` before
+loading the workflow so the reference preprocessing function is exercised.
+"""
+
+import argparse
+import functools
+import importlib.util
+import json
+import os
+from pathlib import Path
+import pickle
+import sys
+import threading
+from time import perf_counter, time
+from typing import Any, Dict, List, Optional, Tuple, Union
+
+import numpy as np
+import torch
+
+
+_REPO_ROOT = Path(__file__).resolve().parents[2]
+_INFERENCE_MODELS_ROOT = _REPO_ROOT / "inference_models"
+_WORKFLOW_PATH = (
+    _REPO_ROOT / "development" / "stream_interface" / "rfdetr_nano_seg_trt_workflow.py"
+)
+_TARGET_FUNCTION = "pre_process_network_input"
+_SCHEMA_VERSION = 1
+_FORCED_PREPROC_ENV = "INFERENCE_MODELS_RFDETR_TRITON_PREPROC_ENABLED"
+_TRITON_REPLAY_STATE: Dict[Tuple[str, int, int, int, int], Any] = {}
+
+
+def _ensure_local_import_paths() -> None:
+    for path in (str(_INFERENCE_MODELS_ROOT), str(_REPO_ROOT)):
+        if path not in sys.path:
+            sys.path.insert(0, path)
+
+
+def _load_workflow_module() -> Any:
+    """Import the workflow script at _WORKFLOW_PATH as a standalone module object."""
+    module_name = "rfdetr_nano_seg_trt_workflow_for_preprocess_microbenchmark"
+    spec = importlib.util.spec_from_file_location(module_name, _WORKFLOW_PATH)
+    if spec is None or spec.loader is None:
+        raise RuntimeError(f"Could not load workflow module from {_WORKFLOW_PATH}")
+    module = importlib.util.module_from_spec(spec)
+    # The module is executed but not registered in sys.modules.
+    spec.loader.exec_module(module)
+    return module
+
+
+def _tensor_to_cpu(tensor: torch.Tensor) -> torch.Tensor:
+    return tensor.detach().cpu().clone()
+
+
+def _snapshot_images(images: Any) -> Any:
+    if isinstance(images, torch.Tensor):
+        return {"kind": "tensor", "value": _tensor_to_cpu(images)}
+    if isinstance(images, np.ndarray):
+        return {"kind": "ndarray", "value": np.array(images, copy=True)}
+    if isinstance(images, list):
+        return {"kind": "list", "value": [_snapshot_images(image) for image in images]}
+    return {"kind": "raw", "value": images}
+
+
+def _materialize_images(payload: Any, device: torch.device) -> Any:
+    kind = payload["kind"]
+    value = payload["value"]
+    if kind == "tensor":
+        return value.to(device=device).clone()
+    if kind == "ndarray":
+        return np.array(value, copy=True)
+    if kind == "list":
+        return [_materialize_images(image, device=device) for image in value]
+    if kind == "raw":
+        return value
+    raise RuntimeError(f"Unknown image payload kind: {kind}")
+
+
+def _snapshot_inputs(
+    *,
+    images: Any,
+    image_pre_processing: Any,
+    network_input: Any,
+    target_device: torch.device,
+    input_color_format: Optional[Any],
+    image_size_wh: Optional[Union[int, Tuple[int, int]]],
+    pre_processing_overrides: Optional[Any],
+) -> dict:
+    return {
+        "images": _snapshot_images(images),
+        "image_pre_processing": image_pre_processing,
+        "network_input": network_input,
+        "target_device": str(target_device),
+        "input_color_format": input_color_format,
+        "image_size_wh": image_size_wh,
+        "pre_processing_overrides": pre_processing_overrides,
+    }
+
+
+def _snapshot_output(output: Tuple[torch.Tensor, List[Any]]) -> dict:
+    """Snapshot a (tensor, metadata) result: CPU tensor copy plus a new list."""
+    out_tensor, out_metadata = output
+    # list() copies only the container; the metadata entries themselves are
+    # shared with the live result.
+    return {"tensor": _tensor_to_cpu(out_tensor), "metadata": list(out_metadata)}
+
+
+def _bind_target_arguments(args: tuple, kwargs: dict) -> dict:
+    """Map a captured call's positional and keyword arguments onto parameter names.
+
+    The first four parameters are required; the three trailing ones default to
+    None. Keyword arguments take precedence over positional ones.
+
+    Returns a dict keyed by every parameter name, in the order of `names`.
+
+    Raises:
+        RuntimeError: if a required parameter was not supplied.
+    """
+    names = (
+        "images",
+        "image_pre_processing",
+        "network_input",
+        "target_device",
+        "input_color_format",
+        "image_size_wh",
+        "pre_processing_overrides",
+    )
+    bound = {**dict.fromkeys(names[4:]), **dict(zip(names, args)), **kwargs}
+    missing = [name for name in names[:4] if name not in bound]
+    if missing:
+        raise RuntimeError(f"Cannot capture target call; missing args: {missing}")
+    return {name: bound[name] for name in names}
+
+
+def _write_pickle(path: Path, payload: dict) -> None:
+    """Pickle `payload` to a sibling ".tmp" file, then os.replace it onto `path`."""
+    staging_path = path.with_suffix(path.suffix + ".tmp")
+    staging_path.write_bytes(pickle.dumps(payload, protocol=pickle.HIGHEST_PROTOCOL))
+    os.replace(staging_path, path)
+
+
+class _CaptureState:
+    def __init__(self, cases_dir: Path, limit: int) -> None:
+        self.cases_dir = cases_dir
+        self.limit = limit
+        self.count = 0
+        self.lock = threading.Lock()
+
+    def maybe_save(self, inputs: dict, output: Tuple[torch.Tensor, List[Any]]) -> None:
+        with self.lock:
+            if self.count >= self.limit:
+                return
+            case_index = self.count
+            payload = {
+                "schema_version": _SCHEMA_VERSION,
+                "case_index": case_index,
+                "inputs": _snapshot_inputs(**inputs),
+                "expected_output": _snapshot_output(output),
+            }
+            _write_pickle(self.cases_dir / f"case_{case_index:04d}.pkl", payload)
+            self.count += 1
+            if self.count == 1 or self.count % 10 == 0 or self.count == self.limit:
+                print(
+                    f"[capture] saved {self.count}/{self.limit} preprocess calls",
+                    flush=True,
+                )
+
+
+def _install_capture_hook(state: _CaptureState) -> None:
+    """Wrap the target preprocessing function so every call is offered to `state`."""
+    _ensure_local_import_paths()
+    from inference_models.models.rfdetr import pre_processing as rfdetr_pre_processing
+    original = getattr(rfdetr_pre_processing, _TARGET_FUNCTION)
+
+    @functools.wraps(original)
+    def wrapper(*args: Any, **kwargs: Any) -> Tuple[torch.Tensor, List[Any]]:
+        result = original(*args, **kwargs)
+        state.maybe_save(inputs=_bind_target_arguments(args, kwargs), output=result)
+        return result
+
+    # Already-imported model modules may hold their own binding; patch those too.
+    consumers = (
+        "inference_models.models.rfdetr.rfdetr_instance_segmentation_trt",
+        "inference_models.models.rfdetr.rfdetr_instance_segmentation_onnx",
+        "inference_models.models.rfdetr.rfdetr_instance_segmentation_pytorch",
+        "inference_models.models.rfdetr.rfdetr_object_detection_trt",
+        "inference_models.models.rfdetr.rfdetr_object_detection_onnx",
+        "inference_models.models.rfdetr.rfdetr_object_detection_pytorch",
+    )
+    for module in [rfdetr_pre_processing, *map(sys.modules.get, consumers)]:
+        if module is not None and hasattr(module, _TARGET_FUNCTION):
+            setattr(module, _TARGET_FUNCTION, wrapper)
+
+
+def _prepare_cases_dir(cases_dir: Path, overwrite: bool) -> None:
+    """Create `cases_dir`; refuse to reuse old captures unless `overwrite` is set."""
+    cases_dir.mkdir(parents=True, exist_ok=True)
+    stale_cases = list(cases_dir.glob("case_*.pkl"))
+    manifest_path = cases_dir / "manifest.json"
+    if not overwrite and (stale_cases or manifest_path.exists()):
+        raise RuntimeError(
+            f"{cases_dir} already contains captured cases; pass --overwrite "
+            "or choose a different --cases-dir."
+        )
+    if overwrite:
+        doomed = stale_cases + ([manifest_path] if manifest_path.exists() else [])
+        for stale_path in doomed:
+            stale_path.unlink()
+
+
+def _write_manifest(cases_dir: Path, payload: dict) -> None:
+    """Write `payload` to <cases_dir>/manifest.json as indented, key-sorted JSON."""
+    with (cases_dir / "manifest.json").open("w") as manifest_file:
+        manifest_file.write(json.dumps(payload, indent=2, sort_keys=True) + "\n")
+
+
+def _run_capture(args: argparse.Namespace) -> int:
+    if args.force_reference_preprocess:
+        os.environ[_FORCED_PREPROC_ENV] = "false"
+
+    cases_dir = args.cases_dir.resolve()
+    _prepare_cases_dir(cases_dir=cases_dir, overwrite=args.overwrite)
+
+    workflow = _load_workflow_module()
+    model_id = workflow._resolve_model_id(args.model_id, args.backend)
+    workflow._prepare_local_workflow_model_bundle(model_id)
+    if model_id != args.model_id:
+        print(
+            f"[model] using local TRT package via workflow model id: {model_id}",
+            flush=True,
+        )
+
+    state = _CaptureState(cases_dir=cases_dir, limit=args.capture_count)
+    _install_capture_hook(state=state)
+
+    frame_count = 0
+    start_time: Optional[float] = None
+    pipeline_ref: Dict[str, Any] = {}
+
+    def sink(predictions: Any, video_frames: Any) -> None:
+        nonlocal frame_count, start_time
+        del video_frames
+        if not isinstance(predictions, list):
+            predictions = [predictions]
+        frame_count += sum(p is not None for p in predictions)
+        if start_time is None:
+            start_time = perf_counter()
+        if frame_count % args.progress_every == 0:
+            elapsed = perf_counter() - start_time
+            fps = frame_count / elapsed if elapsed > 0 else 0.0
+            print(
+                f"[progress] frames={frame_count} fps={fps:.2f} "
+                f"captures={state.count}/{state.limit}",
+                flush=True,
+            )
+        if state.count >= state.limit and "pipeline" in pipeline_ref:
+            pipeline_ref["pipeline"].terminate()
+
+    pipeline = workflow.InferencePipeline.init_with_workflow(
+        video_reference=args.video_reference,
+        workflow_specification=workflow.build_workflow(model_id, args.confidence),
+        on_prediction=sink,
+    )
+    pipeline_ref["pipeline"] = pipeline
+    pipeline.start()
+    pipeline.join()
+
+    if state.count < args.capture_count:
+        raise RuntimeError(
+            f"Captured only {state.count}/{args.capture_count} invocations. "
+            "Use a longer video or lower --capture-count."
+        )
+
+    elapsed = perf_counter() - start_time if start_time else 0.0
+    _write_manifest(
+        cases_dir=cases_dir,
+        payload={
+            "schema_version": _SCHEMA_VERSION,
+            "function": (
+                "inference_models.models.rfdetr.pre_processing."
+                "pre_process_network_input"
+            ),
+            "case_count": state.count,
+            "video_reference": args.video_reference,
+            "backend": args.backend,
+            "model_id": model_id,
+            "confidence": args.confidence,
+            "frames_seen_by_sink": frame_count,
+            "capture_elapsed_seconds": elapsed,
+            "created_at_unix": time(),
+            "forced_env": (
+                f"{_FORCED_PREPROC_ENV}=false"
+                if args.force_reference_preprocess
+                else None
+            ),
+        },
+    )
+    print(f"[capture] wrote {state.count} cases to {cases_dir}", flush=True)
+    return state.count
+
+
+def _resolve_device(device: str, captured: str) -> torch.device:
+    if device == "captured":
+        return torch.device(captured)
+    if device == "auto":
+        return torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    resolved = torch.device(device)
+    if resolved.type == "cuda" and not torch.cuda.is_available():
+        raise RuntimeError("--device cuda requested, but CUDA is not available")
+    return resolved
+
+
+def _materialize_inputs(case: dict, device_override: str) -> dict:
+    inputs = case["inputs"]
+    target_device = _resolve_device(
+        device=device_override,
+        captured=inputs["target_device"],
+    )
+    return {
+        "images": _materialize_images(inputs["images"], device=target_device),
+        "image_pre_processing": inputs["image_pre_processing"],
+        "network_input": inputs["network_input"],
+        "target_device": target_device,
+        "input_color_format": inputs["input_color_format"],
+        "image_size_wh": inputs["image_size_wh"],
+        "pre_processing_overrides": inputs["pre_processing_overrides"],
+    }
+
+
+def _uses_enabled(config: Optional[Any]) -> bool:
+    return bool(config is not None and config.enabled)
+
+
+def _run_triton_fast_preprocess(inputs: dict) -> Tuple[torch.Tensor, List[Any]]:
+    from inference_models.entities import ImageDimensions
+    from inference_models.models.common.roboflow.model_packages import (
+        ColorMode,
+        PreProcessingMetadata,
+        ResizeMode,
+        StaticCropOffset,
+    )
+    from inference_models.models.rfdetr.triton_preprocess import (
+        TRITON_AVAILABLE,
+        build_resample_tables,
+        triton_preprocess_rfdetr_stretch,
+    )
+
+    if not TRITON_AVAILABLE:
+        raise RuntimeError("Triton RF-DETR preprocessing is not available")
+
+    target_device = inputs["target_device"]
+    if target_device.type != "cuda":
+        raise RuntimeError(
+            f"Triton replay requires CUDA target_device, got {target_device}"
+        )
+
+    images = inputs["images"]
+    if isinstance(images, list):
+        if len(images) != 1:
+            raise RuntimeError("Triton replay only supports batch size 1")
+        candidate = images[0]
+    else:
+        candidate = images
+    if (
+        not isinstance(candidate, np.ndarray)
+        or candidate.dtype != np.uint8
+        or candidate.ndim != 3
+        or candidate.shape[2] != 3
+    ):
+        raise RuntimeError(
+            "Triton replay only supports one uint8 HWC ndarray input; "
+            f"got type={type(candidate)} shape={getattr(candidate, 'shape', None)}"
+        )
+
+    if inputs["image_size_wh"] is not None:
+        raise RuntimeError("Triton replay does not support image_size_wh overrides")
+
+    image_pre_processing = inputs["image_pre_processing"]
+    if (
+        _uses_enabled(image_pre_processing.static_crop)
+        or _uses_enabled(image_pre_processing.contrast)
+        or _uses_enabled(image_pre_processing.grayscale)
+    ):
+        raise RuntimeError(
+            "Triton replay only supports cases without static crop, contrast, "
+            "or grayscale preprocessing"
+        )
+
+    network_input = inputs["network_input"]
+    if network_input.dataset_version_resize_dimensions is not None:
+        raise RuntimeError("Triton replay does not support dataset-version resize")
+    if network_input.input_channels != 3:
+        raise RuntimeError("Triton replay only supports 3 input channels")
+    if network_input.scaling_factor not in (None, 255):
+        raise RuntimeError(
+            "Triton replay only supports scaling_factor in (None, 255)"
+        )
+    if network_input.normalization is None:
+        raise RuntimeError("Triton replay requires network_input.normalization")
+    if network_input.resize_mode not in (
+        ResizeMode.STRETCH_TO,
+        ResizeMode.LETTERBOX,
+        ResizeMode.CENTER_CROP,
+        ResizeMode.LETTERBOX_REFLECT_EDGES,
+    ):
+        raise RuntimeError(
+            f"Triton replay does not support resize_mode={network_input.resize_mode}"
+        )
+
+    caller_mode = (
+        ColorMode(inputs["input_color_format"])
+        if inputs["input_color_format"] is not None
+        else ColorMode.BGR
+    )
+    swap_rb = caller_mode != network_input.color_mode
+
+    means, stds = network_input.normalization
+    means_t = (float(means[0]), float(means[1]), float(means[2]))
+    stds_t = (float(stds[0]), float(stds[1]), float(stds[2]))
+    target_h = network_input.training_input_size.height
+    target_w = network_input.training_input_size.width
+    orig_h, orig_w = int(candidate.shape[0]), int(candidate.shape[1])
+
+    state_key = (str(target_device), orig_h, orig_w, target_h, target_w)
+    state = _TRITON_REPLAY_STATE.get(state_key)
+    if state is None:
+        pinned_host = torch.empty(
+            (orig_h, orig_w, 3), dtype=torch.uint8, pin_memory=True
+        )
+        src_gpu = torch.empty(
+            (orig_h, orig_w, 3), dtype=torch.uint8, device=target_device
+        )
+        out_buffer = torch.empty(
+            (1, 3, target_h, target_w), dtype=torch.float32, device=target_device
+        )
+        tables = build_resample_tables(
+            src_h=orig_h,
+            src_w=orig_w,
+            target_h=target_h,
+            target_w=target_w,
+            device=target_device,
+        )
+        state = {
+            "pinned_host": pinned_host,
+            "src_gpu": src_gpu,
+            "out_buffer": out_buffer,
+            "tables": tables,
+        }
+        _TRITON_REPLAY_STATE[state_key] = state
+
+    pinned_np = state["pinned_host"].numpy()
+    np.copyto(pinned_np, candidate, casting="no")
+    state["src_gpu"].copy_(state["pinned_host"], non_blocking=True)
+    triton_preprocess_rfdetr_stretch(
+        src=state["src_gpu"],
+        tables=state["tables"],
+        target_h=target_h,
+        target_w=target_w,
+        means=means_t,
+        stds=stds_t,
+        swap_rb=swap_rb,
+        out=state["out_buffer"],
+    )
+
+    meta = PreProcessingMetadata(
+        pad_left=0,
+        pad_top=0,
+        pad_right=0,
+        pad_bottom=0,
+        original_size=ImageDimensions(width=orig_w, height=orig_h),
+        size_after_pre_processing=ImageDimensions(width=orig_w, height=orig_h),
+        inference_size=ImageDimensions(width=target_w, height=target_h),
+        scale_width=target_w / orig_w,
+        scale_height=target_h / orig_h,
+        static_crop_offset=StaticCropOffset(
+            offset_x=0,
+            offset_y=0,
+            crop_width=orig_w,
+            crop_height=orig_h,
+        ),
+    )
+    return state["out_buffer"], [meta]
+
+
+def _synchronize(device: torch.device) -> None:
+    if device.type == "cuda":  # any other device type falls through: this helper is then a no-op
+        torch.cuda.synchronize(device)  # block the host until work queued on `device` has completed
+
+
+def _load_case(path: Path) -> dict:
+    with path.open("rb") as f:
+        payload = pickle.load(f)  # NOTE(review): unpickling can execute arbitrary code; only load case files you captured yourself
+    if payload.get("schema_version") != _SCHEMA_VERSION:  # refuse captures written under a different schema version
+        raise RuntimeError(
+            f"{path} has schema_version={payload.get('schema_version')}; "
+            f"expected {_SCHEMA_VERSION}."
+        )
+    return payload
+
+
+def _assert_tensor_equal(
+    *,
+    actual: torch.Tensor,
+    expected: torch.Tensor,
+    label: str,
+    atol: float,
+    rtol: float,
+) -> None:
+    """Raise AssertionError unless `actual` has `expected`'s shape and matching values."""
+    actual_cpu = actual.detach().cpu()
+    # Shape first: torch.allclose broadcasts, so a mismatch could pass or raise RuntimeError.
+    same_shape = actual_cpu.shape == expected.shape
+    use_tolerance = torch.is_floating_point(actual_cpu) and (atol != 0.0 or rtol != 0.0)
+    if same_shape and use_tolerance:
+        equal = torch.allclose(actual_cpu, expected, atol=atol, rtol=rtol)
+    else:
+        equal = same_shape and torch.equal(actual_cpu, expected)  # exact comparison path
+    if not equal:
+        max_abs = (actual_cpu - expected).abs().max().item() if same_shape else None
+        raise AssertionError(
+            f"{label} differs: actual shape={tuple(actual_cpu.shape)} "
+            f"expected shape={tuple(expected.shape)} max_abs_diff={max_abs}"
+        )
+
+
+def _assert_outputs_equal(
+    *,
+    actual: Tuple[torch.Tensor, List[Any]],
+    expected: dict,
+    case_index: int,
+    atol: float,
+    rtol: float,
+) -> None:
+    actual_tensor, actual_metadata = actual  # unpack the (tensor, metadata list) pair under test
+    _assert_tensor_equal(
+        actual=actual_tensor,
+        expected=expected["tensor"],  # captured reference tensor stored under the "tensor" key
+        label=f"case {case_index} tensor",
+        atol=atol,
+        rtol=rtol,
+    )
+    if list(actual_metadata) != list(expected["metadata"]):  # element-wise ==, so it relies on the metadata objects' own equality
+        raise AssertionError(f"case {case_index} metadata differs")
+
+
+def _run_one_replay_case(
+    *,
+    case_path: Path,
+    device_override: str,
+    implementation: str,
+    atol: float,
+    rtol: float,
+) -> float:
+    from inference_models.models.rfdetr.pre_processing import pre_process_network_input  # deferred import; NOTE(review): presumably so import paths can be set up first — confirm
+
+    case = _load_case(case_path)  # loading and input materialization happen before the clock starts
+    inputs = _materialize_inputs(case=case, device_override=device_override)
+    _synchronize(inputs["target_device"])  # drain pending device work so it is not billed to the timed region
+    start = perf_counter()
+    if implementation == "reference":
+        actual = pre_process_network_input(**inputs)
+    elif implementation == "triton":
+        actual = _run_triton_fast_preprocess(inputs)
+    else:
+        raise RuntimeError(f"Unknown replay implementation: {implementation}")
+    _synchronize(inputs["target_device"])  # wait for the work launched above before reading the clock
+    elapsed = perf_counter() - start  # seconds
+    _assert_outputs_equal(  # verification runs after the clock stops, so it is excluded from `elapsed`
+        actual=actual,
+        expected=case["expected_output"],
+        case_index=case["case_index"],
+        atol=atol,
+        rtol=rtol,
+    )
+    return elapsed
+
+
+def _summarize_timings(timings: List[float]) -> dict:
+    sorted_timings = sorted(timings)  # sorted copy; the caller's list is left untouched
+    total = sum(sorted_timings)
+    count = len(sorted_timings)
+
+    def percentile(p: float) -> float:  # nearest-rank lookup; callers below pass p as a fraction (0.50 / 0.90 / 0.99)
+        if count == 0:
+            return 0.0
+        index = min(count - 1, int(round((count - 1) * p)))  # round() is half-to-even; min() clamps to the last index
+        return sorted_timings[index]
+
+    return {  # every statistic falls back to 0.0 when `timings` is empty
+        "count": count,
+        "total_seconds": total,
+        "mean_ms": (total / count) * 1000 if count else 0.0,  # inputs are treated as seconds; *1000 yields milliseconds
+        "min_ms": sorted_timings[0] * 1000 if count else 0.0,
+        "p50_ms": percentile(0.50) * 1000,
+        "p90_ms": percentile(0.90) * 1000,
+        "p99_ms": percentile(0.99) * 1000,
+        "max_ms": sorted_timings[-1] * 1000 if count else 0.0,
+    }
+
+
+def _print_timing_summary(summary: dict) -> None:
+    print(  # one "[replay] ..." line; reads the keys produced by _summarize_timings
+        "[replay] "
+        f"calls={summary['count']} "
+        f"total={summary['total_seconds']:.3f}s "
+        f"mean={summary['mean_ms']:.3f}ms "
+        f"p50={summary['p50_ms']:.3f}ms "
+        f"p90={summary['p90_ms']:.3f}ms "
+        f"p99={summary['p99_ms']:.3f}ms "
+        f"min={summary['min_ms']:.3f}ms "
+        f"max={summary['max_ms']:.3f}ms",
+        flush=True,  # flush immediately so the line is not held back by stdout buffering
+    )
+
+
+def _run_replay(args: argparse.Namespace) -> dict:
+    _ensure_local_import_paths()
+    cases_dir = args.cases_dir.resolve()
+    case_paths = sorted(cases_dir.glob("case_*.pkl"))  # deterministic order: sorted by path
+    if args.max_cases is not None:
+        case_paths = case_paths[: args.max_cases]  # keep the first max_cases files (a negative value would instead drop files from the end)
+    if not case_paths:
+        raise RuntimeError(f"No case_*.pkl files found in {cases_dir}")
+
+    print(
+        f"[replay] cases={len(case_paths)} repeats={args.repeats} "
+        f"warmup_repeats={args.warmup_repeats} device={args.device} "
+        f"implementation={args.replay_implementation}",
+        flush=True,
+    )
+    for _ in range(args.warmup_repeats):  # warm-up passes: outputs are still verified, timings are discarded
+        for case_path in case_paths:
+            _run_one_replay_case(
+                case_path=case_path,
+                device_override=args.device,
+                implementation=args.replay_implementation,
+                atol=args.atol,
+                rtol=args.rtol,
+            )
+
+    timings = []  # one entry per (repeat, case), in seconds
+    for repeat_index in range(args.repeats):
+        for case_path in case_paths:  # each case file is re-loaded from disk on every repeat
+            timings.append(
+                _run_one_replay_case(
+                    case_path=case_path,
+                    device_override=args.device,
+                    implementation=args.replay_implementation,
+                    atol=args.atol,
+                    rtol=args.rtol,
+                )
+            )
+        print(
+            f"[replay] completed repeat {repeat_index + 1}/{args.repeats}",
+            flush=True,
+        )
+
+    summary = _summarize_timings(timings)
+    _print_timing_summary(summary)
+    print("[replay] all outputs matched captured e2e outputs", flush=True)  # reached only if no case raised above
+    return summary
+
+
+def _parse_args() -> argparse.Namespace:
+    """Parse the capture/replay CLI flags and reject out-of-range numeric options."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--mode",
+        choices=("capture", "replay", "capture-and-replay"),
+        default="capture-and-replay",
+    )
+    parser.add_argument("--video_reference", default="vehicles_1080p.mp4")
+    parser.add_argument("--model_id", default="rfdetr-seg-nano")
+    parser.add_argument("--confidence", type=float, default=0.4)
+    parser.add_argument("--backend", choices=("trt", "onnx", "torch"), default="trt")
+    parser.add_argument(
+        "--cases-dir", type=Path, default=Path("temp/rfdetr_preprocess_cases")
+    )
+    parser.add_argument("--capture-count", type=int, default=100)
+    parser.add_argument("--progress-every", type=int, default=50)
+    parser.add_argument(
+        "--overwrite", action=argparse.BooleanOptionalAction, default=True
+    )
+    parser.add_argument(
+        "--force-reference-preprocess",
+        action=argparse.BooleanOptionalAction,
+        default=True,
+        help=(
+            "Set INFERENCE_MODELS_RFDETR_TRITON_PREPROC_ENABLED=false before "
+            "loading the workflow so pre_process_network_input is called."
+        ),
+    )
+    parser.add_argument(
+        "--device",
+        default="captured",
+        help="'captured', 'auto', 'cpu', or a torch device string used on replay.",
+    )
+    parser.add_argument(
+        "--replay-implementation",
+        choices=("reference", "triton"),
+        default="reference",
+        help="Implementation used by replay; capture always hooks pre_process_network_input.",
+    )
+    parser.add_argument("--repeats", type=int, default=1)
+    parser.add_argument("--warmup-repeats", type=int, default=0)
+    parser.add_argument("--max-cases", type=int, default=None)
+    parser.add_argument("--atol", type=float, default=0.0)
+    parser.add_argument("--rtol", type=float, default=0.0)
+    args = parser.parse_args()
+    if args.capture_count <= 0:
+        raise ValueError("--capture-count must be positive")
+    if args.repeats <= 0:
+        raise ValueError("--repeats must be positive")
+    if args.warmup_repeats < 0:
+        raise ValueError("--warmup-repeats must be non-negative")
+    if args.progress_every <= 0:
+        raise ValueError("--progress-every must be positive")
+    # Replay slices case_paths[:max_cases]: a negative bound would silently drop trailing cases.
+    if args.max_cases is not None and args.max_cases <= 0:
+        raise ValueError("--max-cases must be positive")
+    return args
+
+
+def main() -> None:
+    args = _parse_args()
+    if args.mode in {"capture", "capture-and-replay"}:  # in the combined mode, capture runs before replay
+        _run_capture(args=args)
+    if args.mode in {"replay", "capture-and-replay"}:  # the returned timing summary is not used here
+        _run_replay(args=args)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/inference_models/inference_models/models/rfdetr/triton_preprocess.py b/inference_models/inference_models/models/rfdetr/triton_preprocess.py
index 0b9d7eaa8e..091b71d0c9 100644
--- a/inference_models/inference_models/models/rfdetr/triton_preprocess.py
+++ b/inference_models/inference_models/models/rfdetr/triton_preprocess.py
@@ -25,22 +25,23 @@
 horizontal convolution (int32 fixed-point, uint8 quantize) on the fly,
 multiply by the vertical weight, and accumulate. Final: uint8 quantize,
 BGR↔RGB swap, /255, ImageNet normalize, fp32 CHW store.
-
-A separable two-pass variant (horizontal then vertical, via a DRAM uint8
-intermediate) is ~0.4 fps faster end-to-end on the 312² RF-DETR workload
-because it avoids redoing KSIZE_X MACs per output row. We picked the fused
-version for simplicity (no intermediate buffer, one launch, one piece of
-math).
 """
 
 from __future__ import annotations
 
 import math
+import os
 from typing import Optional, Tuple
 
 import numpy as np
 import torch
 
+from inference_models.errors import (
+    MissingDependencyError,
+    ModelInputError,
+    ModelRuntimeError,
+)
+
 try:
     import triton
     import triton.language as tl
@@ -53,6 +54,39 @@
 
 
 PRECISION_BITS = 22
+_PREPROC_VARIANT_ENV = "INFERENCE_MODELS_RFDETR_TRITON_PREPROC_VARIANT"
+_PREPROC_BLOCK_H_ENV = "INFERENCE_MODELS_RFDETR_TRITON_PREPROC_BLOCK_H"
+_PREPROC_BLOCK_W_ENV = "INFERENCE_MODELS_RFDETR_TRITON_PREPROC_BLOCK_W"
+_PREPROC_HORIZONTAL_BLOCK_H_ENV = (
+    "INFERENCE_MODELS_RFDETR_TRITON_PREPROC_HORIZONTAL_BLOCK_H"
+)
+_PREPROC_HORIZONTAL_BLOCK_W_ENV = (
+    "INFERENCE_MODELS_RFDETR_TRITON_PREPROC_HORIZONTAL_BLOCK_W"
+)
+
+
+def _read_power_of_two_env(name: str, default: int) -> int:
+    raw = os.getenv(name)
+    if raw is None or raw.strip() == "":  # unset or blank -> fall back to `default`, which is returned unvalidated
+        return default
+    try:
+        value = int(raw)
+    except ValueError as error:
+        raise ModelRuntimeError(
+            message=f"{name} must be an integer, got {raw!r}.",
+            help_url="https://inference-models.roboflow.com/errors/models-runtime/#modelruntimeerror",
+        ) from error
+    if value <= 0 or value & (value - 1) != 0:  # `&` binds tighter than `!=`; a power of two has one set bit, so v & (v - 1) == 0
+        raise ModelRuntimeError(
+            message=f"{name} must be a positive power of two, got {value}.",
+            help_url="https://inference-models.roboflow.com/errors/models-runtime/#modelruntimeerror",
+        )
+    if value > 512:  # upper bound on the accepted value
+        raise ModelRuntimeError(
+            message=f"{name} must be <= 512, got {value}.",
+            help_url="https://inference-models.roboflow.com/errors/models-runtime/#modelruntimeerror",
+        )
+    return value
 
 
 def _bilinear_antialias_weights_1d_int(
@@ -97,7 +131,7 @@ def _bilinear_antialias_weights_1d_int(
     _HALF = 1 << (PRECISION_BITS - 1)
 
     @triton.jit
-    def _fused_resize_normalize_kernel(
+    def fused_resize_normalize_kernel(
         src_ptr,
         dst_ptr,
         ymin_ptr,
@@ -108,6 +142,8 @@ def _fused_resize_normalize_kernel(
         src_w,
         src_stride_h,
         src_stride_w,
+        crop_offset_y,
+        crop_offset_x,
         dst_stride_c,
         dst_stride_h,
         target_h,
@@ -130,7 +166,11 @@ def _fused_resize_normalize_kernel(
     ):
         """One kernel per (tile_y, tile_x) over target image.
 
-        In : src uint8 HWC (src_h, src_w, 3), source color order.
+        In : src uint8 HWC (src_h, src_w, 3), source color order. The
+             resample tables are built against `(crop_h, crop_w)` — the
+             logical source size after a possible static crop — which the
+             caller passes as `src_h`/`src_w`. `crop_offset_{y,x}` is the
+             load-time offset into the raw HWC buffer.
         Out: dst fp32 CHW (1, 3, target_h, target_w), network color order,
              (pixel/255 - mean)/std.
         """
@@ -152,9 +192,9 @@ def _fused_resize_normalize_kernel(
         vacc_2 = tl.zeros((BLOCK_H, BLOCK_W), dtype=tl.int32)
 
         for ky in tl.static_range(KSIZE_Y):
-            # Source row contributing to each output row in this tile.
+            # Source row (after static crop) contributing to each output row.
             sy = ymin + ky
-            sy_c = tl.maximum(tl.minimum(sy, src_h - 1), 0)
+            sy_c = tl.maximum(tl.minimum(sy, src_h - 1), 0) + crop_offset_y
             wy = tl.load(wy_ptr + offs_y * KSIZE_Y + ky, mask=mask_y, other=0)
 
             # Horizontal pass for (output_rows_in_tile, output_cols_in_tile):
@@ -166,7 +206,7 @@ def _fused_resize_normalize_kernel(
 
             for kx in tl.static_range(KSIZE_X):
                 sx = xmin + kx
-                sx_c = tl.maximum(tl.minimum(sx, src_w - 1), 0)
+                sx_c = tl.maximum(tl.minimum(sx, src_w - 1), 0) + crop_offset_x
                 wx = tl.load(wx_ptr + offs_x * KSIZE_X + kx, mask=mask_x, other=0)
                 base = sy_c[:, None] * src_stride_h + sx_c[None, :] * src_stride_w
                 p0 = tl.load(src_ptr + base + 0, mask=mask_out, other=0).to(tl.int32)
@@ -228,8 +268,467 @@ def _fused_resize_normalize_kernel(
         tl.store(dst_ptr + 1 * dst_stride_c + out_row, out_g, mask=mask_out)
         tl.store(dst_ptr + 2 * dst_stride_c + out_row, out_b, mask=mask_out)
 
+    @triton.jit
+    def fused_resize_normalize_channel_kernel(
+        src_ptr,
+        dst_ptr,
+        ymin_ptr,
+        xmin_ptr,
+        wy_ptr,
+        wx_ptr,
+        src_h,  # clamp bound for source rows, applied before crop_offset_y is added
+        src_w,  # clamp bound for source columns, applied before crop_offset_x is added
+        src_stride_h,
+        src_stride_w,
+        crop_offset_y,
+        crop_offset_x,
+        dst_stride_c,
+        dst_stride_h,
+        target_h,
+        target_w,
+        inv_std_255_r,
+        inv_std_255_g,
+        inv_std_255_b,
+        offset_r,
+        offset_g,
+        offset_b,
+        CH_R: tl.constexpr,
+        CH_G: tl.constexpr,
+        CH_B: tl.constexpr,
+        KSIZE_Y: tl.constexpr,
+        KSIZE_X: tl.constexpr,
+        PRECISION_BITS_C: tl.constexpr,
+        HALF_C: tl.constexpr,
+        BLOCK_H: tl.constexpr,
+        BLOCK_W: tl.constexpr,
+    ):
+        """Channel-split variant: one program computes one output channel."""
+        pid_y = tl.program_id(0)  # tile index along output rows
+        pid_x = tl.program_id(1)  # tile index along output columns
+        pid_c = tl.program_id(2)  # output channel index: 0 and 1 select the R and G parameters, anything else B
+
+        offs_y = pid_y * BLOCK_H + tl.arange(0, BLOCK_H)
+        offs_x = pid_x * BLOCK_W + tl.arange(0, BLOCK_W)
+        mask_y = offs_y < target_h  # lanes past the last output row are masked off
+        mask_x = offs_x < target_w  # lanes past the last output column are masked off
+        mask_out = mask_y[:, None] & mask_x[None, :]
+
+        ymin = tl.load(ymin_ptr + offs_y, mask=mask_y, other=0)  # first source row of each output row's filter window
+        xmin = tl.load(xmin_ptr + offs_x, mask=mask_x, other=0)  # first source column of each output column's filter window
+
+        src_ch = tl.where(pid_c == 0, CH_R, tl.where(pid_c == 1, CH_G, CH_B))  # element offset within a source pixel for this channel
+        inv_std_255 = tl.where(
+            pid_c == 0,
+            inv_std_255_r,
+            tl.where(pid_c == 1, inv_std_255_g, inv_std_255_b),
+        )
+        offset = tl.where(
+            pid_c == 0,
+            offset_r,
+            tl.where(pid_c == 1, offset_g, offset_b),
+        )
+
+        vacc = tl.zeros((BLOCK_H, BLOCK_W), dtype=tl.int32)  # vertical fixed-point accumulator
+
+        for ky in tl.static_range(KSIZE_Y):
+            sy = ymin + ky
+            sy_c = tl.maximum(tl.minimum(sy, src_h - 1), 0) + crop_offset_y  # clamp to [0, src_h - 1], then shift by the crop offset
+            wy = tl.load(wy_ptr + offs_y * KSIZE_Y + ky, mask=mask_y, other=0)  # weight table indexed as [output_row * KSIZE_Y + tap]
+
+            hacc = tl.zeros((BLOCK_H, BLOCK_W), dtype=tl.int32)  # horizontal accumulator for this source row
+
+            for kx in tl.static_range(KSIZE_X):
+                sx = xmin + kx
+                sx_c = tl.maximum(tl.minimum(sx, src_w - 1), 0) + crop_offset_x  # clamp to [0, src_w - 1], then shift by the crop offset
+                wx = tl.load(wx_ptr + offs_x * KSIZE_X + kx, mask=mask_x, other=0)
+                base = sy_c[:, None] * src_stride_h + sx_c[None, :] * src_stride_w
+                p = tl.load(src_ptr + base + src_ch, mask=mask_out, other=0).to(
+                    tl.int32
+                )
+                hacc += p * wx[None, :]
+
+            hacc = (hacc + HALF_C) >> PRECISION_BITS_C  # add HALF_C, then drop PRECISION_BITS_C fractional bits
+            hacc = tl.minimum(tl.maximum(hacc, 0), 255)  # clamp the horizontal result to 0..255 before the vertical pass
+
+            vacc += hacc * wy[:, None]
 
-class _ResampleTables:
+        q = (vacc + HALF_C) >> PRECISION_BITS_C
+        q = tl.minimum(tl.maximum(q, 0), 255)  # final clamp to 0..255
+        out = q.to(tl.float32) * inv_std_255 + offset  # one multiply-add; NOTE(review): presumably 1/(255*std) and -mean/std, set by the launcher — confirm
+
+        out_row = offs_y[:, None] * dst_stride_h + offs_x[None, :]  # adjacent output columns are one element apart
+        tl.store(dst_ptr + pid_c * dst_stride_c + out_row, out, mask=mask_out)
+
+    @triton.jit
+    def fused_resize_normalize_packed_kernel(
+        src_ptr,
+        dst_ptr,
+        ymin_ptr,
+        xmin_ptr,
+        wy_ptr,
+        wx_ptr,
+        src_h,  # clamp bound for source rows, applied before crop_offset_y is added
+        src_w,  # clamp bound for source columns, applied before crop_offset_x is added
+        src_stride_h,
+        src_stride_w,
+        crop_offset_y,
+        crop_offset_x,
+        dst_stride_c,
+        dst_stride_h,
+        target_h,
+        target_w,
+        inv_std_255_r,
+        inv_std_255_g,
+        inv_std_255_b,
+        offset_r,
+        offset_g,
+        offset_b,
+        CH_R: tl.constexpr,
+        CH_G: tl.constexpr,
+        CH_B: tl.constexpr,
+        KSIZE_Y: tl.constexpr,
+        KSIZE_X: tl.constexpr,
+        PRECISION_BITS_C: tl.constexpr,
+        HALF_C: tl.constexpr,
+        BLOCK_H: tl.constexpr,
+        BLOCK_W: tl.constexpr,
+    ):
+        """All-channel variant using one unaligned u32 load per HWC pixel."""
+        pid_y = tl.program_id(0)  # tile index along output rows
+        pid_x = tl.program_id(1)  # tile index along output columns
+
+        offs_y = pid_y * BLOCK_H + tl.arange(0, BLOCK_H)
+        offs_x = pid_x * BLOCK_W + tl.arange(0, BLOCK_W)
+        mask_y = offs_y < target_h
+        mask_x = offs_x < target_w
+        mask_out = mask_y[:, None] & mask_x[None, :]
+
+        ymin = tl.load(ymin_ptr + offs_y, mask=mask_y, other=0)  # first source row of each output row's filter window
+        xmin = tl.load(xmin_ptr + offs_x, mask=mask_x, other=0)  # first source column of each output column's filter window
+
+        # src is contiguous HWC, so stride_h / stride_w recovers the raw width.
+        # u32 load is safe for every pixel except the final raw column.
+        raw_src_w = src_stride_h // src_stride_w
+
+        vacc_0 = tl.zeros((BLOCK_H, BLOCK_W), dtype=tl.int32)  # vacc_N accumulates source byte N of each pixel
+        vacc_1 = tl.zeros((BLOCK_H, BLOCK_W), dtype=tl.int32)
+        vacc_2 = tl.zeros((BLOCK_H, BLOCK_W), dtype=tl.int32)
+
+        for ky in tl.static_range(KSIZE_Y):
+            sy = ymin + ky
+            sy_c = tl.maximum(tl.minimum(sy, src_h - 1), 0) + crop_offset_y  # clamp to [0, src_h - 1], then shift by the crop offset
+            wy = tl.load(wy_ptr + offs_y * KSIZE_Y + ky, mask=mask_y, other=0)
+
+            hacc_0 = tl.zeros((BLOCK_H, BLOCK_W), dtype=tl.int32)
+            hacc_1 = tl.zeros((BLOCK_H, BLOCK_W), dtype=tl.int32)
+            hacc_2 = tl.zeros((BLOCK_H, BLOCK_W), dtype=tl.int32)
+
+            for kx in tl.static_range(KSIZE_X):
+                sx = xmin + kx
+                sx_c = tl.maximum(tl.minimum(sx, src_w - 1), 0) + crop_offset_x  # column index in the raw (uncropped) buffer
+                wx = tl.load(wx_ptr + offs_x * KSIZE_X + kx, mask=mask_x, other=0)
+                base = sy_c[:, None] * src_stride_h + sx_c[None, :] * src_stride_w
+
+                base_mod = base & 3  # position of the pixel's first byte inside its 4-byte word
+                aligned_base = base - base_mod  # offset rounded down to a multiple of 4; NOTE(review): assumes src_ptr itself is 4-byte aligned — confirm
+                not_last_col = sx_c[None, :] < (raw_src_w - 1)
+                use_packed = not_last_col | (base_mod == 1)  # at base_mod == 1 the pixel's 3 bytes end on the word's last byte, so nothing past the pixel is read
+
+                word_0 = tl.load(  # the 4-byte word containing the pixel's first byte
+                    (src_ptr + aligned_base).to(tl.pointer_type(tl.uint32)),
+                    mask=mask_out & use_packed,
+                    other=0,
+                )
+                word_1 = tl.load(  # the following word; fetched only when the pixel spills past word_0 (base_mod >= 2)
+                    (src_ptr + aligned_base + 4).to(tl.pointer_type(tl.uint32)),
+                    mask=mask_out & use_packed & (base_mod >= 2),
+                    other=0,
+                )
+
+                word_0_b0 = (word_0 & 0xFF).to(tl.int32)  # bN = N-th byte from the least-significant end; NOTE(review): lowest address only on little-endian — confirm
+                word_0_b1 = ((word_0 >> 8) & 0xFF).to(tl.int32)
+                word_0_b2 = ((word_0 >> 16) & 0xFF).to(tl.int32)
+                word_0_b3 = ((word_0 >> 24) & 0xFF).to(tl.int32)
+                word_1_b0 = (word_1 & 0xFF).to(tl.int32)
+                word_1_b1 = ((word_1 >> 8) & 0xFF).to(tl.int32)
+
+                packed_0 = tl.where(  # byte at base + 0, picked according to base_mod
+                    base_mod == 0,
+                    word_0_b0,
+                    tl.where(
+                        base_mod == 1,
+                        word_0_b1,
+                        tl.where(base_mod == 2, word_0_b2, word_0_b3),
+                    ),
+                )
+                packed_1 = tl.where(  # byte at base + 1
+                    base_mod == 0,
+                    word_0_b1,
+                    tl.where(
+                        base_mod == 1,
+                        word_0_b2,
+                        tl.where(base_mod == 2, word_0_b3, word_1_b0),
+                    ),
+                )
+                packed_2 = tl.where(  # byte at base + 2
+                    base_mod == 0,
+                    word_0_b2,
+                    tl.where(
+                        base_mod == 1,
+                        word_0_b3,
+                        tl.where(base_mod == 2, word_1_b0, word_1_b1),
+                    ),
+                )
+
+                fallback_mask = mask_out & ~use_packed  # lanes excluded from the packed path read their three bytes one at a time
+                byte_0 = tl.load(src_ptr + base + 0, mask=fallback_mask, other=0).to(
+                    tl.int32
+                )
+                byte_1 = tl.load(src_ptr + base + 1, mask=fallback_mask, other=0).to(
+                    tl.int32
+                )
+                byte_2 = tl.load(src_ptr + base + 2, mask=fallback_mask, other=0).to(
+                    tl.int32
+                )
+                p0 = tl.where(use_packed, packed_0, byte_0)  # merge the packed and byte-wise results per lane
+                p1 = tl.where(use_packed, packed_1, byte_1)
+                p2 = tl.where(use_packed, packed_2, byte_2)
+
+                wx_2d = wx[None, :]
+                hacc_0 += p0 * wx_2d
+                hacc_1 += p1 * wx_2d
+                hacc_2 += p2 * wx_2d
+
+            hacc_0 = (hacc_0 + HALF_C) >> PRECISION_BITS_C  # add HALF_C, then drop PRECISION_BITS_C fractional bits
+            hacc_1 = (hacc_1 + HALF_C) >> PRECISION_BITS_C
+            hacc_2 = (hacc_2 + HALF_C) >> PRECISION_BITS_C
+            hacc_0 = tl.minimum(tl.maximum(hacc_0, 0), 255)  # clamp the horizontal result to 0..255 before the vertical pass
+            hacc_1 = tl.minimum(tl.maximum(hacc_1, 0), 255)
+            hacc_2 = tl.minimum(tl.maximum(hacc_2, 0), 255)
+
+            wy_2d = wy[:, None]
+            vacc_0 += hacc_0 * wy_2d
+            vacc_1 += hacc_1 * wy_2d
+            vacc_2 += hacc_2 * wy_2d
+
+        q_0 = (vacc_0 + HALF_C) >> PRECISION_BITS_C
+        q_1 = (vacc_1 + HALF_C) >> PRECISION_BITS_C
+        q_2 = (vacc_2 + HALF_C) >> PRECISION_BITS_C
+        q_0 = tl.minimum(tl.maximum(q_0, 0), 255)
+        q_1 = tl.minimum(tl.maximum(q_1, 0), 255)
+        q_2 = tl.minimum(tl.maximum(q_2, 0), 255)
+
+        if CH_R == 0:  # CH_* are tl.constexpr, so this source-byte -> output-plane mapping is fixed at compile time
+            q_r = q_0
+        elif CH_R == 1:
+            q_r = q_1
+        else:
+            q_r = q_2
+        if CH_G == 0:
+            q_g = q_0
+        elif CH_G == 1:
+            q_g = q_1
+        else:
+            q_g = q_2
+        if CH_B == 0:
+            q_b = q_0
+        elif CH_B == 1:
+            q_b = q_1
+        else:
+            q_b = q_2
+
+        out_r = q_r.to(tl.float32) * inv_std_255_r + offset_r  # one multiply-add per channel; scale and offset are supplied by the caller
+        out_g = q_g.to(tl.float32) * inv_std_255_g + offset_g
+        out_b = q_b.to(tl.float32) * inv_std_255_b + offset_b
+
+        out_row = offs_y[:, None] * dst_stride_h + offs_x[None, :]  # adjacent output columns are one element apart
+        tl.store(dst_ptr + 0 * dst_stride_c + out_row, out_r, mask=mask_out)
+        tl.store(dst_ptr + 1 * dst_stride_c + out_row, out_g, mask=mask_out)
+        tl.store(dst_ptr + 2 * dst_stride_c + out_row, out_b, mask=mask_out)
+
+    @triton.jit
+    def horizontal_resize_uint8_kernel(
+        src_ptr,
+        tmp_ptr,
+        xmin_ptr,
+        wx_ptr,
+        src_h,  # number of rows processed and written to the scratch buffer
+        src_w,  # clamp bound for source columns, applied before crop_offset_x is added
+        src_stride_h,
+        src_stride_w,
+        crop_offset_y,
+        crop_offset_x,
+        target_w,
+        CH_R: tl.constexpr,
+        CH_G: tl.constexpr,
+        CH_B: tl.constexpr,
+        KSIZE_X: tl.constexpr,
+        PRECISION_BITS_C: tl.constexpr,
+        HALF_C: tl.constexpr,
+        BLOCK_H: tl.constexpr,
+        BLOCK_W: tl.constexpr,
+    ):
+        """Horizontal PIL-antialias pass into uint8 CHW scratch."""
+        pid_y = tl.program_id(0)  # tile index along rows
+        pid_x = tl.program_id(1)  # tile index along output columns
+        pid_c = tl.program_id(2)  # scratch plane written by this program
+
+        offs_y = pid_y * BLOCK_H + tl.arange(0, BLOCK_H)
+        offs_x = pid_x * BLOCK_W + tl.arange(0, BLOCK_W)
+        mask_y = offs_y < src_h  # rows are bounded by src_h, not target_h: this pass resizes columns only
+        mask_x = offs_x < target_w
+        mask_out = mask_y[:, None] & mask_x[None, :]
+
+        xmin = tl.load(xmin_ptr + offs_x, mask=mask_x, other=0)  # first source column of each output column's filter window
+        src_ch = tl.where(pid_c == 0, CH_R, tl.where(pid_c == 1, CH_G, CH_B))  # element offset within a source pixel for this plane
+        sy = offs_y + crop_offset_y  # not clamped: rows at or beyond src_h are excluded by mask_y on every load and store
+
+        hacc = tl.zeros((BLOCK_H, BLOCK_W), dtype=tl.int32)
+        for kx in tl.static_range(KSIZE_X):
+            sx = xmin + kx
+            sx_c = tl.maximum(tl.minimum(sx, src_w - 1), 0) + crop_offset_x  # clamp to [0, src_w - 1], then shift by the crop offset
+            wx = tl.load(wx_ptr + offs_x * KSIZE_X + kx, mask=mask_x, other=0)
+            base = sy[:, None] * src_stride_h + sx_c[None, :] * src_stride_w
+            p = tl.load(src_ptr + base + src_ch, mask=mask_out, other=0).to(tl.int32)
+            hacc += p * wx[None, :]
+
+        q = (hacc + HALF_C) >> PRECISION_BITS_C  # add HALF_C, then drop PRECISION_BITS_C fractional bits
+        q = tl.minimum(tl.maximum(q, 0), 255)  # clamp to 0..255
+
+        out_row = offs_y[:, None] * target_w + offs_x[None, :]  # scratch rows are target_w elements wide
+        tl.store(tmp_ptr + pid_c * src_h * target_w + out_row, q, mask=mask_out)  # plane stride is src_h * target_w; NOTE(review): q is int32 here, relies on the store casting to tmp's dtype — confirm
+
+    @triton.jit
+    def horizontal_resize_uint8_all_channels_kernel(
+        src_ptr,
+        tmp_ptr,
+        xmin_ptr,
+        wx_ptr,
+        src_h,  # number of rows processed and written to the scratch buffer
+        src_w,  # clamp bound for source columns, applied before crop_offset_x is added
+        src_stride_h,
+        src_stride_w,
+        crop_offset_y,
+        crop_offset_x,
+        target_w,
+        CH_R: tl.constexpr,
+        CH_G: tl.constexpr,
+        CH_B: tl.constexpr,
+        KSIZE_X: tl.constexpr,
+        PRECISION_BITS_C: tl.constexpr,
+        HALF_C: tl.constexpr,
+        BLOCK_H: tl.constexpr,
+        BLOCK_W: tl.constexpr,
+    ):
+        """Horizontal PIL-antialias pass for all output channels."""
+        pid_y = tl.program_id(0)  # tile index along rows
+        pid_x = tl.program_id(1)  # tile index along output columns; all three planes are handled in this one program
+
+        offs_y = pid_y * BLOCK_H + tl.arange(0, BLOCK_H)
+        offs_x = pid_x * BLOCK_W + tl.arange(0, BLOCK_W)
+        mask_y = offs_y < src_h  # rows are bounded by src_h, not target_h: this pass resizes columns only
+        mask_x = offs_x < target_w
+        mask_out = mask_y[:, None] & mask_x[None, :]
+
+        xmin = tl.load(xmin_ptr + offs_x, mask=mask_x, other=0)  # first source column of each output column's filter window
+        sy = offs_y + crop_offset_y  # not clamped: rows at or beyond src_h are excluded by mask_y on every load and store
+
+        hacc_r = tl.zeros((BLOCK_H, BLOCK_W), dtype=tl.int32)
+        hacc_g = tl.zeros((BLOCK_H, BLOCK_W), dtype=tl.int32)
+        hacc_b = tl.zeros((BLOCK_H, BLOCK_W), dtype=tl.int32)
+        for kx in tl.static_range(KSIZE_X):
+            sx = xmin + kx
+            sx_c = tl.maximum(tl.minimum(sx, src_w - 1), 0) + crop_offset_x  # clamp to [0, src_w - 1], then shift by the crop offset
+            wx = tl.load(wx_ptr + offs_x * KSIZE_X + kx, mask=mask_x, other=0)
+            base = sy[:, None] * src_stride_h + sx_c[None, :] * src_stride_w
+            p_r = tl.load(src_ptr + base + CH_R, mask=mask_out, other=0).to(tl.int32)  # CH_* pick the source element feeding each output plane
+            p_g = tl.load(src_ptr + base + CH_G, mask=mask_out, other=0).to(tl.int32)
+            p_b = tl.load(src_ptr + base + CH_B, mask=mask_out, other=0).to(tl.int32)
+            wx_2d = wx[None, :]
+            hacc_r += p_r * wx_2d
+            hacc_g += p_g * wx_2d
+            hacc_b += p_b * wx_2d
+
+        q_r = (hacc_r + HALF_C) >> PRECISION_BITS_C  # add HALF_C, then drop PRECISION_BITS_C fractional bits
+        q_g = (hacc_g + HALF_C) >> PRECISION_BITS_C
+        q_b = (hacc_b + HALF_C) >> PRECISION_BITS_C
+        q_r = tl.minimum(tl.maximum(q_r, 0), 255)  # clamp to 0..255
+        q_g = tl.minimum(tl.maximum(q_g, 0), 255)
+        q_b = tl.minimum(tl.maximum(q_b, 0), 255)
+
+        out_row = offs_y[:, None] * target_w + offs_x[None, :]  # scratch rows are target_w elements wide
+        channel_stride = src_h * target_w  # planar scratch: one src_h x target_w plane per output channel
+        tl.store(tmp_ptr + 0 * channel_stride + out_row, q_r, mask=mask_out)
+        tl.store(tmp_ptr + 1 * channel_stride + out_row, q_g, mask=mask_out)
+        tl.store(tmp_ptr + 2 * channel_stride + out_row, q_b, mask=mask_out)
+
+    @triton.jit
+    def vertical_normalize_from_horizontal_kernel(
+        tmp_ptr,
+        dst_ptr,
+        ymin_ptr,
+        wy_ptr,
+        src_h,  # row count of each scratch plane; also the clamp bound for source rows
+        dst_stride_c,
+        dst_stride_h,
+        target_h,
+        target_w,
+        inv_std_255_r,
+        inv_std_255_g,
+        inv_std_255_b,
+        offset_r,
+        offset_g,
+        offset_b,
+        KSIZE_Y: tl.constexpr,
+        PRECISION_BITS_C: tl.constexpr,
+        HALF_C: tl.constexpr,
+        BLOCK_H: tl.constexpr,
+        BLOCK_W: tl.constexpr,
+    ):
+        """Vertical PIL-antialias pass from uint8 scratch plus normalization."""
+        pid_y = tl.program_id(0)  # tile index along output rows
+        pid_x = tl.program_id(1)  # tile index along output columns
+        pid_c = tl.program_id(2)  # plane index, used for both the scratch read and the dst write
+
+        offs_y = pid_y * BLOCK_H + tl.arange(0, BLOCK_H)
+        offs_x = pid_x * BLOCK_W + tl.arange(0, BLOCK_W)
+        mask_y = offs_y < target_h
+        mask_x = offs_x < target_w
+        mask_out = mask_y[:, None] & mask_x[None, :]
+
+        ymin = tl.load(ymin_ptr + offs_y, mask=mask_y, other=0)  # first scratch row of each output row's filter window
+
+        vacc = tl.zeros((BLOCK_H, BLOCK_W), dtype=tl.int32)
+        for ky in tl.static_range(KSIZE_Y):
+            sy = ymin + ky
+            sy_c = tl.maximum(tl.minimum(sy, src_h - 1), 0)  # clamp only; no crop offset is applied when reading the scratch
+            wy = tl.load(wy_ptr + offs_y * KSIZE_Y + ky, mask=mask_y, other=0)
+            base = sy_c[:, None] * target_w + offs_x[None, :]  # scratch rows are target_w elements wide
+            p = tl.load(
+                tmp_ptr + pid_c * src_h * target_w + base, mask=mask_out, other=0
+            ).to(tl.int32)
+            vacc += p * wy[:, None]
+
+        q = (vacc + HALF_C) >> PRECISION_BITS_C  # add HALF_C, then drop PRECISION_BITS_C fractional bits
+        q = tl.minimum(tl.maximum(q, 0), 255)  # clamp to 0..255
+
+        inv_std_255 = tl.where(
+            pid_c == 0,
+            inv_std_255_r,
+            tl.where(pid_c == 1, inv_std_255_g, inv_std_255_b),
+        )
+        offset = tl.where(
+            pid_c == 0,
+            offset_r,
+            tl.where(pid_c == 1, offset_g, offset_b),
+        )
+        out = q.to(tl.float32) * inv_std_255 + offset  # one multiply-add; scale and offset are supplied by the caller
+
+        out_row = offs_y[:, None] * dst_stride_h + offs_x[None, :]  # adjacent output columns are one element apart
+        tl.store(dst_ptr + pid_c * dst_stride_c + out_row, out, mask=mask_out)
+
+
+class ResampleTables:
     """Cache of per-axis PIL-int32 weight tables for one (src, dst) pair."""
 
     __slots__ = (
@@ -264,10 +763,10 @@ def build_resample_tables(
     target_h: int,
     target_w: int,
     device: torch.device,
-) -> _ResampleTables:
+) -> ResampleTables:
     ymin, wy, ksize_y = _bilinear_antialias_weights_1d_int(src_h, target_h)
     xmin, wx, ksize_x = _bilinear_antialias_weights_1d_int(src_w, target_w)
-    return _ResampleTables(
+    return ResampleTables(
         ymin_gpu=torch.from_numpy(ymin).to(device=device, non_blocking=True),
         xmin_gpu=torch.from_numpy(xmin).to(device=device, non_blocking=True),
         wy_gpu=torch.from_numpy(wy.ravel()).to(device=device, non_blocking=True),
@@ -279,39 +778,62 @@ def build_resample_tables(
 
 def triton_preprocess_rfdetr_stretch(
     src: torch.Tensor,
-    tables: _ResampleTables,
+    tables: ResampleTables,
     target_h: int,
     target_w: int,
     means: Tuple[float, float, float] = (0.485, 0.456, 0.406),
     stds: Tuple[float, float, float] = (0.229, 0.224, 0.225),
     swap_rb: bool = True,
+    crop_offset_y: int = 0,
+    crop_offset_x: int = 0,
+    crop_h: Optional[int] = None,
+    crop_w: Optional[int] = None,
     out: Optional[torch.Tensor] = None,
 ) -> torch.Tensor:
     """Fused PIL-exact resize + color swap + normalize.
 
     Args:
         src: uint8 CUDA tensor, shape (H, W, 3), HWC layout.
-        tables: precomputed int32 resample tables from `build_resample_tables`.
+        tables: precomputed int32 resample tables sized against the *cropped*
+            source `(crop_h, crop_w)` → `(target_h, target_w)`.
         target_h, target_w: output spatial dims.
         means, stds: normalization in output channel order (R, G, B for
             network_input.color_mode == 'rgb').
         swap_rb: if True, source channel 0 → output B (BGR input, RGB network).
+        crop_offset_y/_x: load-time offset into `src` for a static crop. 0
+            means no crop.
+        crop_h/_w: effective source dims after crop. Defaults to src dims
+            when no crop is configured.
         out: optional preallocated fp32 (1, 3, H, W) CUDA tensor.
 
     Returns:
         fp32 (1, 3, target_h, target_w) on the same device as `src`.
     """
     if not TRITON_AVAILABLE:
-        raise RuntimeError("triton is not installed")
+        raise MissingDependencyError(
+            message="triton is not installed",
+            help_url="https://inference-models.roboflow.com/errors/runtime-environment/#missingdependencyerror",
+        )
     if not src.is_cuda:
-        raise ValueError(f"expected CUDA src tensor, got device={src.device}")
+        raise ModelInputError(
+            message=f"expected CUDA src tensor, got device={src.device}",
+            help_url="https://inference-models.roboflow.com/errors/input-validation/#modelinputerror",
+        )
     if src.dtype != torch.uint8:
-        raise ValueError(f"expected uint8 src, got {src.dtype}")
+        raise ModelInputError(
+            message=f"expected uint8 src, got {src.dtype}",
+            help_url="https://inference-models.roboflow.com/errors/input-validation/#modelinputerror",
+        )
     if src.ndim != 3 or src.shape[2] != 3:
-        raise ValueError(f"expected HWC 3-channel, got shape={tuple(src.shape)}")
+        raise ModelInputError(
+            message=f"expected HWC 3-channel, got shape={tuple(src.shape)}",
+            help_url="https://inference-models.roboflow.com/errors/input-validation/#modelinputerror",
+        )
 
     src = src.contiguous()
-    src_h, src_w = int(src.shape[0]), int(src.shape[1])
+    raw_src_h, raw_src_w = int(src.shape[0]), int(src.shape[1])
+    src_h = crop_h if crop_h is not None else raw_src_h
+    src_w = crop_w if crop_w is not None else raw_src_w
     src_stride_h = int(src.stride(0))
     src_stride_w = int(src.stride(1))
 
@@ -321,12 +843,18 @@ def triton_preprocess_rfdetr_stretch(
         )
     else:
         if tuple(out.shape) != (1, 3, target_h, target_w):
-            raise ValueError(
-                f"out has shape {tuple(out.shape)}, expected "
-                f"(1, 3, {target_h}, {target_w})"
+            raise ModelRuntimeError(
+                message=(
+                    f"out has shape {tuple(out.shape)}, expected "
+                    f"(1, 3, {target_h}, {target_w})"
+                ),
+                help_url="https://inference-models.roboflow.com/errors/models-runtime/#modelruntimeerror",
             )
         if out.dtype != torch.float32 or not out.is_cuda:
-            raise ValueError("out must be fp32 CUDA tensor")
+            raise ModelRuntimeError(
+                message="out must be fp32 CUDA tensor",
+                help_url="https://inference-models.roboflow.com/errors/models-runtime/#modelruntimeerror",
+            )
 
     dst_stride_c = target_h * target_w
     dst_stride_h = target_w
@@ -343,41 +871,180 @@ def triton_preprocess_rfdetr_stretch(
     offset_g = -means[1] / stds[1]
     offset_b = -means[2] / stds[2]
 
-    BLOCK_H = 16
-    BLOCK_W = 16
+    variant = os.getenv(_PREPROC_VARIANT_ENV, "current").strip().lower()
+    default_block_h = 1 if variant in {"channel_split", "two_pass"} else 16
+    default_block_w = 128 if variant in {"channel_split", "two_pass"} else 16
+    BLOCK_H = _read_power_of_two_env(_PREPROC_BLOCK_H_ENV, default_block_h)
+    BLOCK_W = _read_power_of_two_env(_PREPROC_BLOCK_W_ENV, default_block_w)
     grid = (
         (target_h + BLOCK_H - 1) // BLOCK_H,
         (target_w + BLOCK_W - 1) // BLOCK_W,
     )
-    _fused_resize_normalize_kernel[grid](
-        src,
-        out,
-        tables.ymin_gpu,
-        tables.xmin_gpu,
-        tables.wy_gpu,
-        tables.wx_gpu,
-        src_h,
-        src_w,
-        src_stride_h,
-        src_stride_w,
-        dst_stride_c,
-        dst_stride_h,
-        target_h,
-        target_w,
-        float(inv_std_255_r),
-        float(inv_std_255_g),
-        float(inv_std_255_b),
-        float(offset_r),
-        float(offset_g),
-        float(offset_b),
-        CH_R=ch_r,
-        CH_G=ch_g,
-        CH_B=ch_b,
-        KSIZE_Y=tables.ksize_y,
-        KSIZE_X=tables.ksize_x,
-        PRECISION_BITS_C=PRECISION_BITS,
-        HALF_C=_HALF,
-        BLOCK_H=BLOCK_H,
-        BLOCK_W=BLOCK_W,
-    )
+    if variant in {"current", "baseline", ""}:
+        fused_resize_normalize_kernel[grid](
+            src,
+            out,
+            tables.ymin_gpu,
+            tables.xmin_gpu,
+            tables.wy_gpu,
+            tables.wx_gpu,
+            src_h,
+            src_w,
+            src_stride_h,
+            src_stride_w,
+            int(crop_offset_y),
+            int(crop_offset_x),
+            dst_stride_c,
+            dst_stride_h,
+            target_h,
+            target_w,
+            float(inv_std_255_r),
+            float(inv_std_255_g),
+            float(inv_std_255_b),
+            float(offset_r),
+            float(offset_g),
+            float(offset_b),
+            CH_R=ch_r,
+            CH_G=ch_g,
+            CH_B=ch_b,
+            KSIZE_Y=tables.ksize_y,
+            KSIZE_X=tables.ksize_x,
+            PRECISION_BITS_C=PRECISION_BITS,
+            HALF_C=_HALF,
+            BLOCK_H=BLOCK_H,
+            BLOCK_W=BLOCK_W,
+        )
+    elif variant == "channel_split":
+        fused_resize_normalize_channel_kernel[(grid[0], grid[1], 3)](
+            src,
+            out,
+            tables.ymin_gpu,
+            tables.xmin_gpu,
+            tables.wy_gpu,
+            tables.wx_gpu,
+            src_h,
+            src_w,
+            src_stride_h,
+            src_stride_w,
+            int(crop_offset_y),
+            int(crop_offset_x),
+            dst_stride_c,
+            dst_stride_h,
+            target_h,
+            target_w,
+            float(inv_std_255_r),
+            float(inv_std_255_g),
+            float(inv_std_255_b),
+            float(offset_r),
+            float(offset_g),
+            float(offset_b),
+            CH_R=ch_r,
+            CH_G=ch_g,
+            CH_B=ch_b,
+            KSIZE_Y=tables.ksize_y,
+            KSIZE_X=tables.ksize_x,
+            PRECISION_BITS_C=PRECISION_BITS,
+            HALF_C=_HALF,
+            BLOCK_H=BLOCK_H,
+            BLOCK_W=BLOCK_W,
+        )
+    elif variant == "packed":
+        fused_resize_normalize_packed_kernel[grid](
+            src,
+            out,
+            tables.ymin_gpu,
+            tables.xmin_gpu,
+            tables.wy_gpu,
+            tables.wx_gpu,
+            src_h,
+            src_w,
+            src_stride_h,
+            src_stride_w,
+            int(crop_offset_y),
+            int(crop_offset_x),
+            dst_stride_c,
+            dst_stride_h,
+            target_h,
+            target_w,
+            float(inv_std_255_r),
+            float(inv_std_255_g),
+            float(inv_std_255_b),
+            float(offset_r),
+            float(offset_g),
+            float(offset_b),
+            CH_R=ch_r,
+            CH_G=ch_g,
+            CH_B=ch_b,
+            KSIZE_Y=tables.ksize_y,
+            KSIZE_X=tables.ksize_x,
+            PRECISION_BITS_C=PRECISION_BITS,
+            HALF_C=_HALF,
+            BLOCK_H=BLOCK_H,
+            BLOCK_W=BLOCK_W,
+        )
+    elif variant == "two_pass":
+        horizontal_block_h = _read_power_of_two_env(
+            _PREPROC_HORIZONTAL_BLOCK_H_ENV, 1
+        )
+        horizontal_block_w = _read_power_of_two_env(
+            _PREPROC_HORIZONTAL_BLOCK_W_ENV, 128
+        )
+        tmp = torch.empty(
+            (3, src_h, target_w), dtype=torch.uint8, device=src.device
+        )
+        horizontal_grid = (
+            (src_h + horizontal_block_h - 1) // horizontal_block_h,
+            (target_w + horizontal_block_w - 1) // horizontal_block_w,
+        )
+        horizontal_resize_uint8_all_channels_kernel[horizontal_grid](
+            src,
+            tmp,
+            tables.xmin_gpu,
+            tables.wx_gpu,
+            src_h,
+            src_w,
+            src_stride_h,
+            src_stride_w,
+            int(crop_offset_y),
+            int(crop_offset_x),
+            target_w,
+            CH_R=ch_r,
+            CH_G=ch_g,
+            CH_B=ch_b,
+            KSIZE_X=tables.ksize_x,
+            PRECISION_BITS_C=PRECISION_BITS,
+            HALF_C=_HALF,
+            BLOCK_H=horizontal_block_h,
+            BLOCK_W=horizontal_block_w,
+        )
+        vertical_normalize_from_horizontal_kernel[(grid[0], grid[1], 3)](
+            tmp,
+            out,
+            tables.ymin_gpu,
+            tables.wy_gpu,
+            src_h,
+            dst_stride_c,
+            dst_stride_h,
+            target_h,
+            target_w,
+            float(inv_std_255_r),
+            float(inv_std_255_g),
+            float(inv_std_255_b),
+            float(offset_r),
+            float(offset_g),
+            float(offset_b),
+            KSIZE_Y=tables.ksize_y,
+            PRECISION_BITS_C=PRECISION_BITS,
+            HALF_C=_HALF,
+            BLOCK_H=BLOCK_H,
+            BLOCK_W=BLOCK_W,
+        )
+    else:
+        raise ModelRuntimeError(
+            message=(
+                f"Unknown {_PREPROC_VARIANT_ENV}={variant!r}; expected "
+                "'current', 'channel_split', 'packed', or 'two_pass'."
+            ),
+            help_url="https://inference-models.roboflow.com/errors/models-runtime/#modelruntimeerror",
+        )
     return out

From be8e0ffb30a7c34f0bb0a15eb1e061e103f9fba8 Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Tue, 2 Jun 2026 01:35:58 +0000
Subject: [PATCH 20/76] Consolidate RF-DETR Triton preproc kernel

---
 .../models/rfdetr/triton_preprocess.py        | 805 +++---------------
 1 file changed, 139 insertions(+), 666 deletions(-)

diff --git a/inference_models/inference_models/models/rfdetr/triton_preprocess.py b/inference_models/inference_models/models/rfdetr/triton_preprocess.py
index 091b71d0c9..b032ccf80a 100644
--- a/inference_models/inference_models/models/rfdetr/triton_preprocess.py
+++ b/inference_models/inference_models/models/rfdetr/triton_preprocess.py
@@ -1,4 +1,4 @@
-"""Fused Triton preprocessing kernel for RF-DETR.
+"""Triton preprocessing kernels for RF-DETR.
 
 Byte-exact port of PIL's separable bilinear-antialias resize (the algorithm
 torchvision's `TF.resize(pil, ..., antialias=True)` uses on PIL inputs), with
@@ -19,12 +19,9 @@
     w_i(o, k)   = round(w_f(o, k) * (1 << PRECISION_BITS))      int32
     out(o)      = clamp((Σ w_i(o, k) * src_u8) + (1 << (PRECISION_BITS-1)) >> PRECISION_BITS, 0, 255)
 
-Single fused kernel: the horizontal uint8 intermediate lives in registers
-rather than a DRAM scratch buffer. For each output tile we loop over
-KSIZE_Y source rows; for each contributing source row we recompute the
-horizontal convolution (int32 fixed-point, uint8 quantize) on the fly,
-multiply by the vertical weight, and accumulate. Final: uint8 quantize,
-BGR↔RGB swap, /255, ImageNet normalize, fp32 CHW store.
+The runtime implementation is the consolidated two-pass path:
+horizontal PIL-antialias resize into a uint8 CHW scratch buffer, followed by
+the vertical pass plus `/255` + ImageNet normalization into fp32 CHW output.
 """
 
 from __future__ import annotations
@@ -54,7 +51,6 @@
 
 
 PRECISION_BITS = 22
-_PREPROC_VARIANT_ENV = "INFERENCE_MODELS_RFDETR_TRITON_PREPROC_VARIANT"
 _PREPROC_BLOCK_H_ENV = "INFERENCE_MODELS_RFDETR_TRITON_PREPROC_BLOCK_H"
 _PREPROC_BLOCK_W_ENV = "INFERENCE_MODELS_RFDETR_TRITON_PREPROC_BLOCK_W"
 _PREPROC_HORIZONTAL_BLOCK_H_ENV = (
@@ -130,474 +126,6 @@ def _bilinear_antialias_weights_1d_int(
 
     _HALF = 1 << (PRECISION_BITS - 1)
 
-    @triton.jit
-    def fused_resize_normalize_kernel(
-        src_ptr,
-        dst_ptr,
-        ymin_ptr,
-        xmin_ptr,
-        wy_ptr,
-        wx_ptr,
-        src_h,
-        src_w,
-        src_stride_h,
-        src_stride_w,
-        crop_offset_y,
-        crop_offset_x,
-        dst_stride_c,
-        dst_stride_h,
-        target_h,
-        target_w,
-        inv_std_255_r,
-        inv_std_255_g,
-        inv_std_255_b,
-        offset_r,
-        offset_g,
-        offset_b,
-        CH_R: tl.constexpr,
-        CH_G: tl.constexpr,
-        CH_B: tl.constexpr,
-        KSIZE_Y: tl.constexpr,
-        KSIZE_X: tl.constexpr,
-        PRECISION_BITS_C: tl.constexpr,
-        HALF_C: tl.constexpr,
-        BLOCK_H: tl.constexpr,
-        BLOCK_W: tl.constexpr,
-    ):
-        """One kernel per (tile_y, tile_x) over target image.
-
-        In : src uint8 HWC (src_h, src_w, 3), source color order. The
-             resample tables are built against `(crop_h, crop_w)` — the
-             logical source size after a possible static crop — which the
-             caller passes as `src_h`/`src_w`. `crop_offset_{y,x}` is the
-             load-time offset into the raw HWC buffer.
-        Out: dst fp32 CHW (1, 3, target_h, target_w), network color order,
-             (pixel/255 - mean)/std.
-        """
-        pid_y = tl.program_id(0)
-        pid_x = tl.program_id(1)
-
-        offs_y = pid_y * BLOCK_H + tl.arange(0, BLOCK_H)
-        offs_x = pid_x * BLOCK_W + tl.arange(0, BLOCK_W)
-        mask_y = offs_y < target_h
-        mask_x = offs_x < target_w
-        mask_out = mask_y[:, None] & mask_x[None, :]
-
-        ymin = tl.load(ymin_ptr + offs_y, mask=mask_y, other=0)
-        xmin = tl.load(xmin_ptr + offs_x, mask=mask_x, other=0)
-
-        # Vertical pass accumulators (int32 fixed-point) for 3 channels.
-        vacc_0 = tl.zeros((BLOCK_H, BLOCK_W), dtype=tl.int32)
-        vacc_1 = tl.zeros((BLOCK_H, BLOCK_W), dtype=tl.int32)
-        vacc_2 = tl.zeros((BLOCK_H, BLOCK_W), dtype=tl.int32)
-
-        for ky in tl.static_range(KSIZE_Y):
-            # Source row (after static crop) contributing to each output row.
-            sy = ymin + ky
-            sy_c = tl.maximum(tl.minimum(sy, src_h - 1), 0) + crop_offset_y
-            wy = tl.load(wy_ptr + offs_y * KSIZE_Y + ky, mask=mask_y, other=0)
-
-            # Horizontal pass for (output_rows_in_tile, output_cols_in_tile):
-            # for each source column in the kernel, gather src[sy_c, sx_c, :]
-            # and accumulate with wx[output_col, kx].
-            hacc_0 = tl.zeros((BLOCK_H, BLOCK_W), dtype=tl.int32)
-            hacc_1 = tl.zeros((BLOCK_H, BLOCK_W), dtype=tl.int32)
-            hacc_2 = tl.zeros((BLOCK_H, BLOCK_W), dtype=tl.int32)
-
-            for kx in tl.static_range(KSIZE_X):
-                sx = xmin + kx
-                sx_c = tl.maximum(tl.minimum(sx, src_w - 1), 0) + crop_offset_x
-                wx = tl.load(wx_ptr + offs_x * KSIZE_X + kx, mask=mask_x, other=0)
-                base = sy_c[:, None] * src_stride_h + sx_c[None, :] * src_stride_w
-                p0 = tl.load(src_ptr + base + 0, mask=mask_out, other=0).to(tl.int32)
-                p1 = tl.load(src_ptr + base + 1, mask=mask_out, other=0).to(tl.int32)
-                p2 = tl.load(src_ptr + base + 2, mask=mask_out, other=0).to(tl.int32)
-                wx_2d = wx[None, :]
-                hacc_0 += p0 * wx_2d
-                hacc_1 += p1 * wx_2d
-                hacc_2 += p2 * wx_2d
-
-            # Horizontal uint8 quantization (byte-exact to PIL's intermediate).
-            hacc_0 = (hacc_0 + HALF_C) >> PRECISION_BITS_C
-            hacc_1 = (hacc_1 + HALF_C) >> PRECISION_BITS_C
-            hacc_2 = (hacc_2 + HALF_C) >> PRECISION_BITS_C
-            hacc_0 = tl.minimum(tl.maximum(hacc_0, 0), 255)
-            hacc_1 = tl.minimum(tl.maximum(hacc_1, 0), 255)
-            hacc_2 = tl.minimum(tl.maximum(hacc_2, 0), 255)
-
-            wy_2d = wy[:, None]
-            vacc_0 += hacc_0 * wy_2d
-            vacc_1 += hacc_1 * wy_2d
-            vacc_2 += hacc_2 * wy_2d
-
-        # Vertical uint8 quantization.
-        q_0 = (vacc_0 + HALF_C) >> PRECISION_BITS_C
-        q_1 = (vacc_1 + HALF_C) >> PRECISION_BITS_C
-        q_2 = (vacc_2 + HALF_C) >> PRECISION_BITS_C
-        q_0 = tl.minimum(tl.maximum(q_0, 0), 255)
-        q_1 = tl.minimum(tl.maximum(q_1, 0), 255)
-        q_2 = tl.minimum(tl.maximum(q_2, 0), 255)
-
-        # Source-to-output channel remap (triton requires constexpr branches).
-        if CH_R == 0:
-            q_r = q_0
-        elif CH_R == 1:
-            q_r = q_1
-        else:
-            q_r = q_2
-        if CH_G == 0:
-            q_g = q_0
-        elif CH_G == 1:
-            q_g = q_1
-        else:
-            q_g = q_2
-        if CH_B == 0:
-            q_b = q_0
-        elif CH_B == 1:
-            q_b = q_1
-        else:
-            q_b = q_2
-
-        # (pixel/255 - mean)/std  ==  pixel * (1/(255*std)) + (-mean/std)
-        out_r = q_r.to(tl.float32) * inv_std_255_r + offset_r
-        out_g = q_g.to(tl.float32) * inv_std_255_g + offset_g
-        out_b = q_b.to(tl.float32) * inv_std_255_b + offset_b
-
-        out_row = offs_y[:, None] * dst_stride_h + offs_x[None, :]
-        tl.store(dst_ptr + 0 * dst_stride_c + out_row, out_r, mask=mask_out)
-        tl.store(dst_ptr + 1 * dst_stride_c + out_row, out_g, mask=mask_out)
-        tl.store(dst_ptr + 2 * dst_stride_c + out_row, out_b, mask=mask_out)
-
-    @triton.jit
-    def fused_resize_normalize_channel_kernel(
-        src_ptr,
-        dst_ptr,
-        ymin_ptr,
-        xmin_ptr,
-        wy_ptr,
-        wx_ptr,
-        src_h,
-        src_w,
-        src_stride_h,
-        src_stride_w,
-        crop_offset_y,
-        crop_offset_x,
-        dst_stride_c,
-        dst_stride_h,
-        target_h,
-        target_w,
-        inv_std_255_r,
-        inv_std_255_g,
-        inv_std_255_b,
-        offset_r,
-        offset_g,
-        offset_b,
-        CH_R: tl.constexpr,
-        CH_G: tl.constexpr,
-        CH_B: tl.constexpr,
-        KSIZE_Y: tl.constexpr,
-        KSIZE_X: tl.constexpr,
-        PRECISION_BITS_C: tl.constexpr,
-        HALF_C: tl.constexpr,
-        BLOCK_H: tl.constexpr,
-        BLOCK_W: tl.constexpr,
-    ):
-        """Channel-split variant: one program computes one output channel."""
-        pid_y = tl.program_id(0)
-        pid_x = tl.program_id(1)
-        pid_c = tl.program_id(2)
-
-        offs_y = pid_y * BLOCK_H + tl.arange(0, BLOCK_H)
-        offs_x = pid_x * BLOCK_W + tl.arange(0, BLOCK_W)
-        mask_y = offs_y < target_h
-        mask_x = offs_x < target_w
-        mask_out = mask_y[:, None] & mask_x[None, :]
-
-        ymin = tl.load(ymin_ptr + offs_y, mask=mask_y, other=0)
-        xmin = tl.load(xmin_ptr + offs_x, mask=mask_x, other=0)
-
-        src_ch = tl.where(pid_c == 0, CH_R, tl.where(pid_c == 1, CH_G, CH_B))
-        inv_std_255 = tl.where(
-            pid_c == 0,
-            inv_std_255_r,
-            tl.where(pid_c == 1, inv_std_255_g, inv_std_255_b),
-        )
-        offset = tl.where(
-            pid_c == 0,
-            offset_r,
-            tl.where(pid_c == 1, offset_g, offset_b),
-        )
-
-        vacc = tl.zeros((BLOCK_H, BLOCK_W), dtype=tl.int32)
-
-        for ky in tl.static_range(KSIZE_Y):
-            sy = ymin + ky
-            sy_c = tl.maximum(tl.minimum(sy, src_h - 1), 0) + crop_offset_y
-            wy = tl.load(wy_ptr + offs_y * KSIZE_Y + ky, mask=mask_y, other=0)
-
-            hacc = tl.zeros((BLOCK_H, BLOCK_W), dtype=tl.int32)
-
-            for kx in tl.static_range(KSIZE_X):
-                sx = xmin + kx
-                sx_c = tl.maximum(tl.minimum(sx, src_w - 1), 0) + crop_offset_x
-                wx = tl.load(wx_ptr + offs_x * KSIZE_X + kx, mask=mask_x, other=0)
-                base = sy_c[:, None] * src_stride_h + sx_c[None, :] * src_stride_w
-                p = tl.load(src_ptr + base + src_ch, mask=mask_out, other=0).to(
-                    tl.int32
-                )
-                hacc += p * wx[None, :]
-
-            hacc = (hacc + HALF_C) >> PRECISION_BITS_C
-            hacc = tl.minimum(tl.maximum(hacc, 0), 255)
-
-            vacc += hacc * wy[:, None]
-
-        q = (vacc + HALF_C) >> PRECISION_BITS_C
-        q = tl.minimum(tl.maximum(q, 0), 255)
-        out = q.to(tl.float32) * inv_std_255 + offset
-
-        out_row = offs_y[:, None] * dst_stride_h + offs_x[None, :]
-        tl.store(dst_ptr + pid_c * dst_stride_c + out_row, out, mask=mask_out)
-
-    @triton.jit
-    def fused_resize_normalize_packed_kernel(
-        src_ptr,
-        dst_ptr,
-        ymin_ptr,
-        xmin_ptr,
-        wy_ptr,
-        wx_ptr,
-        src_h,
-        src_w,
-        src_stride_h,
-        src_stride_w,
-        crop_offset_y,
-        crop_offset_x,
-        dst_stride_c,
-        dst_stride_h,
-        target_h,
-        target_w,
-        inv_std_255_r,
-        inv_std_255_g,
-        inv_std_255_b,
-        offset_r,
-        offset_g,
-        offset_b,
-        CH_R: tl.constexpr,
-        CH_G: tl.constexpr,
-        CH_B: tl.constexpr,
-        KSIZE_Y: tl.constexpr,
-        KSIZE_X: tl.constexpr,
-        PRECISION_BITS_C: tl.constexpr,
-        HALF_C: tl.constexpr,
-        BLOCK_H: tl.constexpr,
-        BLOCK_W: tl.constexpr,
-    ):
-        """All-channel variant using one unaligned u32 load per HWC pixel."""
-        pid_y = tl.program_id(0)
-        pid_x = tl.program_id(1)
-
-        offs_y = pid_y * BLOCK_H + tl.arange(0, BLOCK_H)
-        offs_x = pid_x * BLOCK_W + tl.arange(0, BLOCK_W)
-        mask_y = offs_y < target_h
-        mask_x = offs_x < target_w
-        mask_out = mask_y[:, None] & mask_x[None, :]
-
-        ymin = tl.load(ymin_ptr + offs_y, mask=mask_y, other=0)
-        xmin = tl.load(xmin_ptr + offs_x, mask=mask_x, other=0)
-
-        # src is contiguous HWC, so stride_h / stride_w recovers the raw width.
-        # u32 load is safe for every pixel except the final raw column.
-        raw_src_w = src_stride_h // src_stride_w
-
-        vacc_0 = tl.zeros((BLOCK_H, BLOCK_W), dtype=tl.int32)
-        vacc_1 = tl.zeros((BLOCK_H, BLOCK_W), dtype=tl.int32)
-        vacc_2 = tl.zeros((BLOCK_H, BLOCK_W), dtype=tl.int32)
-
-        for ky in tl.static_range(KSIZE_Y):
-            sy = ymin + ky
-            sy_c = tl.maximum(tl.minimum(sy, src_h - 1), 0) + crop_offset_y
-            wy = tl.load(wy_ptr + offs_y * KSIZE_Y + ky, mask=mask_y, other=0)
-
-            hacc_0 = tl.zeros((BLOCK_H, BLOCK_W), dtype=tl.int32)
-            hacc_1 = tl.zeros((BLOCK_H, BLOCK_W), dtype=tl.int32)
-            hacc_2 = tl.zeros((BLOCK_H, BLOCK_W), dtype=tl.int32)
-
-            for kx in tl.static_range(KSIZE_X):
-                sx = xmin + kx
-                sx_c = tl.maximum(tl.minimum(sx, src_w - 1), 0) + crop_offset_x
-                wx = tl.load(wx_ptr + offs_x * KSIZE_X + kx, mask=mask_x, other=0)
-                base = sy_c[:, None] * src_stride_h + sx_c[None, :] * src_stride_w
-
-                base_mod = base & 3
-                aligned_base = base - base_mod
-                not_last_col = sx_c[None, :] < (raw_src_w - 1)
-                use_packed = not_last_col | (base_mod == 1)
-
-                word_0 = tl.load(
-                    (src_ptr + aligned_base).to(tl.pointer_type(tl.uint32)),
-                    mask=mask_out & use_packed,
-                    other=0,
-                )
-                word_1 = tl.load(
-                    (src_ptr + aligned_base + 4).to(tl.pointer_type(tl.uint32)),
-                    mask=mask_out & use_packed & (base_mod >= 2),
-                    other=0,
-                )
-
-                word_0_b0 = (word_0 & 0xFF).to(tl.int32)
-                word_0_b1 = ((word_0 >> 8) & 0xFF).to(tl.int32)
-                word_0_b2 = ((word_0 >> 16) & 0xFF).to(tl.int32)
-                word_0_b3 = ((word_0 >> 24) & 0xFF).to(tl.int32)
-                word_1_b0 = (word_1 & 0xFF).to(tl.int32)
-                word_1_b1 = ((word_1 >> 8) & 0xFF).to(tl.int32)
-
-                packed_0 = tl.where(
-                    base_mod == 0,
-                    word_0_b0,
-                    tl.where(
-                        base_mod == 1,
-                        word_0_b1,
-                        tl.where(base_mod == 2, word_0_b2, word_0_b3),
-                    ),
-                )
-                packed_1 = tl.where(
-                    base_mod == 0,
-                    word_0_b1,
-                    tl.where(
-                        base_mod == 1,
-                        word_0_b2,
-                        tl.where(base_mod == 2, word_0_b3, word_1_b0),
-                    ),
-                )
-                packed_2 = tl.where(
-                    base_mod == 0,
-                    word_0_b2,
-                    tl.where(
-                        base_mod == 1,
-                        word_0_b3,
-                        tl.where(base_mod == 2, word_1_b0, word_1_b1),
-                    ),
-                )
-
-                fallback_mask = mask_out & ~use_packed
-                byte_0 = tl.load(src_ptr + base + 0, mask=fallback_mask, other=0).to(
-                    tl.int32
-                )
-                byte_1 = tl.load(src_ptr + base + 1, mask=fallback_mask, other=0).to(
-                    tl.int32
-                )
-                byte_2 = tl.load(src_ptr + base + 2, mask=fallback_mask, other=0).to(
-                    tl.int32
-                )
-                p0 = tl.where(use_packed, packed_0, byte_0)
-                p1 = tl.where(use_packed, packed_1, byte_1)
-                p2 = tl.where(use_packed, packed_2, byte_2)
-
-                wx_2d = wx[None, :]
-                hacc_0 += p0 * wx_2d
-                hacc_1 += p1 * wx_2d
-                hacc_2 += p2 * wx_2d
-
-            hacc_0 = (hacc_0 + HALF_C) >> PRECISION_BITS_C
-            hacc_1 = (hacc_1 + HALF_C) >> PRECISION_BITS_C
-            hacc_2 = (hacc_2 + HALF_C) >> PRECISION_BITS_C
-            hacc_0 = tl.minimum(tl.maximum(hacc_0, 0), 255)
-            hacc_1 = tl.minimum(tl.maximum(hacc_1, 0), 255)
-            hacc_2 = tl.minimum(tl.maximum(hacc_2, 0), 255)
-
-            wy_2d = wy[:, None]
-            vacc_0 += hacc_0 * wy_2d
-            vacc_1 += hacc_1 * wy_2d
-            vacc_2 += hacc_2 * wy_2d
-
-        q_0 = (vacc_0 + HALF_C) >> PRECISION_BITS_C
-        q_1 = (vacc_1 + HALF_C) >> PRECISION_BITS_C
-        q_2 = (vacc_2 + HALF_C) >> PRECISION_BITS_C
-        q_0 = tl.minimum(tl.maximum(q_0, 0), 255)
-        q_1 = tl.minimum(tl.maximum(q_1, 0), 255)
-        q_2 = tl.minimum(tl.maximum(q_2, 0), 255)
-
-        if CH_R == 0:
-            q_r = q_0
-        elif CH_R == 1:
-            q_r = q_1
-        else:
-            q_r = q_2
-        if CH_G == 0:
-            q_g = q_0
-        elif CH_G == 1:
-            q_g = q_1
-        else:
-            q_g = q_2
-        if CH_B == 0:
-            q_b = q_0
-        elif CH_B == 1:
-            q_b = q_1
-        else:
-            q_b = q_2
-
-        out_r = q_r.to(tl.float32) * inv_std_255_r + offset_r
-        out_g = q_g.to(tl.float32) * inv_std_255_g + offset_g
-        out_b = q_b.to(tl.float32) * inv_std_255_b + offset_b
-
-        out_row = offs_y[:, None] * dst_stride_h + offs_x[None, :]
-        tl.store(dst_ptr + 0 * dst_stride_c + out_row, out_r, mask=mask_out)
-        tl.store(dst_ptr + 1 * dst_stride_c + out_row, out_g, mask=mask_out)
-        tl.store(dst_ptr + 2 * dst_stride_c + out_row, out_b, mask=mask_out)
-
-    @triton.jit
-    def horizontal_resize_uint8_kernel(
-        src_ptr,
-        tmp_ptr,
-        xmin_ptr,
-        wx_ptr,
-        src_h,
-        src_w,
-        src_stride_h,
-        src_stride_w,
-        crop_offset_y,
-        crop_offset_x,
-        target_w,
-        CH_R: tl.constexpr,
-        CH_G: tl.constexpr,
-        CH_B: tl.constexpr,
-        KSIZE_X: tl.constexpr,
-        PRECISION_BITS_C: tl.constexpr,
-        HALF_C: tl.constexpr,
-        BLOCK_H: tl.constexpr,
-        BLOCK_W: tl.constexpr,
-    ):
-        """Horizontal PIL-antialias pass into uint8 CHW scratch."""
-        pid_y = tl.program_id(0)
-        pid_x = tl.program_id(1)
-        pid_c = tl.program_id(2)
-
-        offs_y = pid_y * BLOCK_H + tl.arange(0, BLOCK_H)
-        offs_x = pid_x * BLOCK_W + tl.arange(0, BLOCK_W)
-        mask_y = offs_y < src_h
-        mask_x = offs_x < target_w
-        mask_out = mask_y[:, None] & mask_x[None, :]
-
-        xmin = tl.load(xmin_ptr + offs_x, mask=mask_x, other=0)
-        src_ch = tl.where(pid_c == 0, CH_R, tl.where(pid_c == 1, CH_G, CH_B))
-        sy = offs_y + crop_offset_y
-
-        hacc = tl.zeros((BLOCK_H, BLOCK_W), dtype=tl.int32)
-        for kx in tl.static_range(KSIZE_X):
-            sx = xmin + kx
-            sx_c = tl.maximum(tl.minimum(sx, src_w - 1), 0) + crop_offset_x
-            wx = tl.load(wx_ptr + offs_x * KSIZE_X + kx, mask=mask_x, other=0)
-            base = sy[:, None] * src_stride_h + sx_c[None, :] * src_stride_w
-            p = tl.load(src_ptr + base + src_ch, mask=mask_out, other=0).to(tl.int32)
-            hacc += p * wx[None, :]
-
-        q = (hacc + HALF_C) >> PRECISION_BITS_C
-        q = tl.minimum(tl.maximum(q, 0), 255)
-
-        out_row = offs_y[:, None] * target_w + offs_x[None, :]
-        tl.store(tmp_ptr + pid_c * src_h * target_w + out_row, q, mask=mask_out)
-
     @triton.jit
     def horizontal_resize_uint8_all_channels_kernel(
         src_ptr,
@@ -757,6 +285,15 @@ def __init__(
         self.ksize_x = ksize_x
 
 
+def resolve_two_pass_launch_config() -> Tuple[int, int, int, int]:  # tile sizes for the two-pass kernels
+    return (  # order: (block_h, block_w, horizontal_block_h, horizontal_block_w), as unpacked from launch_config
+        _read_power_of_two_env(_PREPROC_BLOCK_H_ENV, 1),  # block_h; 1 is presumably the fallback when the env var is unset
+        _read_power_of_two_env(_PREPROC_BLOCK_W_ENV, 128),  # block_w; fallback 128
+        _read_power_of_two_env(_PREPROC_HORIZONTAL_BLOCK_H_ENV, 1),  # horizontal_block_h (horizontal pass); fallback 1
+        _read_power_of_two_env(_PREPROC_HORIZONTAL_BLOCK_W_ENV, 128),  # horizontal_block_w (horizontal pass); fallback 128
+    )
+
+
 def build_resample_tables(
     src_h: int,
     src_w: int,
@@ -776,6 +313,98 @@ def build_resample_tables(
     )
 
 
+def triton_preprocess_rfdetr_stretch_two_pass_preallocated(
+    src: torch.Tensor,
+    out: torch.Tensor,
+    tmp: torch.Tensor,
+    tables: ResampleTables,
+    target_h: int,
+    target_w: int,
+    means: Tuple[float, float, float],
+    stds: Tuple[float, float, float],
+    swap_rb: bool,
+    launch_config: Tuple[int, int, int, int],
+    crop_offset_y: int = 0,
+    crop_offset_x: int = 0,
+    crop_h: Optional[int] = None,
+    crop_w: Optional[int] = None,
+) -> torch.Tensor:
+    """Hot two-pass launch path for already validated/preallocated tensors."""
+    raw_src_h, raw_src_w = int(src.shape[0]), int(src.shape[1])
+    src_h = crop_h if crop_h is not None else raw_src_h
+    src_w = crop_w if crop_w is not None else raw_src_w
+    src_stride_h = int(src.stride(0))
+    src_stride_w = int(src.stride(1))
+    dst_stride_c = target_h * target_w
+    dst_stride_h = target_w
+
+    if swap_rb:
+        ch_r, ch_g, ch_b = 2, 1, 0
+    else:
+        ch_r, ch_g, ch_b = 0, 1, 2
+
+    inv_std_255_r = 1.0 / (255.0 * stds[0])
+    inv_std_255_g = 1.0 / (255.0 * stds[1])
+    inv_std_255_b = 1.0 / (255.0 * stds[2])
+    offset_r = -means[0] / stds[0]
+    offset_g = -means[1] / stds[1]
+    offset_b = -means[2] / stds[2]
+    block_h, block_w, horizontal_block_h, horizontal_block_w = launch_config
+
+    horizontal_grid = (
+        (src_h + horizontal_block_h - 1) // horizontal_block_h,
+        (target_w + horizontal_block_w - 1) // horizontal_block_w,
+    )
+    horizontal_resize_uint8_all_channels_kernel[horizontal_grid](
+        src,
+        tmp,
+        tables.xmin_gpu,
+        tables.wx_gpu,
+        src_h,
+        src_w,
+        src_stride_h,
+        src_stride_w,
+        int(crop_offset_y),
+        int(crop_offset_x),
+        target_w,
+        CH_R=ch_r,
+        CH_G=ch_g,
+        CH_B=ch_b,
+        KSIZE_X=tables.ksize_x,
+        PRECISION_BITS_C=PRECISION_BITS,
+        HALF_C=_HALF,
+        BLOCK_H=horizontal_block_h,
+        BLOCK_W=horizontal_block_w,
+    )
+    grid = (
+        (target_h + block_h - 1) // block_h,
+        (target_w + block_w - 1) // block_w,
+    )
+    vertical_normalize_from_horizontal_kernel[(grid[0], grid[1], 3)](
+        tmp,
+        out,
+        tables.ymin_gpu,
+        tables.wy_gpu,
+        src_h,
+        dst_stride_c,
+        dst_stride_h,
+        target_h,
+        target_w,
+        float(inv_std_255_r),
+        float(inv_std_255_g),
+        float(inv_std_255_b),
+        float(offset_r),
+        float(offset_g),
+        float(offset_b),
+        KSIZE_Y=tables.ksize_y,
+        PRECISION_BITS_C=PRECISION_BITS,
+        HALF_C=_HALF,
+        BLOCK_H=block_h,
+        BLOCK_W=block_w,
+    )
+    return out
+
+
 def triton_preprocess_rfdetr_stretch(
     src: torch.Tensor,
     tables: ResampleTables,
@@ -789,8 +418,9 @@ def triton_preprocess_rfdetr_stretch(
     crop_h: Optional[int] = None,
     crop_w: Optional[int] = None,
     out: Optional[torch.Tensor] = None,
+    tmp: Optional[torch.Tensor] = None,
 ) -> torch.Tensor:
-    """Fused PIL-exact resize + color swap + normalize.
+    """PIL-exact resize + color swap + normalize using the two-pass kernels.
 
     Args:
         src: uint8 CUDA tensor, shape (H, W, 3), HWC layout.
@@ -805,6 +435,8 @@ def triton_preprocess_rfdetr_stretch(
         crop_h/_w: effective source dims after crop. Defaults to src dims
             when no crop is configured.
         out: optional preallocated fp32 (1, 3, H, W) CUDA tensor.
+        tmp: optional preallocated uint8 (3, crop_h/raw_h, target_w) CUDA tensor
+            used by the horizontal pass.
 
     Returns:
         fp32 (1, 3, target_h, target_w) on the same device as `src`.
@@ -856,195 +488,36 @@ def triton_preprocess_rfdetr_stretch(
                 help_url="https://inference-models.roboflow.com/errors/models-runtime/#modelruntimeerror",
             )
 
-    dst_stride_c = target_h * target_w
-    dst_stride_h = target_w
-
-    if swap_rb:
-        ch_r, ch_g, ch_b = 2, 1, 0
+    if tmp is None:
+        tmp = torch.empty((3, src_h, target_w), dtype=torch.uint8, device=src.device)
     else:
-        ch_r, ch_g, ch_b = 0, 1, 2
-
-    inv_std_255_r = 1.0 / (255.0 * stds[0])
-    inv_std_255_g = 1.0 / (255.0 * stds[1])
-    inv_std_255_b = 1.0 / (255.0 * stds[2])
-    offset_r = -means[0] / stds[0]
-    offset_g = -means[1] / stds[1]
-    offset_b = -means[2] / stds[2]
+        if tuple(tmp.shape) != (3, src_h, target_w):
+            raise ModelRuntimeError(
+                message=(
+                    f"tmp has shape {tuple(tmp.shape)}, expected "
+                    f"(3, {src_h}, {target_w})"
+                ),
+                help_url="https://inference-models.roboflow.com/errors/models-runtime/#modelruntimeerror",
+            )
+        if tmp.dtype != torch.uint8 or not tmp.is_cuda:
+            raise ModelRuntimeError(
+                message="tmp must be uint8 CUDA tensor",
+                help_url="https://inference-models.roboflow.com/errors/models-runtime/#modelruntimeerror",
+            )
 
-    variant = os.getenv(_PREPROC_VARIANT_ENV, "current").strip().lower()
-    default_block_h = 1 if variant in {"channel_split", "two_pass"} else 16
-    default_block_w = 128 if variant in {"channel_split", "two_pass"} else 16
-    BLOCK_H = _read_power_of_two_env(_PREPROC_BLOCK_H_ENV, default_block_h)
-    BLOCK_W = _read_power_of_two_env(_PREPROC_BLOCK_W_ENV, default_block_w)
-    grid = (
-        (target_h + BLOCK_H - 1) // BLOCK_H,
-        (target_w + BLOCK_W - 1) // BLOCK_W,
+    return triton_preprocess_rfdetr_stretch_two_pass_preallocated(
+        src=src,
+        out=out,
+        tmp=tmp,
+        tables=tables,
+        target_h=target_h,
+        target_w=target_w,
+        means=means,
+        stds=stds,
+        swap_rb=swap_rb,
+        launch_config=resolve_two_pass_launch_config(),
+        crop_offset_y=crop_offset_y,
+        crop_offset_x=crop_offset_x,
+        crop_h=crop_h,
+        crop_w=crop_w,
     )
-    if variant in {"current", "baseline", ""}:
-        fused_resize_normalize_kernel[grid](
-            src,
-            out,
-            tables.ymin_gpu,
-            tables.xmin_gpu,
-            tables.wy_gpu,
-            tables.wx_gpu,
-            src_h,
-            src_w,
-            src_stride_h,
-            src_stride_w,
-            int(crop_offset_y),
-            int(crop_offset_x),
-            dst_stride_c,
-            dst_stride_h,
-            target_h,
-            target_w,
-            float(inv_std_255_r),
-            float(inv_std_255_g),
-            float(inv_std_255_b),
-            float(offset_r),
-            float(offset_g),
-            float(offset_b),
-            CH_R=ch_r,
-            CH_G=ch_g,
-            CH_B=ch_b,
-            KSIZE_Y=tables.ksize_y,
-            KSIZE_X=tables.ksize_x,
-            PRECISION_BITS_C=PRECISION_BITS,
-            HALF_C=_HALF,
-            BLOCK_H=BLOCK_H,
-            BLOCK_W=BLOCK_W,
-        )
-    elif variant == "channel_split":
-        fused_resize_normalize_channel_kernel[(grid[0], grid[1], 3)](
-            src,
-            out,
-            tables.ymin_gpu,
-            tables.xmin_gpu,
-            tables.wy_gpu,
-            tables.wx_gpu,
-            src_h,
-            src_w,
-            src_stride_h,
-            src_stride_w,
-            int(crop_offset_y),
-            int(crop_offset_x),
-            dst_stride_c,
-            dst_stride_h,
-            target_h,
-            target_w,
-            float(inv_std_255_r),
-            float(inv_std_255_g),
-            float(inv_std_255_b),
-            float(offset_r),
-            float(offset_g),
-            float(offset_b),
-            CH_R=ch_r,
-            CH_G=ch_g,
-            CH_B=ch_b,
-            KSIZE_Y=tables.ksize_y,
-            KSIZE_X=tables.ksize_x,
-            PRECISION_BITS_C=PRECISION_BITS,
-            HALF_C=_HALF,
-            BLOCK_H=BLOCK_H,
-            BLOCK_W=BLOCK_W,
-        )
-    elif variant == "packed":
-        fused_resize_normalize_packed_kernel[grid](
-            src,
-            out,
-            tables.ymin_gpu,
-            tables.xmin_gpu,
-            tables.wy_gpu,
-            tables.wx_gpu,
-            src_h,
-            src_w,
-            src_stride_h,
-            src_stride_w,
-            int(crop_offset_y),
-            int(crop_offset_x),
-            dst_stride_c,
-            dst_stride_h,
-            target_h,
-            target_w,
-            float(inv_std_255_r),
-            float(inv_std_255_g),
-            float(inv_std_255_b),
-            float(offset_r),
-            float(offset_g),
-            float(offset_b),
-            CH_R=ch_r,
-            CH_G=ch_g,
-            CH_B=ch_b,
-            KSIZE_Y=tables.ksize_y,
-            KSIZE_X=tables.ksize_x,
-            PRECISION_BITS_C=PRECISION_BITS,
-            HALF_C=_HALF,
-            BLOCK_H=BLOCK_H,
-            BLOCK_W=BLOCK_W,
-        )
-    elif variant == "two_pass":
-        horizontal_block_h = _read_power_of_two_env(
-            _PREPROC_HORIZONTAL_BLOCK_H_ENV, 1
-        )
-        horizontal_block_w = _read_power_of_two_env(
-            _PREPROC_HORIZONTAL_BLOCK_W_ENV, 128
-        )
-        tmp = torch.empty(
-            (3, src_h, target_w), dtype=torch.uint8, device=src.device
-        )
-        horizontal_grid = (
-            (src_h + horizontal_block_h - 1) // horizontal_block_h,
-            (target_w + horizontal_block_w - 1) // horizontal_block_w,
-        )
-        horizontal_resize_uint8_all_channels_kernel[horizontal_grid](
-            src,
-            tmp,
-            tables.xmin_gpu,
-            tables.wx_gpu,
-            src_h,
-            src_w,
-            src_stride_h,
-            src_stride_w,
-            int(crop_offset_y),
-            int(crop_offset_x),
-            target_w,
-            CH_R=ch_r,
-            CH_G=ch_g,
-            CH_B=ch_b,
-            KSIZE_X=tables.ksize_x,
-            PRECISION_BITS_C=PRECISION_BITS,
-            HALF_C=_HALF,
-            BLOCK_H=horizontal_block_h,
-            BLOCK_W=horizontal_block_w,
-        )
-        vertical_normalize_from_horizontal_kernel[(grid[0], grid[1], 3)](
-            tmp,
-            out,
-            tables.ymin_gpu,
-            tables.wy_gpu,
-            src_h,
-            dst_stride_c,
-            dst_stride_h,
-            target_h,
-            target_w,
-            float(inv_std_255_r),
-            float(inv_std_255_g),
-            float(inv_std_255_b),
-            float(offset_r),
-            float(offset_g),
-            float(offset_b),
-            KSIZE_Y=tables.ksize_y,
-            PRECISION_BITS_C=PRECISION_BITS,
-            HALF_C=_HALF,
-            BLOCK_H=BLOCK_H,
-            BLOCK_W=BLOCK_W,
-        )
-    else:
-        raise ModelRuntimeError(
-            message=(
-                f"Unknown {_PREPROC_VARIANT_ENV}={variant!r}; expected "
-                "'current', 'channel_split', 'packed', or 'two_pass'."
-            ),
-            help_url="https://inference-models.roboflow.com/errors/models-runtime/#modelruntimeerror",
-        )
-    return out

From 488a703aa08c7156b412a19fd8b2c779b6995e26 Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Tue, 2 Jun 2026 02:53:31 +0000
Subject: [PATCH 21/76] Align Triton preproc env integration

---
 inference_models/inference_models/configuration.py  |  9 +++++++++
 .../rfdetr/rfdetr_instance_segmentation_trt.py      | 13 +++----------
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/inference_models/inference_models/configuration.py b/inference_models/inference_models/configuration.py
index 9350627ce1..92d292c555 100644
--- a/inference_models/inference_models/configuration.py
+++ b/inference_models/inference_models/configuration.py
@@ -294,6 +294,15 @@
     variable_name="INFERENCE_MODELS_RFDETR_TRITON_POSTPROC_ENABLED",
     default=DEFAULT_INFERENCE_MODELS_RFDETR_TRITON_POSTPROC_ENABLED,
 )
+DEFAULT_INFERENCE_MODELS_RFDETR_TRITON_PREPROC_ENABLED = False
+_LEGACY_USE_TRITON_FOR_PREPROCESSING = get_boolean_from_env(
+    variable_name="USE_TRITON_FOR_PREPROCESSING",
+    default=DEFAULT_INFERENCE_MODELS_RFDETR_TRITON_PREPROC_ENABLED,
+)
+INFERENCE_MODELS_RFDETR_TRITON_PREPROC_ENABLED = get_boolean_from_env(
+    variable_name="INFERENCE_MODELS_RFDETR_TRITON_PREPROC_ENABLED",
+    default=_LEGACY_USE_TRITON_FOR_PREPROCESSING,
+)
 INFERENCE_MODELS_ROBOFLOW_INSTANT_DEFAULT_CONFIDENCE = get_float_from_env(
     variable_name="INFERENCE_MODELS_ROBOFLOW_INSTANT_DEFAULT_CONFIDENCE",
     default=0.99,
diff --git a/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py b/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py
index 6b2d98db11..5e94590917 100644
--- a/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py
+++ b/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py
@@ -13,6 +13,7 @@
 from inference_models.configuration import (
     DEFAULT_DEVICE,
     INFERENCE_MODELS_RFDETR_DEFAULT_CONFIDENCE,
+    INFERENCE_MODELS_RFDETR_TRITON_PREPROC_ENABLED,
 )
 from inference_models.entities import ColorFormat, Confidence
 from inference_models.errors import (
@@ -55,7 +56,6 @@
     post_process_instance_segmentation_results_to_rle_masks,
 )
 from inference_models.models.rfdetr.pre_processing import pre_process_network_input
-from inference_models.utils.environment import get_boolean_from_env
 
 try:
     from inference_models.models.rfdetr.triton_preprocess import (
@@ -67,14 +67,10 @@
     _TRITON_AVAILABLE = False
     build_resample_tables = None
     triton_preprocess_rfdetr_stretch = None
-
-# Kill switch: set INFERENCE_MODELS_RFDETR_TRITON_PREPROC_ENABLED=false to force
-# the PIL reference path for every call, regardless of other predicates.
-_FAST_PATH_ENABLED = get_boolean_from_env(
-    "INFERENCE_MODELS_RFDETR_TRITON_PREPROC_ENABLED", default=True
-)
 from inference_models.weights_providers.entities import RecommendedParameters
 
+_FAST_PATH_ENABLED = INFERENCE_MODELS_RFDETR_TRITON_PREPROC_ENABLED
+
 try:
     import tensorrt as trt
 except ImportError as import_error:
@@ -392,9 +388,6 @@ def _try_fast_preprocess(
             return None
         if ni.normalization is None:
             return None
-        # When dataset_version_resize_dimensions is None, the prod path collapses
-        # non-stretch resize modes to a single PIL stretch as well
-        # (pre_processing.py:_needs_two_step_resize), so we accept all modes here.
         if ni.resize_mode not in (
             ResizeMode.STRETCH_TO,
             ResizeMode.LETTERBOX,

From 6441d8e924d7b02f067a1034cf13d12898fa3921 Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Tue, 2 Jun 2026 19:42:17 +0000
Subject: [PATCH 22/76] Tighten RF-DETR Triton preprocessing runtime

---
 .../rfdetr_instance_segmentation_trt.py       | 75 +++++++++++++------
 1 file changed, 53 insertions(+), 22 deletions(-)

diff --git a/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py b/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py
index 5e94590917..1561fab55b 100644
--- a/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py
+++ b/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py
@@ -61,16 +61,19 @@
     from inference_models.models.rfdetr.triton_preprocess import (
         TRITON_AVAILABLE as _TRITON_AVAILABLE,
         build_resample_tables,
-        triton_preprocess_rfdetr_stretch,
+        resolve_two_pass_launch_config,
+        triton_preprocess_rfdetr_stretch_two_pass_preallocated,
     )
 except ImportError:
     _TRITON_AVAILABLE = False
     build_resample_tables = None
-    triton_preprocess_rfdetr_stretch = None
+    resolve_two_pass_launch_config = None
+    triton_preprocess_rfdetr_stretch_two_pass_preallocated = None
 from inference_models.weights_providers.entities import RecommendedParameters
 
 _FAST_PATH_ENABLED = INFERENCE_MODELS_RFDETR_TRITON_PREPROC_ENABLED
 
+
 try:
     import tensorrt as trt
 except ImportError as import_error:
@@ -113,8 +116,11 @@ class _FastPathState:
         "target_w",
         "pinned_host",
         "src_gpu",
-        "out_buffer",
+        "out_buffers",
+        "tmp_buffers",
+        "out_buffer_index",
         "tables",
+        "launch_config",
     )
 
     def __init__(
@@ -125,8 +131,10 @@ def __init__(
         target_w: int,
         pinned_host: torch.Tensor,
         src_gpu: torch.Tensor,
-        out_buffer: torch.Tensor,
+        out_buffers: List[torch.Tensor],
+        tmp_buffers: List[torch.Tensor],
         tables,
+        launch_config,
     ) -> None:
         self.src_h = src_h
         self.src_w = src_w
@@ -134,8 +142,11 @@ def __init__(
         self.target_w = target_w
         self.pinned_host = pinned_host
         self.src_gpu = src_gpu
-        self.out_buffer = out_buffer
+        self.out_buffers = out_buffers
+        self.tmp_buffers = tmp_buffers
+        self.out_buffer_index = 0
         self.tables = tables
+        self.launch_config = launch_config
 
     @classmethod
     def build(
@@ -148,9 +159,14 @@ def build(
     ) -> "_FastPathState":
         pinned_host = torch.empty((src_h, src_w, 3), dtype=torch.uint8, pin_memory=True)
         src_gpu = torch.empty((src_h, src_w, 3), dtype=torch.uint8, device=device)
-        out_buffer = torch.empty(
-            (1, 3, target_h, target_w), dtype=torch.float32, device=device
-        )
+        out_buffers = [
+            torch.empty((1, 3, target_h, target_w), dtype=torch.float32, device=device)
+            for _ in range(3)
+        ]
+        tmp_buffers = [
+            torch.empty((3, src_h, target_w), dtype=torch.uint8, device=device)
+            for _ in range(3)
+        ]
         tables = build_resample_tables(
             src_h=src_h,
             src_w=src_w,
@@ -165,13 +181,13 @@ def build(
             target_w=target_w,
             pinned_host=pinned_host,
             src_gpu=src_gpu,
-            out_buffer=out_buffer,
+            out_buffers=out_buffers,
+            tmp_buffers=tmp_buffers,
             tables=tables,
+            launch_config=resolve_two_pass_launch_config(),
         )
 
-    def is_stale(
-        self, src_h: int, src_w: int, target_h: int, target_w: int
-    ) -> bool:
+    def is_stale(self, src_h: int, src_w: int, target_h: int, target_w: int) -> bool:
         return (
             self.src_h != src_h
             or self.src_w != src_w
@@ -179,6 +195,13 @@ def is_stale(
             or self.target_w != target_w
         )
 
+    def next_buffers(self) -> Tuple[torch.Tensor, torch.Tensor]:
+        idx = self.out_buffer_index
+        out = self.out_buffers[idx]
+        tmp = self.tmp_buffers[idx]
+        self.out_buffer_index = (idx + 1) % len(self.out_buffers)
+        return out, tmp
+
 
 class RFDetrForInstanceSegmentationTRT(
     InstanceSegmentationModel[
@@ -313,6 +336,7 @@ def __init__(
         self._trt_cuda_graph_cache = trt_cuda_graph_cache
         self._lock = threading.Lock()
         self._inference_stream = torch.cuda.Stream(device=self._device)
+        self._pre_process_cuda_stream = torch.cuda.Stream(device=self._device)
         self._thread_local_storage = threading.local()
         self.recommended_parameters = recommended_parameters
         self._fast_path_state: Optional[_FastPathState] = None
@@ -352,6 +376,7 @@ def pre_process(
                 pre_processing_overrides=pre_processing_overrides,
             )
         self._pre_process_stream.synchronize()
+        pre_processed_images._pre_processing_meta = pre_processing_meta  # type: ignore[attr-defined]
         return pre_processed_images, pre_processing_meta
 
     def _try_fast_preprocess(
@@ -443,21 +468,26 @@ def _try_fast_preprocess(
 
         pinned_np = state.pinned_host.numpy()
         np.copyto(pinned_np, candidate, casting="no")
+        out_buffer, tmp_buffer = state.next_buffers()
 
         with torch.cuda.stream(self._pre_process_stream):
             state.src_gpu.copy_(state.pinned_host, non_blocking=True)
-            triton_preprocess_rfdetr_stretch(
+            triton_preprocess_rfdetr_stretch_two_pass_preallocated(
                 src=state.src_gpu,
+                out=out_buffer,
+                tmp=tmp_buffer,
                 tables=state.tables,
                 target_h=target_h,
                 target_w=target_w,
                 means=means_t,
                 stds=stds_t,
                 swap_rb=swap_rb,
-                out=state.out_buffer,
+                launch_config=state.launch_config,
             )
-            state.out_buffer.record_stream(self._pre_process_stream)
-        self._pre_process_stream.synchronize()
+            self._fast_preproc_event = torch.cuda.Event()
+            self._fast_preproc_event.record(self._pre_process_stream)
+            out_buffer._trt_ready_event = self._fast_preproc_event  # type: ignore[attr-defined]
+            out_buffer.record_stream(self._pre_process_stream)
 
         meta = PreProcessingMetadata(
             pad_left=0,
@@ -473,7 +503,8 @@ def _try_fast_preprocess(
                 offset_x=0, offset_y=0, crop_width=orig_w, crop_height=orig_h
             ),
         )
-        return state.out_buffer, [meta]
+        out_buffer._pre_processing_meta = [meta]  # type: ignore[attr-defined]
+        return out_buffer, [meta]
 
     def forward(
         self,
@@ -482,6 +513,10 @@ def forward(
         **kwargs,
     ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
         cache = self._trt_cuda_graph_cache if not disable_cuda_graphs else None
+        preproc_event = getattr(self, "_fast_preproc_event", None)
+        if preproc_event is not None:
+            self._inference_stream.wait_event(preproc_event)
+            self._fast_preproc_event = None
         with self._lock:
             with use_cuda_context(context=self._cuda_context):
                 detections, labels, masks = infer_from_trt_engine(
@@ -549,11 +584,7 @@ def post_process(
 
     @property
     def _pre_process_stream(self) -> torch.cuda.Stream:
-        if not hasattr(self._thread_local_storage, "pre_process_stream"):
-            self._thread_local_storage.pre_process_stream = torch.cuda.Stream(
-                device=self._device
-            )
-        return self._thread_local_storage.pre_process_stream
+        return self._pre_process_cuda_stream
 
     @property
     def _post_process_stream(self) -> torch.cuda.Stream:

From c8d59a7b083bae09f06de9c2da4c33c7e7ab25c6 Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Wed, 3 Jun 2026 00:29:59 +0000
Subject: [PATCH 23/76] Tighten RF-DETR Triton preproc review coverage

---
 .../rfdetr_coco_same_shape_parity.py          |   9 +-
 .../inference_models/configuration.py         |   6 +-
 .../rfdetr_instance_segmentation_trt.py       | 115 +++++++-----
 .../models/rfdetr/test_triton_preprocess.py   | 164 ++++++++++++++++++
 .../rfdetr/test_trt_preprocess_fast_path.py   | 154 ++++++++++++++++
 5 files changed, 398 insertions(+), 50 deletions(-)
 create mode 100644 inference_models/tests/unit_tests/models/rfdetr/test_triton_preprocess.py
 create mode 100644 inference_models/tests/unit_tests/models/rfdetr/test_trt_preprocess_fast_path.py

diff --git a/development/stream_interface/rfdetr_coco_same_shape_parity.py b/development/stream_interface/rfdetr_coco_same_shape_parity.py
index 2ed30eec50..9050a7be16 100644
--- a/development/stream_interface/rfdetr_coco_same_shape_parity.py
+++ b/development/stream_interface/rfdetr_coco_same_shape_parity.py
@@ -1,8 +1,9 @@
 """Compare RF-DETR instance-segmentation outputs on same-shape COCO images.
 
 This harness is used to reproduce the correctness table in the RF-DETR Triton
-postprocess PR. It runs a baseline git ref with all RF-DETR fast paths disabled
-and a candidate ref with only Triton RLE postprocess enabled, then compares
+preprocess PR. It runs a baseline git ref with all RF-DETR fast paths disabled
+and a candidate ref with Triton RLE postprocess and Triton preprocess enabled,
+then compares
 detection counts, classes, boxes, scores, and RLE masks.
 
 Example:
@@ -10,7 +11,7 @@
     env PARITY_MODEL_PATH=/path/to/rfdetr-seg-nano-orin-trt-package \
       python development/stream_interface/rfdetr_coco_same_shape_parity.py \
         --base-ref main \
-        --candidate-ref opt-python-postproc \
+        --candidate-ref opt-preprocess \
         --height 480 \
         --width 640 \
         --image-count 1000
@@ -47,7 +48,7 @@
 }
 CANDIDATE_FLAGS_ON = {
     "INFERENCE_MODELS_RFDETR_TRITON_POSTPROC_ENABLED": "true",
-    "INFERENCE_MODELS_RFDETR_TRITON_PREPROC_ENABLED": "false",
+    "INFERENCE_MODELS_RFDETR_TRITON_PREPROC_ENABLED": "true",
     "RFDETR_PIPELINE_DEPTH": "1",
     "ENABLE_AUTO_CUDA_GRAPHS_FOR_TRT_BACKEND": "false",
     "RFDETR_NSIGHT_MARKERS": "false",
diff --git a/inference_models/inference_models/configuration.py b/inference_models/inference_models/configuration.py
index 92d292c555..5b10d0daf3 100644
--- a/inference_models/inference_models/configuration.py
+++ b/inference_models/inference_models/configuration.py
@@ -295,13 +295,9 @@
     default=DEFAULT_INFERENCE_MODELS_RFDETR_TRITON_POSTPROC_ENABLED,
 )
 DEFAULT_INFERENCE_MODELS_RFDETR_TRITON_PREPROC_ENABLED = False
-_LEGACY_USE_TRITON_FOR_PREPROCESSING = get_boolean_from_env(
-    variable_name="USE_TRITON_FOR_PREPROCESSING",
-    default=DEFAULT_INFERENCE_MODELS_RFDETR_TRITON_PREPROC_ENABLED,
-)
 INFERENCE_MODELS_RFDETR_TRITON_PREPROC_ENABLED = get_boolean_from_env(
     variable_name="INFERENCE_MODELS_RFDETR_TRITON_PREPROC_ENABLED",
-    default=_LEGACY_USE_TRITON_FOR_PREPROCESSING,
+    default=DEFAULT_INFERENCE_MODELS_RFDETR_TRITON_PREPROC_ENABLED,
 )
 INFERENCE_MODELS_ROBOFLOW_INSTANT_DEFAULT_CONFIDENCE = get_float_from_env(
     variable_name="INFERENCE_MODELS_ROBOFLOW_INSTANT_DEFAULT_CONFIDENCE",
diff --git a/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py b/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py
index 1561fab55b..2fc79309ec 100644
--- a/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py
+++ b/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py
@@ -1,4 +1,5 @@
 import threading
+import warnings
 from typing import List, Optional, Set, Tuple, Union
 
 import numpy as np
@@ -340,6 +341,7 @@ def __init__(
         self._thread_local_storage = threading.local()
         self.recommended_parameters = recommended_parameters
         self._fast_path_state: Optional[_FastPathState] = None
+        self._fast_preprocess_warned_reasons: Set[str] = set()
 
     @property
     def class_names(self) -> List[str]:
@@ -388,59 +390,25 @@ def _try_fast_preprocess(
     ) -> Optional[Tuple[torch.Tensor, List[PreProcessingMetadata]]]:
         if not _FAST_PATH_ENABLED:
             return None
-        if not _TRITON_AVAILABLE:
-            return None
-        if image_size is not None:
-            return None
-        # pre_processing_overrides can only *disable* transforms; it has no
-        # "enable" knob. The fast path never applies static_crop / grayscale /
-        # contrast regardless, so the override flags are irrelevant — we just
-        # gate on whether the image_pre_processing config itself asks for them.
-        ipp = self._inference_config.image_pre_processing
-        if (
-            (ipp.static_crop is not None and ipp.static_crop.enabled)
-            or (ipp.contrast is not None and ipp.contrast.enabled)
-            or (ipp.grayscale is not None and ipp.grayscale.enabled)
-        ):
-            return None
-
-        ni = self._inference_config.network_input
-        if ni.dataset_version_resize_dimensions is not None:
-            return None
-        if ni.input_channels != 3:
-            return None
-        if ni.scaling_factor not in (None, 255):
-            return None
-        if ni.normalization is None:
-            return None
-        if ni.resize_mode not in (
-            ResizeMode.STRETCH_TO,
-            ResizeMode.LETTERBOX,
-            ResizeMode.CENTER_CROP,
-            ResizeMode.LETTERBOX_REFLECT_EDGES,
-        ):
+        unsupported_reason = self._unsupported_fast_preprocess_reason(
+            images=images,
+            image_size=image_size,
+        )
+        if unsupported_reason is not None:
+            self._warn_unsupported_fast_preprocess(unsupported_reason)
             return None
 
         if isinstance(images, list):
-            if len(images) != 1:
-                return None
             candidate = images[0]
         else:
             candidate = images
-        if not isinstance(candidate, np.ndarray):
-            return None
-        if (
-            candidate.dtype != np.uint8
-            or candidate.ndim != 3
-            or candidate.shape[2] != 3
-        ):
-            return None
 
         caller_mode = (
             ColorMode(input_color_format)
             if input_color_format is not None
             else ColorMode.BGR
         )
+        ni = self._inference_config.network_input
         swap_rb = caller_mode != ni.color_mode
 
         means, stds = ni.normalization
@@ -506,6 +474,71 @@ def _try_fast_preprocess(
         out_buffer._pre_processing_meta = [meta]  # type: ignore[attr-defined]
         return out_buffer, [meta]
 
+    def _unsupported_fast_preprocess_reason(self, images, image_size) -> Optional[str]:
+        """Return why the Triton TRT preproc path cannot handle this request."""
+        if not _TRITON_AVAILABLE:
+            return "triton is not installed"
+        if image_size is not None:
+            return "custom image_size overrides are not supported"
+        # pre_processing_overrides can only *disable* transforms; it has no
+        # "enable" knob. The fast path never applies static_crop / grayscale /
+        # contrast regardless, so the override flags are irrelevant. The gate
+        # only needs to reject model configs that require those transforms.
+        ipp = self._inference_config.image_pre_processing
+        if (
+            (ipp.static_crop is not None and ipp.static_crop.enabled)
+            or (ipp.contrast is not None and ipp.contrast.enabled)
+            or (ipp.grayscale is not None and ipp.grayscale.enabled)
+        ):
+            return "static crop, contrast, and grayscale preprocessing are unsupported"
+
+        ni = self._inference_config.network_input
+        if ni.dataset_version_resize_dimensions is not None:
+            return "dataset-version resize is unsupported"
+        if ni.input_channels != 3:
+            return "only 3-channel inputs are supported"
+        if ni.scaling_factor not in (None, 255):
+            return "only scaling_factor None or 255 is supported"
+        if ni.normalization is None:
+            return "normalization is required"
+        if ni.resize_mode not in (
+            ResizeMode.STRETCH_TO,
+            ResizeMode.LETTERBOX,
+            ResizeMode.CENTER_CROP,
+            ResizeMode.LETTERBOX_REFLECT_EDGES,
+        ):
+            return f"resize mode {ni.resize_mode!r} is unsupported"
+
+        if isinstance(images, list):
+            if len(images) != 1:
+                return "only batch size 1 is supported"
+            candidate = images[0]
+        else:
+            candidate = images
+        if not isinstance(candidate, np.ndarray):
+            return "only numpy ndarray inputs are supported"
+        if (
+            candidate.dtype != np.uint8
+            or candidate.ndim != 3
+            or candidate.shape[2] != 3
+        ):
+            return "input must be uint8 HWC with 3 channels"
+        return None
+
+    def _warn_unsupported_fast_preprocess(self, reason: str) -> None:
+        warned_reasons = getattr(self, "_fast_preprocess_warned_reasons", None)
+        if warned_reasons is None:
+            warned_reasons = set()
+            self._fast_preprocess_warned_reasons = warned_reasons
+        if reason in warned_reasons:
+            return
+        warned_reasons.add(reason)
+        warnings.warn(
+            f"RF-DETR Triton preprocess path is unsupported: {reason}",
+            RuntimeWarning,
+            stacklevel=3,
+        )
+
     def forward(
         self,
         pre_processed_images: torch.Tensor,
diff --git a/inference_models/tests/unit_tests/models/rfdetr/test_triton_preprocess.py b/inference_models/tests/unit_tests/models/rfdetr/test_triton_preprocess.py
new file mode 100644
index 0000000000..f9064e1118
--- /dev/null
+++ b/inference_models/tests/unit_tests/models/rfdetr/test_triton_preprocess.py
@@ -0,0 +1,164 @@
+import numpy as np
+import pytest
+import torch
+import torchvision.transforms.functional as TF
+from PIL import Image
+
+from inference_models.errors import ModelInputError, ModelRuntimeError
+from inference_models.models.rfdetr import triton_preprocess
+from inference_models.models.rfdetr.triton_preprocess import (
+    build_resample_tables,
+    resolve_two_pass_launch_config,
+    triton_preprocess_rfdetr_stretch,
+)
+
+_IMAGENET_MEAN = (0.485, 0.456, 0.406)
+_IMAGENET_STD = (0.229, 0.224, 0.225)
+_PREPROC_ENV_VARS = (
+    "INFERENCE_MODELS_RFDETR_TRITON_PREPROC_BLOCK_H",
+    "INFERENCE_MODELS_RFDETR_TRITON_PREPROC_BLOCK_W",
+    "INFERENCE_MODELS_RFDETR_TRITON_PREPROC_HORIZONTAL_BLOCK_H",
+    "INFERENCE_MODELS_RFDETR_TRITON_PREPROC_HORIZONTAL_BLOCK_W",
+)
+
+
+def _reference_preprocess(image_rgb: np.ndarray, target_h: int, target_w: int):
+    resized = TF.resize(
+        Image.fromarray(image_rgb),
+        (target_h, target_w),
+        antialias=True,
+    )
+    tensor = TF.to_tensor(resized)
+    tensor = TF.normalize(
+        tensor,
+        mean=list(_IMAGENET_MEAN),
+        std=list(_IMAGENET_STD),
+    )
+    return tensor.unsqueeze(0)
+
+
+def test_build_resample_tables_shapes_on_cpu() -> None:
+    tables = build_resample_tables(
+        src_h=11,
+        src_w=13,
+        target_h=7,
+        target_w=5,
+        device=torch.device("cpu"),
+    )
+
+    assert tuple(tables.ymin_gpu.shape) == (7,)
+    assert tuple(tables.xmin_gpu.shape) == (5,)
+    assert tuple(tables.wy_gpu.shape) == (7 * tables.ksize_y,)
+    assert tuple(tables.wx_gpu.shape) == (5 * tables.ksize_x,)
+    assert tables.ymin_gpu.dtype == torch.int32
+    assert tables.wx_gpu.dtype == torch.int32
+
+
+def test_resolve_launch_config_rejects_non_power_of_two(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    for env_var in _PREPROC_ENV_VARS:
+        monkeypatch.delenv(env_var, raising=False)
+    monkeypatch.setenv("INFERENCE_MODELS_RFDETR_TRITON_PREPROC_BLOCK_W", "96")
+
+    with pytest.raises(ModelRuntimeError, match="positive power of two"):
+        resolve_two_pass_launch_config()
+
+
+@pytest.mark.skipif(
+    not triton_preprocess.TRITON_AVAILABLE,
+    reason="Triton is required for runtime validation",
+)
+def test_triton_preprocess_rejects_cpu_source_tensor() -> None:
+    source = torch.zeros((8, 8, 3), dtype=torch.uint8)
+    tables = build_resample_tables(
+        src_h=8,
+        src_w=8,
+        target_h=8,
+        target_w=8,
+        device=torch.device("cpu"),
+    )
+
+    with pytest.raises(ModelInputError, match="expected CUDA src tensor"):
+        triton_preprocess_rfdetr_stretch(
+            src=source,
+            tables=tables,
+            target_h=8,
+            target_w=8,
+        )
+
+
+@pytest.mark.skipif(
+    not torch.cuda.is_available() or not triton_preprocess.TRITON_AVAILABLE,
+    reason="CUDA and Triton are required",
+)
+def test_triton_preprocess_matches_pil_for_rgb_numpy() -> None:
+    rng = np.random.default_rng(seed=17)
+    image_rgb = rng.integers(0, 256, size=(77, 51, 3), dtype=np.uint8)
+    target_h, target_w = 48, 64
+    device = torch.device("cuda")
+    tables = build_resample_tables(
+        src_h=image_rgb.shape[0],
+        src_w=image_rgb.shape[1],
+        target_h=target_h,
+        target_w=target_w,
+        device=device,
+    )
+
+    actual = triton_preprocess_rfdetr_stretch(
+        src=torch.from_numpy(image_rgb).to(device=device),
+        tables=tables,
+        target_h=target_h,
+        target_w=target_w,
+        means=_IMAGENET_MEAN,
+        stds=_IMAGENET_STD,
+        swap_rb=False,
+    )
+    torch.cuda.synchronize()
+
+    expected = _reference_preprocess(image_rgb, target_h=target_h, target_w=target_w)
+    torch.testing.assert_close(actual.cpu(), expected, atol=1e-6, rtol=0)
+
+
+@pytest.mark.skipif(
+    not torch.cuda.is_available() or not triton_preprocess.TRITON_AVAILABLE,
+    reason="CUDA and Triton are required",
+)
+def test_triton_preprocess_matches_pil_for_bgr_numpy_with_preallocated_buffers() -> (
+    None
+):
+    rng = np.random.default_rng(seed=23)
+    image_rgb = rng.integers(0, 256, size=(63, 85, 3), dtype=np.uint8)
+    image_bgr = image_rgb[:, :, ::-1].copy()
+    target_h, target_w = 64, 64
+    device = torch.device("cuda")
+    tables = build_resample_tables(
+        src_h=image_bgr.shape[0],
+        src_w=image_bgr.shape[1],
+        target_h=target_h,
+        target_w=target_w,
+        device=device,
+    )
+    out = torch.empty((1, 3, target_h, target_w), dtype=torch.float32, device=device)
+    tmp = torch.empty(
+        (3, image_bgr.shape[0], target_w),
+        dtype=torch.uint8,
+        device=device,
+    )
+
+    actual = triton_preprocess_rfdetr_stretch(
+        src=torch.from_numpy(image_bgr).to(device=device),
+        tables=tables,
+        target_h=target_h,
+        target_w=target_w,
+        means=_IMAGENET_MEAN,
+        stds=_IMAGENET_STD,
+        swap_rb=True,
+        out=out,
+        tmp=tmp,
+    )
+    torch.cuda.synchronize()
+
+    expected = _reference_preprocess(image_rgb, target_h=target_h, target_w=target_w)
+    assert actual.data_ptr() == out.data_ptr()
+    torch.testing.assert_close(actual.cpu(), expected, atol=1e-6, rtol=0)
diff --git a/inference_models/tests/unit_tests/models/rfdetr/test_trt_preprocess_fast_path.py b/inference_models/tests/unit_tests/models/rfdetr/test_trt_preprocess_fast_path.py
new file mode 100644
index 0000000000..d47050e569
--- /dev/null
+++ b/inference_models/tests/unit_tests/models/rfdetr/test_trt_preprocess_fast_path.py
@@ -0,0 +1,154 @@
+import warnings
+from types import SimpleNamespace
+
+import numpy as np
+import pytest
+import torch
+import torchvision.transforms.functional as TF
+from PIL import Image
+
+from inference_models.entities import ImageDimensions
+from inference_models.models.common.roboflow.model_packages import (
+    ColorMode,
+    ImagePreProcessing,
+    NetworkInputDefinition,
+    ResizeMode,
+    TrainingInputSize,
+)
+from inference_models.models.rfdetr import triton_preprocess
+
+pytest.importorskip("tensorrt")
+pytest.importorskip("pycuda.driver")
+
+from inference_models.models.rfdetr import (
+    rfdetr_instance_segmentation_trt,
+)  # noqa: E402
+
+_IMAGENET_MEAN = (0.485, 0.456, 0.406)
+_IMAGENET_STD = (0.229, 0.224, 0.225)
+
+
+def _network_input(
+    target_h: int = 64,
+    target_w: int = 64,
+) -> NetworkInputDefinition:
+    return NetworkInputDefinition(
+        training_input_size=TrainingInputSize(height=target_h, width=target_w),
+        dataset_version_resize_dimensions=None,
+        dynamic_spatial_size_supported=False,
+        color_mode=ColorMode.RGB,
+        resize_mode=ResizeMode.STRETCH_TO,
+        input_channels=3,
+        scaling_factor=255,
+        normalization=[list(_IMAGENET_MEAN), list(_IMAGENET_STD)],
+    )
+
+
+def _adapter_for_fast_preprocess(network_input: NetworkInputDefinition):
+    model = object.__new__(
+        rfdetr_instance_segmentation_trt.RFDetrForInstanceSegmentationTRT
+    )
+    model._inference_config = SimpleNamespace(
+        image_pre_processing=ImagePreProcessing(),
+        network_input=network_input,
+    )
+    model._device = torch.device("cuda")
+    model._pre_process_cuda_stream = torch.cuda.Stream(device=model._device)
+    model._fast_path_state = None
+    model._fast_preprocess_warned_reasons = set()
+    return model
+
+
+def _reference_preprocess(image_rgb: np.ndarray, target_h: int, target_w: int):
+    resized = TF.resize(
+        Image.fromarray(image_rgb),
+        (target_h, target_w),
+        antialias=True,
+    )
+    tensor = TF.to_tensor(resized)
+    tensor = TF.normalize(
+        tensor,
+        mean=list(_IMAGENET_MEAN),
+        std=list(_IMAGENET_STD),
+    )
+    return tensor.unsqueeze(0)
+
+
+def test_trt_fast_preprocess_warns_once_for_unsupported_batch(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    monkeypatch.setattr(rfdetr_instance_segmentation_trt, "_FAST_PATH_ENABLED", True)
+    monkeypatch.setattr(rfdetr_instance_segmentation_trt, "_TRITON_AVAILABLE", True)
+    model = object.__new__(
+        rfdetr_instance_segmentation_trt.RFDetrForInstanceSegmentationTRT
+    )
+    model._inference_config = SimpleNamespace(
+        image_pre_processing=ImagePreProcessing(),
+        network_input=_network_input(),
+    )
+    model._fast_preprocess_warned_reasons = set()
+    image = np.zeros((8, 8, 3), dtype=np.uint8)
+
+    with pytest.warns(RuntimeWarning, match="only batch size 1 is supported"):
+        assert (
+            model._try_fast_preprocess(
+                images=[image, image],
+                input_color_format="bgr",
+                image_size=None,
+                pre_processing_overrides=None,
+            )
+            is None
+        )
+
+    with warnings.catch_warnings(record=True) as recorded:
+        warnings.simplefilter("always")
+        assert (
+            model._try_fast_preprocess(
+                images=[image, image],
+                input_color_format="bgr",
+                image_size=None,
+                pre_processing_overrides=None,
+            )
+            is None
+        )
+    assert recorded == []
+
+
+@pytest.mark.skipif(
+    not torch.cuda.is_available() or not triton_preprocess.TRITON_AVAILABLE,
+    reason="CUDA and Triton are required",
+)
+def test_trt_fast_preprocess_matches_reference_and_metadata(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    monkeypatch.setattr(rfdetr_instance_segmentation_trt, "_FAST_PATH_ENABLED", True)
+    monkeypatch.setattr(rfdetr_instance_segmentation_trt, "_TRITON_AVAILABLE", True)
+    target_h, target_w = 64, 64
+    model = _adapter_for_fast_preprocess(
+        network_input=_network_input(target_h=target_h, target_w=target_w),
+    )
+    rng = np.random.default_rng(seed=71)
+    image_rgb = rng.integers(0, 256, size=(96, 80, 3), dtype=np.uint8)
+    image_bgr = image_rgb[:, :, ::-1].copy()
+
+    actual, metadata = model._try_fast_preprocess(
+        images=image_bgr,
+        input_color_format="bgr",
+        image_size=None,
+        pre_processing_overrides=None,
+    )
+    actual._trt_ready_event.synchronize()  # type: ignore[attr-defined]
+
+    expected = _reference_preprocess(image_rgb, target_h=target_h, target_w=target_w)
+    torch.testing.assert_close(actual.cpu(), expected, atol=1e-6, rtol=0)
+
+    assert metadata[0].original_size == ImageDimensions(height=96, width=80)
+    assert metadata[0].size_after_pre_processing == ImageDimensions(
+        height=96,
+        width=80,
+    )
+    assert metadata[0].inference_size == ImageDimensions(
+        height=target_h,
+        width=target_w,
+    )
+    assert actual._pre_processing_meta == metadata  # type: ignore[attr-defined]

From a923560494ec341b594a64f7b53aef26557a6093 Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Wed, 3 Jun 2026 00:39:40 +0000
Subject: [PATCH 24/76] Add RF-DETR Triton preproc integration coverage

---
 inference_models/docs/changelog.md            |  3 +
 .../models/test_rfdetr_seg_predictions_trt.py | 91 +++++++++++++++++++
 2 files changed, 94 insertions(+)

diff --git a/inference_models/docs/changelog.md b/inference_models/docs/changelog.md
index 5166f497ea..1473faafad 100644
--- a/inference_models/docs/changelog.md
+++ b/inference_models/docs/changelog.md
@@ -8,6 +8,9 @@
   `INFERENCE_MODELS_RFDETR_TRITON_POSTPROC_ENABLED=True` to generate COCO RLE
   masks directly from sparse interpolated mask regions on supported CUDA
   inputs.
+- Opt-in Triton RF-DETR instance-segmentation preprocessing for the TensorRT
+  backend. Set `INFERENCE_MODELS_RFDETR_TRITON_PREPROC_ENABLED=True` to run the
+  supported resize and normalize path on CUDA.
 
 ---
 
diff --git a/inference_models/tests/integration_tests/models/test_rfdetr_seg_predictions_trt.py b/inference_models/tests/integration_tests/models/test_rfdetr_seg_predictions_trt.py
index 269db95f72..4e1986d64e 100644
--- a/inference_models/tests/integration_tests/models/test_rfdetr_seg_predictions_trt.py
+++ b/inference_models/tests/integration_tests/models/test_rfdetr_seg_predictions_trt.py
@@ -1,10 +1,44 @@
+import os
+
 import numpy as np
 import pytest
 import torch
 
+from inference_models.errors import CorruptedModelPackageError
 from inference_models.models.common.rle_utils import coco_rle_masks_to_torch_mask
 
 
+def _assert_instance_segmentation_predictions_match(actual, expected) -> None:
+    assert len(actual) == len(expected)
+    for actual_element, expected_element in zip(actual, expected):
+        torch.testing.assert_close(
+            actual_element.xyxy.cpu(),
+            expected_element.xyxy.cpu(),
+            atol=1.0,
+            rtol=0,
+        )
+        torch.testing.assert_close(
+            actual_element.confidence.cpu(),
+            expected_element.confidence.cpu(),
+            atol=1e-4,
+            rtol=0,
+        )
+        torch.testing.assert_close(
+            actual_element.class_id.cpu(),
+            expected_element.class_id.cpu(),
+            atol=0,
+            rtol=0,
+        )
+
+        actual_mask = actual_element.mask.detach().to(torch.bool).cpu()
+        expected_mask = expected_element.mask.detach().to(torch.bool).cpu()
+        assert tuple(actual_mask.shape) == tuple(expected_mask.shape)
+        intersection = torch.logical_and(actual_mask, expected_mask).sum().item()
+        union = torch.logical_or(actual_mask, expected_mask).sum().item()
+        assert union > 0
+        assert intersection / union >= 0.999
+
+
 @pytest.mark.slow
 @pytest.mark.trt_extras
 def test_trt_package_numpy(
@@ -430,6 +464,63 @@ def test_trt_package_torch_batch(
     assert 16179 <= predictions[1].mask.cpu().sum().item() <= 16229
 
 
+@pytest.mark.slow
+@pytest.mark.trt_extras
+def test_trt_triton_preprocess_output_matches_reference_preprocess(
+    monkeypatch: pytest.MonkeyPatch,
+    rfdetr_seg_asl_trt_package: str,
+    asl_image_numpy: np.ndarray,
+) -> None:
+    pytest.importorskip("triton")
+    if not torch.cuda.is_available():
+        pytest.skip("CUDA is required for Triton preprocessing parity")
+
+    from inference_models.models.rfdetr import rfdetr_instance_segmentation_trt
+    from inference_models.models.rfdetr.rfdetr_instance_segmentation_trt import (
+        RFDetrForInstanceSegmentationTRT,
+    )
+
+    model_package = os.getenv(
+        "RFDETR_SEG_TRT_PACKAGE_PATH",
+        rfdetr_seg_asl_trt_package,
+    )
+    try:
+        model = RFDetrForInstanceSegmentationTRT.from_pretrained(
+            model_name_or_path=model_package,
+            engine_host_code_allowed=True,
+        )
+    except CorruptedModelPackageError as error:
+        if "Platform specific tag mismatch" in str(error):
+            pytest.skip("TRT engine package is not compatible with this platform")
+        raise
+
+    monkeypatch.setattr(rfdetr_instance_segmentation_trt, "_FAST_PATH_ENABLED", False)
+    reference_predictions = model(asl_image_numpy)
+
+    original_triton_preprocess = (
+        rfdetr_instance_segmentation_trt.triton_preprocess_rfdetr_stretch_two_pass_preallocated
+    )
+    triton_calls = {"count": 0}
+
+    def counting_triton_preprocess(*args, **kwargs):
+        triton_calls["count"] += 1
+        return original_triton_preprocess(*args, **kwargs)
+
+    monkeypatch.setattr(
+        rfdetr_instance_segmentation_trt,
+        "triton_preprocess_rfdetr_stretch_two_pass_preallocated",
+        counting_triton_preprocess,
+    )
+    monkeypatch.setattr(rfdetr_instance_segmentation_trt, "_FAST_PATH_ENABLED", True)
+    triton_predictions = model(asl_image_numpy)
+
+    assert triton_calls["count"] == 1
+    _assert_instance_segmentation_predictions_match(
+        actual=triton_predictions,
+        expected=reference_predictions,
+    )
+
+
 @pytest.mark.slow
 @pytest.mark.trt_extras
 def test_trt_cudagraph_output_matches_non_cudagraph_output(

From 6bfbbaa5de3c79019df3203f32be686a78a019cb Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Wed, 3 Jun 2026 00:52:48 +0000
Subject: [PATCH 25/76] Clarify RF-DETR Triton preproc runtime

---
 .../rfdetr_instance_segmentation_trt.py       | 292 +-----------
 .../models/rfdetr/triton_preprocess.py        | 185 +++++++-
 .../rfdetr/triton_preprocess_runtime.py       | 416 ++++++++++++++++++
 .../models/test_rfdetr_seg_predictions_trt.py |  10 +-
 .../rfdetr/test_trt_preprocess_fast_path.py   |  77 ++--
 5 files changed, 635 insertions(+), 345 deletions(-)
 create mode 100644 inference_models/inference_models/models/rfdetr/triton_preprocess_runtime.py

diff --git a/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py b/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py
index 2fc79309ec..1e570e3f9f 100644
--- a/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py
+++ b/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py
@@ -1,5 +1,4 @@
 import threading
-import warnings
 from typing import List, Optional, Set, Tuple, Union
 
 import numpy as np
@@ -14,7 +13,6 @@
 from inference_models.configuration import (
     DEFAULT_DEVICE,
     INFERENCE_MODELS_RFDETR_DEFAULT_CONFIDENCE,
-    INFERENCE_MODELS_RFDETR_TRITON_PREPROC_ENABLED,
 )
 from inference_models.entities import ColorFormat, Confidence
 from inference_models.errors import (
@@ -28,13 +26,10 @@
     use_primary_cuda_context,
 )
 from inference_models.models.common.model_packages import get_model_package_contents
-from inference_models.entities import ImageDimensions
 from inference_models.models.common.roboflow.model_packages import (
-    ColorMode,
     InferenceConfig,
     PreProcessingMetadata,
     ResizeMode,
-    StaticCropOffset,
     TRTConfig,
     parse_class_names_file,
     parse_inference_config,
@@ -57,24 +52,11 @@
     post_process_instance_segmentation_results_to_rle_masks,
 )
 from inference_models.models.rfdetr.pre_processing import pre_process_network_input
-
-try:
-    from inference_models.models.rfdetr.triton_preprocess import (
-        TRITON_AVAILABLE as _TRITON_AVAILABLE,
-        build_resample_tables,
-        resolve_two_pass_launch_config,
-        triton_preprocess_rfdetr_stretch_two_pass_preallocated,
-    )
-except ImportError:
-    _TRITON_AVAILABLE = False
-    build_resample_tables = None
-    resolve_two_pass_launch_config = None
-    triton_preprocess_rfdetr_stretch_two_pass_preallocated = None
+from inference_models.models.rfdetr.triton_preprocess_runtime import (
+    FastPreprocessRuntime,
+)
 from inference_models.weights_providers.entities import RecommendedParameters
 
-_FAST_PATH_ENABLED = INFERENCE_MODELS_RFDETR_TRITON_PREPROC_ENABLED
-
-
 try:
     import tensorrt as trt
 except ImportError as import_error:
@@ -106,104 +88,6 @@
     ) from import_error
 
 
-class _FastPathState:
-    """Per-(src_shape, target_shape) cache of GPU buffers + resample tables
-    that the Triton fast path reuses across frames."""
-
-    __slots__ = (
-        "src_h",
-        "src_w",
-        "target_h",
-        "target_w",
-        "pinned_host",
-        "src_gpu",
-        "out_buffers",
-        "tmp_buffers",
-        "out_buffer_index",
-        "tables",
-        "launch_config",
-    )
-
-    def __init__(
-        self,
-        src_h: int,
-        src_w: int,
-        target_h: int,
-        target_w: int,
-        pinned_host: torch.Tensor,
-        src_gpu: torch.Tensor,
-        out_buffers: List[torch.Tensor],
-        tmp_buffers: List[torch.Tensor],
-        tables,
-        launch_config,
-    ) -> None:
-        self.src_h = src_h
-        self.src_w = src_w
-        self.target_h = target_h
-        self.target_w = target_w
-        self.pinned_host = pinned_host
-        self.src_gpu = src_gpu
-        self.out_buffers = out_buffers
-        self.tmp_buffers = tmp_buffers
-        self.out_buffer_index = 0
-        self.tables = tables
-        self.launch_config = launch_config
-
-    @classmethod
-    def build(
-        cls,
-        src_h: int,
-        src_w: int,
-        target_h: int,
-        target_w: int,
-        device: torch.device,
-    ) -> "_FastPathState":
-        pinned_host = torch.empty((src_h, src_w, 3), dtype=torch.uint8, pin_memory=True)
-        src_gpu = torch.empty((src_h, src_w, 3), dtype=torch.uint8, device=device)
-        out_buffers = [
-            torch.empty((1, 3, target_h, target_w), dtype=torch.float32, device=device)
-            for _ in range(3)
-        ]
-        tmp_buffers = [
-            torch.empty((3, src_h, target_w), dtype=torch.uint8, device=device)
-            for _ in range(3)
-        ]
-        tables = build_resample_tables(
-            src_h=src_h,
-            src_w=src_w,
-            target_h=target_h,
-            target_w=target_w,
-            device=device,
-        )
-        return cls(
-            src_h=src_h,
-            src_w=src_w,
-            target_h=target_h,
-            target_w=target_w,
-            pinned_host=pinned_host,
-            src_gpu=src_gpu,
-            out_buffers=out_buffers,
-            tmp_buffers=tmp_buffers,
-            tables=tables,
-            launch_config=resolve_two_pass_launch_config(),
-        )
-
-    def is_stale(self, src_h: int, src_w: int, target_h: int, target_w: int) -> bool:
-        return (
-            self.src_h != src_h
-            or self.src_w != src_w
-            or self.target_h != target_h
-            or self.target_w != target_w
-        )
-
-    def next_buffers(self) -> Tuple[torch.Tensor, torch.Tensor]:
-        idx = self.out_buffer_index
-        out = self.out_buffers[idx]
-        tmp = self.tmp_buffers[idx]
-        self.out_buffer_index = (idx + 1) % len(self.out_buffers)
-        return out, tmp
-
-
 class RFDetrForInstanceSegmentationTRT(
     InstanceSegmentationModel[
         torch.Tensor,
@@ -340,8 +224,7 @@ def __init__(
         self._pre_process_cuda_stream = torch.cuda.Stream(device=self._device)
         self._thread_local_storage = threading.local()
         self.recommended_parameters = recommended_parameters
-        self._fast_path_state: Optional[_FastPathState] = None
-        self._fast_preprocess_warned_reasons: Set[str] = set()
+        self._fast_preprocess_runtime = FastPreprocessRuntime(device=self._device)
 
     @property
     def class_names(self) -> List[str]:
@@ -359,14 +242,17 @@ def pre_process(
         pre_processing_overrides: Optional[PreProcessingOverrides] = None,
         **kwargs,
     ) -> Tuple[torch.Tensor, List[PreProcessingMetadata]]:
-        fast = self._try_fast_preprocess(
+        fast = self._fast_preprocess_runtime.try_preprocess(
             images=images,
             input_color_format=input_color_format,
             image_size=image_size,
-            pre_processing_overrides=pre_processing_overrides,
+            image_pre_processing=self._inference_config.image_pre_processing,
+            network_input=self._inference_config.network_input,
+            stream=self._pre_process_stream,
         )
         if fast is not None:
-            return fast
+            self._fast_preproc_event = fast.ready_event
+            return fast.tensor, fast.metadata
         with torch.cuda.stream(self._pre_process_stream):
             pre_processed_images, pre_processing_meta = pre_process_network_input(
                 images=images,
@@ -381,164 +267,6 @@ def pre_process(
         pre_processed_images._pre_processing_meta = pre_processing_meta  # type: ignore[attr-defined]
         return pre_processed_images, pre_processing_meta
 
-    def _try_fast_preprocess(
-        self,
-        images,
-        input_color_format,
-        image_size,
-        pre_processing_overrides,
-    ) -> Optional[Tuple[torch.Tensor, List[PreProcessingMetadata]]]:
-        if not _FAST_PATH_ENABLED:
-            return None
-        unsupported_reason = self._unsupported_fast_preprocess_reason(
-            images=images,
-            image_size=image_size,
-        )
-        if unsupported_reason is not None:
-            self._warn_unsupported_fast_preprocess(unsupported_reason)
-            return None
-
-        if isinstance(images, list):
-            candidate = images[0]
-        else:
-            candidate = images
-
-        caller_mode = (
-            ColorMode(input_color_format)
-            if input_color_format is not None
-            else ColorMode.BGR
-        )
-        ni = self._inference_config.network_input
-        swap_rb = caller_mode != ni.color_mode
-
-        means, stds = ni.normalization
-        means_t = (float(means[0]), float(means[1]), float(means[2]))
-        stds_t = (float(stds[0]), float(stds[1]), float(stds[2]))
-        target_h = ni.training_input_size.height
-        target_w = ni.training_input_size.width
-        orig_h, orig_w = int(candidate.shape[0]), int(candidate.shape[1])
-
-        state = self._fast_path_state
-        if state is None or state.is_stale(
-            src_h=orig_h,
-            src_w=orig_w,
-            target_h=target_h,
-            target_w=target_w,
-        ):
-            state = _FastPathState.build(
-                src_h=orig_h,
-                src_w=orig_w,
-                target_h=target_h,
-                target_w=target_w,
-                device=self._device,
-            )
-            self._fast_path_state = state
-
-        pinned_np = state.pinned_host.numpy()
-        np.copyto(pinned_np, candidate, casting="no")
-        out_buffer, tmp_buffer = state.next_buffers()
-
-        with torch.cuda.stream(self._pre_process_stream):
-            state.src_gpu.copy_(state.pinned_host, non_blocking=True)
-            triton_preprocess_rfdetr_stretch_two_pass_preallocated(
-                src=state.src_gpu,
-                out=out_buffer,
-                tmp=tmp_buffer,
-                tables=state.tables,
-                target_h=target_h,
-                target_w=target_w,
-                means=means_t,
-                stds=stds_t,
-                swap_rb=swap_rb,
-                launch_config=state.launch_config,
-            )
-            self._fast_preproc_event = torch.cuda.Event()
-            self._fast_preproc_event.record(self._pre_process_stream)
-            out_buffer._trt_ready_event = self._fast_preproc_event  # type: ignore[attr-defined]
-            out_buffer.record_stream(self._pre_process_stream)
-
-        meta = PreProcessingMetadata(
-            pad_left=0,
-            pad_top=0,
-            pad_right=0,
-            pad_bottom=0,
-            original_size=ImageDimensions(width=orig_w, height=orig_h),
-            size_after_pre_processing=ImageDimensions(width=orig_w, height=orig_h),
-            inference_size=ImageDimensions(width=target_w, height=target_h),
-            scale_width=target_w / orig_w,
-            scale_height=target_h / orig_h,
-            static_crop_offset=StaticCropOffset(
-                offset_x=0, offset_y=0, crop_width=orig_w, crop_height=orig_h
-            ),
-        )
-        out_buffer._pre_processing_meta = [meta]  # type: ignore[attr-defined]
-        return out_buffer, [meta]
-
-    def _unsupported_fast_preprocess_reason(self, images, image_size) -> Optional[str]:
-        """Return why the Triton TRT preproc path cannot handle this request."""
-        if not _TRITON_AVAILABLE:
-            return "triton is not installed"
-        if image_size is not None:
-            return "custom image_size overrides are not supported"
-        # pre_processing_overrides can only *disable* transforms; it has no
-        # "enable" knob. The fast path never applies static_crop / grayscale /
-        # contrast regardless, so the override flags are irrelevant. The gate
-        # only needs to reject model configs that require those transforms.
-        ipp = self._inference_config.image_pre_processing
-        if (
-            (ipp.static_crop is not None and ipp.static_crop.enabled)
-            or (ipp.contrast is not None and ipp.contrast.enabled)
-            or (ipp.grayscale is not None and ipp.grayscale.enabled)
-        ):
-            return "static crop, contrast, and grayscale preprocessing are unsupported"
-
-        ni = self._inference_config.network_input
-        if ni.dataset_version_resize_dimensions is not None:
-            return "dataset-version resize is unsupported"
-        if ni.input_channels != 3:
-            return "only 3-channel inputs are supported"
-        if ni.scaling_factor not in (None, 255):
-            return "only scaling_factor None or 255 is supported"
-        if ni.normalization is None:
-            return "normalization is required"
-        if ni.resize_mode not in (
-            ResizeMode.STRETCH_TO,
-            ResizeMode.LETTERBOX,
-            ResizeMode.CENTER_CROP,
-            ResizeMode.LETTERBOX_REFLECT_EDGES,
-        ):
-            return f"resize mode {ni.resize_mode!r} is unsupported"
-
-        if isinstance(images, list):
-            if len(images) != 1:
-                return "only batch size 1 is supported"
-            candidate = images[0]
-        else:
-            candidate = images
-        if not isinstance(candidate, np.ndarray):
-            return "only numpy ndarray inputs are supported"
-        if (
-            candidate.dtype != np.uint8
-            or candidate.ndim != 3
-            or candidate.shape[2] != 3
-        ):
-            return "input must be uint8 HWC with 3 channels"
-        return None
-
-    def _warn_unsupported_fast_preprocess(self, reason: str) -> None:
-        warned_reasons = getattr(self, "_fast_preprocess_warned_reasons", None)
-        if warned_reasons is None:
-            warned_reasons = set()
-            self._fast_preprocess_warned_reasons = warned_reasons
-        if reason in warned_reasons:
-            return
-        warned_reasons.add(reason)
-        warnings.warn(
-            f"RF-DETR Triton preprocess path is unsupported: {reason}",
-            RuntimeWarning,
-            stacklevel=3,
-        )
-
     def forward(
         self,
         pre_processed_images: torch.Tensor,
diff --git a/inference_models/inference_models/models/rfdetr/triton_preprocess.py b/inference_models/inference_models/models/rfdetr/triton_preprocess.py
index b032ccf80a..a3d88d0a69 100644
--- a/inference_models/inference_models/models/rfdetr/triton_preprocess.py
+++ b/inference_models/inference_models/models/rfdetr/triton_preprocess.py
@@ -22,6 +22,19 @@
 The runtime implementation is the consolidated two-pass path:
 horizontal PIL-antialias resize into a uint8 CHW scratch buffer, followed by
 the vertical pass plus `/255` + ImageNet normalization into fp32 CHW output.
+
+Tensor contracts:
+
+* ``src`` is a CUDA uint8 HWC image with shape ``(raw_h, raw_w, 3)``. The
+  hot TRT path currently passes a full frame with no static crop, but the
+  kernels also accept crop offsets and logical crop dimensions.
+* ``tmp`` is a CUDA uint8 CHW scratch tensor with shape
+  ``(3, src_h, target_w)``. It stores the horizontally resized image after
+  the same fixed-point rounding PIL applies between its separable passes.
+* ``out`` is a CUDA fp32 NCHW tensor with shape ``(1, 3, target_h, target_w)``
+  in network channel order. Each element is ``(uint8 / 255 - mean) / std``.
+* ``ResampleTables`` owns the per-axis int32 fixed-point start/weight tables
+  that PIL would precompute for this source/target shape pair.
 """
 
 from __future__ import annotations
@@ -62,6 +75,13 @@
 
 
 def _read_power_of_two_env(name: str, default: int) -> int:
+    """Read an optional Triton block-size override from the environment.
+
+    The preprocess kernels use power-of-two block sizes so Triton can form
+    static tensor shapes for vectorized loads/stores. This helper keeps those
+    launch-shape constraints local to the kernel wrapper and raises a
+    user-facing ``ModelRuntimeError`` for invalid values.
+    """
     raw = os.getenv(name)
     if raw is None or raw.strip() == "":
         return default
@@ -88,7 +108,20 @@ def _read_power_of_two_env(name: str, default: int) -> int:
 def _bilinear_antialias_weights_1d_int(
     in_size: int, out_size: int
 ) -> Tuple[np.ndarray, np.ndarray, int]:
-    """PIL's precompute_coeffs, int32 fixed-point form."""
+    """Build one axis of PIL-compatible bilinear-antialias tables.
+
+    Args:
+        in_size: Number of pixels along the source axis after static cropping.
+        out_size: Number of pixels along the resized output axis.
+
+    Returns:
+        ``(starts, weights_int, ksize)`` where ``starts`` has shape
+        ``(out_size,)`` and gives the first source sample for each output
+        coordinate, ``weights_int`` has shape ``(out_size, ksize)`` and stores
+        PIL's normalized triangle weights in ``PRECISION_BITS`` fixed-point
+        format, and ``ksize`` is the compile-time convolution width used by the
+        Triton loop for that axis.
+    """
     scale = in_size / out_size
     filterscale = max(1.0, scale)
     support = filterscale
@@ -148,7 +181,33 @@ def horizontal_resize_uint8_all_channels_kernel(
         BLOCK_H: tl.constexpr,
         BLOCK_W: tl.constexpr,
     ):
-        """Horizontal PIL-antialias pass for all output channels."""
+        """Compute PIL's horizontal resize pass for one tile.
+
+        Args:
+            src_ptr: CUDA uint8 HWC source image, shape ``(raw_h, raw_w, 3)``.
+            tmp_ptr: CUDA uint8 CHW scratch output, shape
+                ``(3, src_h, target_w)``.
+            xmin_ptr: CUDA int32 starts table, shape ``(target_w,)``.
+            wx_ptr: CUDA int32 flattened weights table, shape
+                ``(target_w * KSIZE_X,)``.
+            src_h/src_w: Logical source height/width after crop. These drive
+                bounds checks for the resized region.
+            src_stride_h/src_stride_w: Source strides in elements, used so the
+                kernel does not assume contiguous row pitch beyond HWC layout.
+            crop_offset_y/crop_offset_x: Offset into ``src_ptr`` for static
+                crop support. The TRT fast path passes zero.
+            target_w: Width of the resized network input.
+            CH_R/CH_G/CH_B: Source channel indices to emit into network
+                channel order. For BGR input feeding an RGB model this is
+                ``2, 1, 0``.
+
+        Output:
+            Writes the horizontally resized and PIL-rounded uint8 values into
+            ``tmp_ptr`` in CHW order. The vertical kernel consumes this scratch
+            buffer as its input image.
+        """
+        # Program ids tile over logical source rows and target columns. The
+        # y-axis is still source height because this pass only resizes width.
         pid_y = tl.program_id(0)
         pid_x = tl.program_id(1)
 
@@ -158,6 +217,8 @@ def horizontal_resize_uint8_all_channels_kernel(
         mask_x = offs_x < target_w
         mask_out = mask_y[:, None] & mask_x[None, :]
 
+        # For each output x, PIL precomputes the first contributing source x
+        # and a fixed-width row of int32 fixed-point triangle weights.
         xmin = tl.load(xmin_ptr + offs_x, mask=mask_x, other=0)
         sy = offs_y + crop_offset_y
 
@@ -169,14 +230,18 @@ def horizontal_resize_uint8_all_channels_kernel(
             sx_c = tl.maximum(tl.minimum(sx, src_w - 1), 0) + crop_offset_x
             wx = tl.load(wx_ptr + offs_x * KSIZE_X + kx, mask=mask_x, other=0)
             base = sy[:, None] * src_stride_h + sx_c[None, :] * src_stride_w
+            # Load source pixels in the network's channel order so the channel
+            # swap replaces the original PIL image conversion step.
             p_r = tl.load(src_ptr + base + CH_R, mask=mask_out, other=0).to(tl.int32)
             p_g = tl.load(src_ptr + base + CH_G, mask=mask_out, other=0).to(tl.int32)
             p_b = tl.load(src_ptr + base + CH_B, mask=mask_out, other=0).to(tl.int32)
             wx_2d = wx[None, :]
+            # Fixed-point horizontal convolution: sum(src * PIL_weight_int).
             hacc_r += p_r * wx_2d
             hacc_g += p_g * wx_2d
             hacc_b += p_b * wx_2d
 
+        # Match PIL's intermediate uint8 rounding before the vertical pass.
         q_r = (hacc_r + HALF_C) >> PRECISION_BITS_C
         q_g = (hacc_g + HALF_C) >> PRECISION_BITS_C
         q_b = (hacc_b + HALF_C) >> PRECISION_BITS_C
@@ -186,6 +251,8 @@ def horizontal_resize_uint8_all_channels_kernel(
 
         out_row = offs_y[:, None] * target_w + offs_x[None, :]
         channel_stride = src_h * target_w
+        # CHW scratch keeps the following vertical pass contiguous along x for
+        # one output channel at a time.
         tl.store(tmp_ptr + 0 * channel_stride + out_row, q_r, mask=mask_out)
         tl.store(tmp_ptr + 1 * channel_stride + out_row, q_g, mask=mask_out)
         tl.store(tmp_ptr + 2 * channel_stride + out_row, q_b, mask=mask_out)
@@ -213,7 +280,29 @@ def vertical_normalize_from_horizontal_kernel(
         BLOCK_H: tl.constexpr,
         BLOCK_W: tl.constexpr,
     ):
-        """Vertical PIL-antialias pass from uint8 scratch plus normalization."""
+        """Compute PIL's vertical resize pass and torchvision normalization.
+
+        Args:
+            tmp_ptr: CUDA uint8 CHW horizontal scratch, shape
+                ``(3, src_h, target_w)``.
+            dst_ptr: CUDA fp32 NCHW output, shape
+                ``(1, 3, target_h, target_w)``.
+            ymin_ptr: CUDA int32 starts table, shape ``(target_h,)``.
+            wy_ptr: CUDA int32 flattened weights table, shape
+                ``(target_h * KSIZE_Y,)``.
+            src_h: Logical source height after crop and after the horizontal
+                pass. This is the height of ``tmp_ptr``.
+            dst_stride_c/dst_stride_h: Output strides in elements.
+            target_h/target_w: Resized network input shape.
+            inv_std_255_*: Precomputed ``1 / (255 * std[channel])`` values.
+            offset_*: Precomputed ``-mean[channel] / std[channel]`` values.
+
+        Output:
+            Writes normalized fp32 NCHW data into ``dst_ptr``. This is the
+            tensor consumed directly by TensorRT.
+        """
+        # Program ids tile over output rows, output columns, and the three
+        # output channels. Channel-specific normalization is selected by pid_c.
         pid_y = tl.program_id(0)
         pid_x = tl.program_id(1)
         pid_c = tl.program_id(2)
@@ -224,6 +313,8 @@ def vertical_normalize_from_horizontal_kernel(
         mask_x = offs_x < target_w
         mask_out = mask_y[:, None] & mask_x[None, :]
 
+        # For each output y, load the first source row and PIL fixed-point
+        # weights for the vertical half of the separable resize.
         ymin = tl.load(ymin_ptr + offs_y, mask=mask_y, other=0)
 
         vacc = tl.zeros((BLOCK_H, BLOCK_W), dtype=tl.int32)
@@ -235,11 +326,15 @@ def vertical_normalize_from_horizontal_kernel(
             p = tl.load(
                 tmp_ptr + pid_c * src_h * target_w + base, mask=mask_out, other=0
             ).to(tl.int32)
+            # Fixed-point vertical convolution over the horizontally rounded
+            # scratch buffer, matching PIL's second resample pass.
             vacc += p * wy[:, None]
 
+        # Final PIL uint8 rounding/clamping before torchvision's to_tensor.
         q = (vacc + HALF_C) >> PRECISION_BITS_C
         q = tl.minimum(tl.maximum(q, 0), 255)
 
+        # Fuse TF.to_tensor() (`q / 255`) and TF.normalize().
         inv_std_255 = tl.where(
             pid_c == 0,
             inv_std_255_r,
@@ -257,7 +352,23 @@ def vertical_normalize_from_horizontal_kernel(
 
 
 class ResampleTables:
-    """Cache of per-axis PIL-int32 weight tables for one (src, dst) pair."""
+    """CUDA cache of PIL fixed-point resize tables for one shape pair.
+
+    Attributes:
+        ymin_gpu: int32 tensor with shape ``(target_h,)``. ``ymin_gpu[y]`` is
+            the first source row contributing to output row ``y``.
+        xmin_gpu: int32 tensor with shape ``(target_w,)``. ``xmin_gpu[x]`` is
+            the first source column contributing to output column ``x``.
+        wy_gpu: int32 flattened tensor with shape ``(target_h * ksize_y,)``.
+            Row ``y`` contains the fixed-point vertical weights for output row
+            ``y``.
+        wx_gpu: int32 flattened tensor with shape ``(target_w * ksize_x,)``.
+            Row ``x`` contains the fixed-point horizontal weights for output
+            column ``x``.
+        ksize_y/ksize_x: Static loop bounds for the vertical and horizontal
+            Triton kernels. They are determined by PIL's antialias support
+            radius for the current source/target scale.
+    """
 
     __slots__ = (
         "ymin_gpu",
@@ -286,6 +397,14 @@ def __init__(
 
 
 def resolve_two_pass_launch_config() -> Tuple[int, int, int, int]:
+    """Resolve block sizes for the two-pass Triton implementation.
+
+    Returns:
+        ``(vertical_block_h, vertical_block_w, horizontal_block_h,
+        horizontal_block_w)``. Defaults are tuned for the RF-DETR TRT workload,
+        while environment variables allow microbenchmark sweeps without code
+        changes.
+    """
     return (
         _read_power_of_two_env(_PREPROC_BLOCK_H_ENV, 1),
         _read_power_of_two_env(_PREPROC_BLOCK_W_ENV, 128),
@@ -301,6 +420,18 @@ def build_resample_tables(
     target_w: int,
     device: torch.device,
 ) -> ResampleTables:
+    """Build and upload PIL-compatible resample tables for one resize.
+
+    Args:
+        src_h/src_w: Effective source image dimensions after optional crop.
+        target_h/target_w: Network input dimensions after resize.
+        device: CUDA device where the Triton kernels will run.
+
+    Returns:
+        ``ResampleTables`` with all starts/weights already copied to ``device``.
+        The hot TRT path keeps this object in a shape-keyed cache so table
+        construction is not repeated per frame.
+    """
     ymin, wy, ksize_y = _bilinear_antialias_weights_1d_int(src_h, target_h)
     xmin, wx, ksize_x = _bilinear_antialias_weights_1d_int(src_w, target_w)
     return ResampleTables(
@@ -329,7 +460,34 @@ def triton_preprocess_rfdetr_stretch_two_pass_preallocated(
     crop_h: Optional[int] = None,
     crop_w: Optional[int] = None,
 ) -> torch.Tensor:
-    """Hot two-pass launch path for already validated/preallocated tensors."""
+    """Launch the fast two-pass preprocessor using caller-owned buffers.
+
+    This is the hot path used by the TensorRT adapter. It intentionally assumes
+    the caller already validated shapes, dtypes, device placement, and table
+    compatibility so each frame only pays for the HtoD copy and two Triton
+    kernel launches.
+
+    Args:
+        src: CUDA uint8 HWC source tensor, shape ``(raw_h, raw_w, 3)``.
+        out: CUDA fp32 NCHW output tensor, shape
+            ``(1, 3, target_h, target_w)``.
+        tmp: CUDA uint8 CHW scratch tensor, shape ``(3, src_h, target_w)``.
+        tables: ``ResampleTables`` built for ``(src_h, src_w)`` to
+            ``(target_h, target_w)``.
+        target_h/target_w: Network input dimensions.
+        means/stds: Per-channel normalization constants in output channel
+            order.
+        swap_rb: Whether to swap source red/blue channels while writing
+            network-order output channels.
+        launch_config: Block sizes returned by ``resolve_two_pass_launch_config``.
+        crop_offset_y/crop_offset_x: Optional top-left crop offset in ``src``.
+        crop_h/crop_w: Optional logical source shape after crop. When omitted,
+            the full source tensor shape is used.
+
+    Returns:
+        The same ``out`` tensor after scheduling both kernels on the current
+        CUDA stream.
+    """
     raw_src_h, raw_src_w = int(src.shape[0]), int(src.shape[1])
     src_h = crop_h if crop_h is not None else raw_src_h
     src_w = crop_w if crop_w is not None else raw_src_w
@@ -351,6 +509,8 @@ def triton_preprocess_rfdetr_stretch_two_pass_preallocated(
     offset_b = -means[2] / stds[2]
     block_h, block_w, horizontal_block_h, horizontal_block_w = launch_config
 
+    # First reproduce PIL's horizontal resize into uint8 scratch. This is the
+    # only pass that reads the raw HWC frame.
     horizontal_grid = (
         (src_h + horizontal_block_h - 1) // horizontal_block_h,
         (target_w + horizontal_block_w - 1) // horizontal_block_w,
@@ -376,6 +536,8 @@ def triton_preprocess_rfdetr_stretch_two_pass_preallocated(
         BLOCK_H=horizontal_block_h,
         BLOCK_W=horizontal_block_w,
     )
+    # Then reproduce PIL's vertical resize and fuse the torchvision tensor
+    # conversion and normalization into the final fp32 TensorRT input.
     grid = (
         (target_h + block_h - 1) // block_h,
         (target_w + block_w - 1) // block_w,
@@ -423,9 +585,9 @@ def triton_preprocess_rfdetr_stretch(
     """PIL-exact resize + color swap + normalize using the two-pass kernels.
 
     Args:
-        src: uint8 CUDA tensor, shape (H, W, 3), HWC layout.
+        src: uint8 CUDA tensor, shape ``(raw_h, raw_w, 3)``, HWC layout.
         tables: precomputed int32 resample tables sized against the *cropped*
-            source `(crop_h, crop_w)` → `(target_h, target_w)`.
+            source ``(crop_h, crop_w)`` to ``(target_h, target_w)``.
         target_h, target_w: output spatial dims.
         means, stds: normalization in output channel order (R, G, B for
             network_input.color_mode == 'rgb').
@@ -434,12 +596,13 @@ def triton_preprocess_rfdetr_stretch(
             means no crop.
         crop_h/_w: effective source dims after crop. Defaults to src dims
             when no crop is configured.
-        out: optional preallocated fp32 (1, 3, H, W) CUDA tensor.
-        tmp: optional preallocated uint8 (3, crop_h/raw_h, target_w) CUDA tensor
-            used by the horizontal pass.
+        out: optional preallocated fp32 ``(1, 3, target_h, target_w)`` CUDA
+            tensor.
+        tmp: optional preallocated uint8 ``(3, crop_h/raw_h, target_w)`` CUDA
+            tensor used by the horizontal pass.
 
     Returns:
-        fp32 (1, 3, target_h, target_w) on the same device as `src`.
+        fp32 ``(1, 3, target_h, target_w)`` on the same device as ``src``.
     """
     if not TRITON_AVAILABLE:
         raise MissingDependencyError(
diff --git a/inference_models/inference_models/models/rfdetr/triton_preprocess_runtime.py b/inference_models/inference_models/models/rfdetr/triton_preprocess_runtime.py
new file mode 100644
index 0000000000..7e72c28cad
--- /dev/null
+++ b/inference_models/inference_models/models/rfdetr/triton_preprocess_runtime.py
@@ -0,0 +1,416 @@
+"""Runtime glue for RF-DETR TensorRT Triton preprocessing.
+
+This module owns the pieces that are specific to running the Triton
+preprocessor inside the TensorRT RF-DETR model adapter: fast-path eligibility,
+warning throttling, reusable CUDA buffers, and CUDA event handoff to the TRT
+inference stream. The numerical resize/normalize kernels live in
+``triton_preprocess.py``; this file only decides when and how to launch them.
+"""
+
+from __future__ import annotations
+
+import warnings
+from dataclasses import dataclass
+from typing import List, Optional, Tuple
+
+import numpy as np
+import torch
+
+from inference_models.configuration import (
+    INFERENCE_MODELS_RFDETR_TRITON_PREPROC_ENABLED,
+)
+from inference_models.entities import ColorFormat, ImageDimensions
+from inference_models.models.common.roboflow.model_packages import (
+    ColorMode,
+    ImagePreProcessing,
+    NetworkInputDefinition,
+    PreProcessingMetadata,
+    ResizeMode,
+    StaticCropOffset,
+)
+
+try:
+    from inference_models.models.rfdetr.triton_preprocess import (
+        TRITON_AVAILABLE as _TRITON_AVAILABLE,
+        ResampleTables,
+        build_resample_tables,
+        resolve_two_pass_launch_config,
+        triton_preprocess_rfdetr_stretch_two_pass_preallocated,
+    )
+except ImportError:  # pragma: no cover - import-time dependency guard
+    _TRITON_AVAILABLE = False
+    ResampleTables = None
+    build_resample_tables = None
+    resolve_two_pass_launch_config = None
+    triton_preprocess_rfdetr_stretch_two_pass_preallocated = None
+
+
+_FAST_PATH_ENABLED = INFERENCE_MODELS_RFDETR_TRITON_PREPROC_ENABLED  # read per call
+_BUFFER_RING_SIZE = 3  # output/scratch buffer pairs allocated per shape-keyed state
+
+
+@dataclass(frozen=True)
+class FastPreprocessResult:
+    """Handle to Triton preprocessing work that has been enqueued on the GPU.
+
+    Attributes:
+        tensor: CUDA fp32 tensor with shape ``(1, 3, target_h, target_w)`` in
+            network color order and normalized with the model's mean/std.
+        metadata: Single-item preprocessing metadata list matching the reference
+            RF-DETR TRT preprocessing contract.
+        ready_event: Event recorded on the preprocessing stream after the HtoD
+            copy and both Triton kernels. The TensorRT stream must wait on this
+            event before consuming ``tensor``; the kernels may still be running.
+    """
+
+    tensor: torch.Tensor
+    metadata: List[PreProcessingMetadata]
+    ready_event: torch.cuda.Event
+
+
+class FastPreprocessState:
+    """Shape-specific CUDA/host buffers cached between preprocessing calls.
+
+    One instance serves a single ``(src_h, src_w, target_h, target_w)``
+    combination: those four numbers fix the PIL fixed-point resample tables as
+    well as the size of every buffer handed to the Triton kernels.
+
+    Attributes:
+        pinned_host: Page-locked CPU uint8 HWC staging tensor. Each incoming
+            numpy frame is written here so the host-to-device transfer that
+            follows can be issued with ``non_blocking=True``.
+        src_gpu: CUDA uint8 HWC tensor read by the horizontal Triton kernel.
+        out_buffers: Ring of CUDA fp32 ``(1, 3, target_h, target_w)`` tensors.
+            TensorRT or response finalization may still hold a returned tensor
+            while Python prepares the next frame, so outputs are rotated
+            instead of being overwritten in place.
+        tmp_buffers: Ring of CUDA uint8 ``(3, src_h, target_w)`` scratch
+            tensors for the horizontal pass, indexed in lockstep with
+            ``out_buffers`` so every submission has private scratch storage.
+        out_buffer_index: Position in the rings handed out by the next call to
+            ``next_buffers``.
+        tables: CUDA int32 PIL-compatible resample tables for this shape pair.
+        launch_config: Block sizes for the two Triton kernels, resolved once
+            when the state is built.
+    """
+
+    __slots__ = (
+        "src_h",
+        "src_w",
+        "target_h",
+        "target_w",
+        "pinned_host",
+        "src_gpu",
+        "out_buffers",
+        "tmp_buffers",
+        "out_buffer_index",
+        "tables",
+        "launch_config",
+    )
+
+    def __init__(
+        self,
+        src_h: int,
+        src_w: int,
+        target_h: int,
+        target_w: int,
+        pinned_host: torch.Tensor,
+        src_gpu: torch.Tensor,
+        out_buffers: List[torch.Tensor],
+        tmp_buffers: List[torch.Tensor],
+        tables: ResampleTables,
+        launch_config: Tuple[int, int, int, int],
+    ) -> None:
+        # Cache key: the shapes this state was allocated for.
+        self.src_h, self.src_w = src_h, src_w
+        self.target_h, self.target_w = target_h, target_w
+        # Kernel inputs that never change for this shape pair.
+        self.tables = tables
+        self.launch_config = launch_config
+        # Host staging plus device-side source and ring buffers.
+        self.pinned_host, self.src_gpu = pinned_host, src_gpu
+        self.out_buffers, self.tmp_buffers = out_buffers, tmp_buffers
+        # The ring cursor always starts at the first output/scratch pair.
+        self.out_buffer_index = 0
+
+    @classmethod
+    def build(
+        cls,
+        src_h: int,
+        src_w: int,
+        target_h: int,
+        target_w: int,
+        device: torch.device,
+    ) -> "FastPreprocessState":
+        """Allocate every buffer for one shape pair and upload its tables."""
+        shape_key = {
+            "src_h": src_h,
+            "src_w": src_w,
+            "target_h": target_h,
+            "target_w": target_w,
+        }
+        frame_shape = (src_h, src_w, 3)
+        # Page-locked staging buffer and its device-side twin share one shape.
+        staging = torch.empty(frame_shape, dtype=torch.uint8, pin_memory=True)
+        device_src = torch.empty(frame_shape, dtype=torch.uint8, device=device)
+        # Outputs are allocated before scratch buffers, one pair per ring slot.
+        outputs = [
+            torch.empty((1, 3, target_h, target_w), dtype=torch.float32, device=device)
+            for _ in range(_BUFFER_RING_SIZE)
+        ]
+        scratches = [
+            torch.empty((3, src_h, target_w), dtype=torch.uint8, device=device)
+            for _ in range(_BUFFER_RING_SIZE)
+        ]
+        resample_tables = build_resample_tables(device=device, **shape_key)
+        return cls(
+            pinned_host=staging,
+            src_gpu=device_src,
+            out_buffers=outputs,
+            tmp_buffers=scratches,
+            tables=resample_tables,
+            launch_config=resolve_two_pass_launch_config(),
+            **shape_key,
+        )
+
+    def is_stale(self, src_h: int, src_w: int, target_h: int, target_w: int) -> bool:
+        """Report whether this state was built for different dimensions.
+
+        Returns ``True`` as soon as any of the four requested sizes differs
+        from the size the buffers and tables were allocated for.
+        """
+        cached = (self.src_h, self.src_w, self.target_h, self.target_w)
+        return cached != (src_h, src_w, target_h, target_w)
+
+    def next_buffers(self) -> Tuple[torch.Tensor, torch.Tensor]:
+        """Hand out the current output/scratch pair and advance the ring."""
+        slot = self.out_buffer_index
+        pair = (self.out_buffers[slot], self.tmp_buffers[slot])
+        # Wrap around so the oldest pair is recycled once the ring is exhausted.
+        self.out_buffer_index = (slot + 1) % len(self.out_buffers)
+        return pair
+
+
+class FastPreprocessRuntime:
+    """Eligibility, launch, and CUDA handoff manager for Triton preprocessing.
+
+    ``RFDetrForInstanceSegmentationTRT`` owns one runtime instance for the life
+    of the model adapter. The runtime keeps all mutable fast-path state here so
+    the adapter only has to call ``try_preprocess(...)`` and, when a result is
+    returned, make the TensorRT stream wait on ``result.ready_event``.
+
+    Stream lifetime:
+        The caller supplies the preprocessing CUDA stream for each launch. This
+        runtime records the returned event on that stream after the host-to-
+        device copy and both Triton kernels. The output tensor also records the
+        stream so PyTorch's caching allocator cannot reuse its storage before
+        the asynchronous preprocessing work has finished.
+
+    Host staging:
+        The pinned staging buffer is shared by every frame, so the runtime
+        records a second event right after the host-to-device copy and waits
+        on it before the CPU writes the next image into that buffer.
+    """
+
+    def __init__(self, device: torch.device) -> None:
+        self._device = device
+        self._state: Optional[FastPreprocessState] = None
+        self._warned_reasons: set[str] = set()
+        # Completion marker for the latest async copy out of pinned memory.
+        self._h2d_done: Optional[torch.cuda.Event] = None
+
+    def try_preprocess(
+        self,
+        images,
+        input_color_format: Optional[ColorFormat],
+        image_size: Optional[Tuple[int, int]],
+        image_pre_processing: ImagePreProcessing,
+        network_input: NetworkInputDefinition,
+        stream: torch.cuda.Stream,
+    ) -> Optional[FastPreprocessResult]:
+        """Enqueue Triton preprocessing when the request is supported.
+
+        Args:
+            images: Single non-empty uint8 HWC numpy image, or a single-item
+                list containing one. Batch sizes greater than one use the
+                reference preprocessing path.
+            input_color_format: Caller-provided image color order. ``None`` is
+                treated as BGR, matching the model adapter default.
+            image_size: Per-call size override. Overrides are rejected because
+                the fast path is keyed to the model's configured network input.
+            image_pre_processing: Model package preprocessing config used for
+                fast-path eligibility checks.
+            network_input: Model package network-input config. Its resize mode,
+                color mode, normalization, and target size define the kernel
+                contract.
+            stream: CUDA stream where the HtoD copy and Triton kernels are
+                enqueued. It must be a real stream when the request is eligible.
+
+        Returns:
+            ``FastPreprocessResult`` after the GPU work has been scheduled, or
+            ``None`` when the opt-in flag is disabled or the request falls
+            outside the conservative fast-path contract. In that case the caller
+            should run the reference preprocessing path.
+        """
+        if not _FAST_PATH_ENABLED:
+            return None
+        unsupported_reason = self._unsupported_reason(
+            images=images,
+            image_size=image_size,
+            image_pre_processing=image_pre_processing,
+            network_input=network_input,
+        )
+        if unsupported_reason is not None:
+            self._warn_unsupported(unsupported_reason)
+            return None
+
+        candidate = images[0] if isinstance(images, list) else images
+        caller_mode = ColorMode.BGR
+        if input_color_format is not None:
+            caller_mode = ColorMode(input_color_format)
+        swap_rb = caller_mode != network_input.color_mode
+
+        means, stds = network_input.normalization
+        means_t = (float(means[0]), float(means[1]), float(means[2]))
+        stds_t = (float(stds[0]), float(stds[1]), float(stds[2]))
+        target_h = network_input.training_input_size.height
+        target_w = network_input.training_input_size.width
+        orig_h, orig_w = int(candidate.shape[0]), int(candidate.shape[1])
+
+        state = self._state
+        if state is None or state.is_stale(orig_h, orig_w, target_h, target_w):
+            state = self._state = FastPreprocessState.build(
+                src_h=orig_h,
+                src_w=orig_w,
+                target_h=target_h,
+                target_w=target_w,
+                device=self._device,
+            )
+
+        # The previous async HtoD copy may still be reading the pinned staging
+        # buffer; wait for it so the CPU write below cannot corrupt that frame.
+        if self._h2d_done is not None:
+            self._h2d_done.synchronize()
+        np.copyto(state.pinned_host.numpy(), candidate, casting="no")
+        out_buffer, tmp_buffer = state.next_buffers()
+
+        with torch.cuda.stream(stream):
+            state.src_gpu.copy_(state.pinned_host, non_blocking=True)
+            self._h2d_done = torch.cuda.Event()
+            self._h2d_done.record(stream)
+            triton_preprocess_rfdetr_stretch_two_pass_preallocated(
+                src=state.src_gpu,
+                out=out_buffer,
+                tmp=tmp_buffer,
+                tables=state.tables,
+                target_h=target_h,
+                target_w=target_w,
+                means=means_t,
+                stds=stds_t,
+                swap_rb=swap_rb,
+                launch_config=state.launch_config,
+            )
+            ready_event = torch.cuda.Event()
+            ready_event.record(stream)
+            out_buffer._trt_ready_event = ready_event  # type: ignore[attr-defined]
+            out_buffer.record_stream(stream)
+
+        metadata = [
+            PreProcessingMetadata(
+                pad_left=0,
+                pad_top=0,
+                pad_right=0,
+                pad_bottom=0,
+                original_size=ImageDimensions(width=orig_w, height=orig_h),
+                size_after_pre_processing=ImageDimensions(width=orig_w, height=orig_h),
+                inference_size=ImageDimensions(width=target_w, height=target_h),
+                scale_width=target_w / orig_w,
+                scale_height=target_h / orig_h,
+                static_crop_offset=StaticCropOffset(
+                    offset_x=0,
+                    offset_y=0,
+                    crop_width=orig_w,
+                    crop_height=orig_h,
+                ),
+            )
+        ]
+        out_buffer._pre_processing_meta = metadata  # type: ignore[attr-defined]
+        return FastPreprocessResult(
+            tensor=out_buffer,
+            metadata=metadata,
+            ready_event=ready_event,
+        )
+
+    def _unsupported_reason(
+        self,
+        images,
+        image_size: Optional[Tuple[int, int]],
+        image_pre_processing: ImagePreProcessing,
+        network_input: NetworkInputDefinition,
+    ) -> Optional[str]:
+        """Explain why the request must use the reference preprocessing path."""
+        if not _TRITON_AVAILABLE:
+            return "triton is not installed"
+        if self._device.type != "cuda":
+            return "CUDA device is required"
+        if image_size is not None:
+            return "custom image_size overrides are not supported"
+
+        # Overrides can only disable configured transforms; they cannot enable
+        # transforms. The fast path deliberately rejects model configs that ask
+        # for transforms whose pixel semantics are not implemented in Triton.
+        if (
+            (
+                image_pre_processing.static_crop is not None
+                and image_pre_processing.static_crop.enabled
+            )
+            or (
+                image_pre_processing.contrast is not None
+                and image_pre_processing.contrast.enabled
+            )
+            or (
+                image_pre_processing.grayscale is not None
+                and image_pre_processing.grayscale.enabled
+            )
+        ):
+            return "static crop, contrast, and grayscale preprocessing are unsupported"
+
+        if network_input.dataset_version_resize_dimensions is not None:
+            return "dataset-version resize is unsupported"
+        if network_input.input_channels != 3:
+            return "only 3-channel inputs are supported"
+        if network_input.scaling_factor not in (None, 255):
+            return "only scaling_factor None or 255 is supported"
+        if network_input.normalization is None:
+            return "normalization is required"
+        if network_input.resize_mode not in (
+            ResizeMode.STRETCH_TO,
+            ResizeMode.LETTERBOX,
+            ResizeMode.CENTER_CROP,
+            ResizeMode.LETTERBOX_REFLECT_EDGES,
+        ):
+            return f"resize mode {network_input.resize_mode!r} is unsupported"
+
+        if isinstance(images, list) and len(images) != 1:
+            return "only batch size 1 is supported"
+        candidate = images[0] if isinstance(images, list) else images
+        if not isinstance(candidate, np.ndarray):
+            return "only numpy ndarray inputs are supported"
+        is_hwc3 = candidate.ndim == 3 and candidate.shape[2] == 3
+        if candidate.dtype != np.uint8 or not is_hwc3:
+            return "input must be uint8 HWC with 3 channels"
+        # A zero-sized image would divide by zero in the scale metadata.
+        if candidate.shape[0] == 0 or candidate.shape[1] == 0:
+            return "empty images are not supported"
+        return None
+
+    def _warn_unsupported(self, reason: str) -> None:
+        """Emit one RuntimeWarning per distinct fallback reason."""
+        if reason in self._warned_reasons:
+            return
+        self._warned_reasons.add(reason)
+        warnings.warn(
+            f"RF-DETR Triton preprocess path is unsupported: {reason}",
+            RuntimeWarning,
+            stacklevel=4,
+        )
diff --git a/inference_models/tests/integration_tests/models/test_rfdetr_seg_predictions_trt.py b/inference_models/tests/integration_tests/models/test_rfdetr_seg_predictions_trt.py
index 4e1986d64e..18c7b1f063 100644
--- a/inference_models/tests/integration_tests/models/test_rfdetr_seg_predictions_trt.py
+++ b/inference_models/tests/integration_tests/models/test_rfdetr_seg_predictions_trt.py
@@ -475,7 +475,7 @@ def test_trt_triton_preprocess_output_matches_reference_preprocess(
     if not torch.cuda.is_available():
         pytest.skip("CUDA is required for Triton preprocessing parity")
 
-    from inference_models.models.rfdetr import rfdetr_instance_segmentation_trt
+    from inference_models.models.rfdetr import triton_preprocess_runtime
     from inference_models.models.rfdetr.rfdetr_instance_segmentation_trt import (
         RFDetrForInstanceSegmentationTRT,
     )
@@ -494,11 +494,11 @@ def test_trt_triton_preprocess_output_matches_reference_preprocess(
             pytest.skip("TRT engine package is not compatible with this platform")
         raise
 
-    monkeypatch.setattr(rfdetr_instance_segmentation_trt, "_FAST_PATH_ENABLED", False)
+    monkeypatch.setattr(triton_preprocess_runtime, "_FAST_PATH_ENABLED", False)
     reference_predictions = model(asl_image_numpy)
 
     original_triton_preprocess = (
-        rfdetr_instance_segmentation_trt.triton_preprocess_rfdetr_stretch_two_pass_preallocated
+        triton_preprocess_runtime.triton_preprocess_rfdetr_stretch_two_pass_preallocated
     )
     triton_calls = {"count": 0}
 
@@ -507,11 +507,11 @@ def counting_triton_preprocess(*args, **kwargs):
         return original_triton_preprocess(*args, **kwargs)
 
     monkeypatch.setattr(
-        rfdetr_instance_segmentation_trt,
+        triton_preprocess_runtime,
         "triton_preprocess_rfdetr_stretch_two_pass_preallocated",
         counting_triton_preprocess,
     )
-    monkeypatch.setattr(rfdetr_instance_segmentation_trt, "_FAST_PATH_ENABLED", True)
+    monkeypatch.setattr(triton_preprocess_runtime, "_FAST_PATH_ENABLED", True)
     triton_predictions = model(asl_image_numpy)
 
     assert triton_calls["count"] == 1
diff --git a/inference_models/tests/unit_tests/models/rfdetr/test_trt_preprocess_fast_path.py b/inference_models/tests/unit_tests/models/rfdetr/test_trt_preprocess_fast_path.py
index d47050e569..a33f530cf7 100644
--- a/inference_models/tests/unit_tests/models/rfdetr/test_trt_preprocess_fast_path.py
+++ b/inference_models/tests/unit_tests/models/rfdetr/test_trt_preprocess_fast_path.py
@@ -1,5 +1,4 @@
 import warnings
-from types import SimpleNamespace
 
 import numpy as np
 import pytest
@@ -15,14 +14,13 @@
     ResizeMode,
     TrainingInputSize,
 )
-from inference_models.models.rfdetr import triton_preprocess
-
-pytest.importorskip("tensorrt")
-pytest.importorskip("pycuda.driver")
-
 from inference_models.models.rfdetr import (
-    rfdetr_instance_segmentation_trt,
-)  # noqa: E402
+    triton_preprocess,
+    triton_preprocess_runtime,
+)
+from inference_models.models.rfdetr.triton_preprocess_runtime import (
+    FastPreprocessRuntime,
+)
 
 _IMAGENET_MEAN = (0.485, 0.456, 0.406)
 _IMAGENET_STD = (0.229, 0.224, 0.225)
@@ -44,21 +42,6 @@ def _network_input(
     )
 
 
-def _adapter_for_fast_preprocess(network_input: NetworkInputDefinition):
-    model = object.__new__(
-        rfdetr_instance_segmentation_trt.RFDetrForInstanceSegmentationTRT
-    )
-    model._inference_config = SimpleNamespace(
-        image_pre_processing=ImagePreProcessing(),
-        network_input=network_input,
-    )
-    model._device = torch.device("cuda")
-    model._pre_process_cuda_stream = torch.cuda.Stream(device=model._device)
-    model._fast_path_state = None
-    model._fast_preprocess_warned_reasons = set()
-    return model
-
-
 def _reference_preprocess(image_rgb: np.ndarray, target_h: int, target_w: int):
     resized = TF.resize(
         Image.fromarray(image_rgb),
@@ -77,25 +60,20 @@ def _reference_preprocess(image_rgb: np.ndarray, target_h: int, target_w: int):
 def test_trt_fast_preprocess_warns_once_for_unsupported_batch(
     monkeypatch: pytest.MonkeyPatch,
 ) -> None:
-    monkeypatch.setattr(rfdetr_instance_segmentation_trt, "_FAST_PATH_ENABLED", True)
-    monkeypatch.setattr(rfdetr_instance_segmentation_trt, "_TRITON_AVAILABLE", True)
-    model = object.__new__(
-        rfdetr_instance_segmentation_trt.RFDetrForInstanceSegmentationTRT
-    )
-    model._inference_config = SimpleNamespace(
-        image_pre_processing=ImagePreProcessing(),
-        network_input=_network_input(),
-    )
-    model._fast_preprocess_warned_reasons = set()
+    monkeypatch.setattr(triton_preprocess_runtime, "_FAST_PATH_ENABLED", True)
+    monkeypatch.setattr(triton_preprocess_runtime, "_TRITON_AVAILABLE", True)
+    runtime = FastPreprocessRuntime(device=torch.device("cuda"))
     image = np.zeros((8, 8, 3), dtype=np.uint8)
 
     with pytest.warns(RuntimeWarning, match="only batch size 1 is supported"):
         assert (
-            model._try_fast_preprocess(
+            runtime.try_preprocess(
                 images=[image, image],
                 input_color_format="bgr",
                 image_size=None,
-                pre_processing_overrides=None,
+                image_pre_processing=ImagePreProcessing(),
+                network_input=_network_input(),
+                stream=None,
             )
             is None
         )
@@ -103,11 +81,13 @@ def test_trt_fast_preprocess_warns_once_for_unsupported_batch(
     with warnings.catch_warnings(record=True) as recorded:
         warnings.simplefilter("always")
         assert (
-            model._try_fast_preprocess(
+            runtime.try_preprocess(
                 images=[image, image],
                 input_color_format="bgr",
                 image_size=None,
-                pre_processing_overrides=None,
+                image_pre_processing=ImagePreProcessing(),
+                network_input=_network_input(),
+                stream=None,
             )
             is None
         )
@@ -121,27 +101,30 @@ def test_trt_fast_preprocess_warns_once_for_unsupported_batch(
 def test_trt_fast_preprocess_matches_reference_and_metadata(
     monkeypatch: pytest.MonkeyPatch,
 ) -> None:
-    monkeypatch.setattr(rfdetr_instance_segmentation_trt, "_FAST_PATH_ENABLED", True)
-    monkeypatch.setattr(rfdetr_instance_segmentation_trt, "_TRITON_AVAILABLE", True)
+    monkeypatch.setattr(triton_preprocess_runtime, "_FAST_PATH_ENABLED", True)
+    monkeypatch.setattr(triton_preprocess_runtime, "_TRITON_AVAILABLE", True)
     target_h, target_w = 64, 64
-    model = _adapter_for_fast_preprocess(
-        network_input=_network_input(target_h=target_h, target_w=target_w),
-    )
+    runtime = FastPreprocessRuntime(device=torch.device("cuda"))
+    stream = torch.cuda.Stream(device=torch.device("cuda"))
     rng = np.random.default_rng(seed=71)
     image_rgb = rng.integers(0, 256, size=(96, 80, 3), dtype=np.uint8)
     image_bgr = image_rgb[:, :, ::-1].copy()
 
-    actual, metadata = model._try_fast_preprocess(
+    result = runtime.try_preprocess(
         images=image_bgr,
         input_color_format="bgr",
         image_size=None,
-        pre_processing_overrides=None,
+        image_pre_processing=ImagePreProcessing(),
+        network_input=_network_input(target_h=target_h, target_w=target_w),
+        stream=stream,
     )
-    actual._trt_ready_event.synchronize()  # type: ignore[attr-defined]
+    assert result is not None
+    result.ready_event.synchronize()
 
     expected = _reference_preprocess(image_rgb, target_h=target_h, target_w=target_w)
-    torch.testing.assert_close(actual.cpu(), expected, atol=1e-6, rtol=0)
+    torch.testing.assert_close(result.tensor.cpu(), expected, atol=1e-6, rtol=0)
 
+    metadata = result.metadata
     assert metadata[0].original_size == ImageDimensions(height=96, width=80)
     assert metadata[0].size_after_pre_processing == ImageDimensions(
         height=96,
@@ -151,4 +134,4 @@ def test_trt_fast_preprocess_matches_reference_and_metadata(
         height=target_h,
         width=target_w,
     )
-    assert actual._pre_processing_meta == metadata  # type: ignore[attr-defined]
+    assert result.tensor._pre_processing_meta == metadata  # type: ignore[attr-defined]

From ab811f2ba303034dc38b34a1a721549e059124a8 Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Wed, 3 Jun 2026 01:04:23 +0000
Subject: [PATCH 26/76] Tighten RF-DETR Triton preproc review coverage

---
 .../rfdetr_preprocess_microbenchmark.py       | 180 +++-------------
 .../rfdetr/test_trt_preprocess_fast_path.py   | 202 +++++++++++++++++-
 2 files changed, 232 insertions(+), 150 deletions(-)

diff --git a/development/stream_interface/rfdetr_preprocess_microbenchmark.py b/development/stream_interface/rfdetr_preprocess_microbenchmark.py
index b8397bd852..d06a1a9263 100644
--- a/development/stream_interface/rfdetr_preprocess_microbenchmark.py
+++ b/development/stream_interface/rfdetr_preprocess_microbenchmark.py
@@ -12,9 +12,12 @@
     python development/stream_interface/rfdetr_preprocess_microbenchmark.py \
         --mode replay --cases-dir temp/rfdetr_preprocess_cases
 
-The TRT RF-DETR model has a Triton fast path that bypasses this function. Capture
-mode forces ``INFERENCE_MODELS_RFDETR_TRITON_PREPROC_ENABLED=false`` before
-loading the workflow so the reference preprocessing function is exercised.
+The TRT RF-DETR model has a Triton fast path that bypasses this function.
+Capture mode forces ``INFERENCE_MODELS_RFDETR_TRITON_PREPROC_ENABLED=false``
+before loading the workflow so the reference preprocessing function is
+exercised. Triton replay uses the same ``FastPreprocessRuntime`` helper as the
+TRT adapter rather than duplicating eligibility, buffer, and metadata logic in
+this harness.
 """
 
 import argparse
@@ -32,7 +35,6 @@
 import numpy as np
 import torch
 
-
 _REPO_ROOT = Path(__file__).resolve().parents[2]
 _INFERENCE_MODELS_ROOT = _REPO_ROOT / "inference_models"
 _WORKFLOW_PATH = (
@@ -41,7 +43,7 @@
 _TARGET_FUNCTION = "pre_process_network_input"
 _SCHEMA_VERSION = 1
 _FORCED_PREPROC_ENV = "INFERENCE_MODELS_RFDETR_TRITON_PREPROC_ENABLED"
-_TRITON_REPLAY_STATE: Dict[Tuple[str, int, int, int, int], Any] = {}
+_TRITON_REPLAY_RUNTIMES: Dict[str, Any] = {}
 
 
 def _ensure_local_import_paths() -> None:
@@ -341,159 +343,47 @@ def _materialize_inputs(case: dict, device_override: str) -> dict:
     }
 
 
-def _uses_enabled(config: Optional[Any]) -> bool:
-    return bool(config is not None and config.enabled)
-
-
 def _run_triton_fast_preprocess(inputs: dict) -> Tuple[torch.Tensor, List[Any]]:
-    from inference_models.entities import ImageDimensions
-    from inference_models.models.common.roboflow.model_packages import (
-        ColorMode,
-        PreProcessingMetadata,
-        ResizeMode,
-        StaticCropOffset,
-    )
-    from inference_models.models.rfdetr.triton_preprocess import (
-        TRITON_AVAILABLE,
-        build_resample_tables,
-        triton_preprocess_rfdetr_stretch,
+    from inference_models.models.rfdetr import triton_preprocess_runtime
+    from inference_models.models.rfdetr.triton_preprocess_runtime import (
+        FastPreprocessRuntime,
     )
 
-    if not TRITON_AVAILABLE:
-        raise RuntimeError("Triton RF-DETR preprocessing is not available")
-
     target_device = inputs["target_device"]
     if target_device.type != "cuda":
         raise RuntimeError(
             f"Triton replay requires CUDA target_device, got {target_device}"
         )
 
-    images = inputs["images"]
-    if isinstance(images, list):
-        if len(images) != 1:
-            raise RuntimeError("Triton replay only supports batch size 1")
-        candidate = images[0]
-    else:
-        candidate = images
-    if (
-        not isinstance(candidate, np.ndarray)
-        or candidate.dtype != np.uint8
-        or candidate.ndim != 3
-        or candidate.shape[2] != 3
-    ):
-        raise RuntimeError(
-            "Triton replay only supports one uint8 HWC ndarray input; "
-            f"got type={type(candidate)} shape={getattr(candidate, 'shape', None)}"
-        )
-
-    if inputs["image_size_wh"] is not None:
-        raise RuntimeError("Triton replay does not support image_size_wh overrides")
-
-    image_pre_processing = inputs["image_pre_processing"]
-    if (
-        _uses_enabled(image_pre_processing.static_crop)
-        or _uses_enabled(image_pre_processing.contrast)
-        or _uses_enabled(image_pre_processing.grayscale)
-    ):
-        raise RuntimeError(
-            "Triton replay only supports cases without static crop, contrast, "
-            "or grayscale preprocessing"
-        )
+    # Capture may have forced the environment flag off in this process. Replay
+    # mode is an explicit request to exercise the Triton path, so force the
+    # runtime gate on while still using the production runtime helper.
+    triton_preprocess_runtime._FAST_PATH_ENABLED = True
 
-    network_input = inputs["network_input"]
-    if network_input.dataset_version_resize_dimensions is not None:
-        raise RuntimeError("Triton replay does not support dataset-version resize")
-    if network_input.input_channels != 3:
-        raise RuntimeError("Triton replay only supports 3 input channels")
-    if network_input.scaling_factor not in (None, 255):
-        raise RuntimeError(
-            "Triton replay only supports scaling_factor in (None, 255)"
+    runtime_state = _TRITON_REPLAY_RUNTIMES.get(str(target_device))
+    if runtime_state is None:
+        runtime_state = (
+            FastPreprocessRuntime(device=target_device),
+            torch.cuda.Stream(device=target_device),
         )
-    if network_input.normalization is None:
-        raise RuntimeError("Triton replay requires network_input.normalization")
-    if network_input.resize_mode not in (
-        ResizeMode.STRETCH_TO,
-        ResizeMode.LETTERBOX,
-        ResizeMode.CENTER_CROP,
-        ResizeMode.LETTERBOX_REFLECT_EDGES,
-    ):
-        raise RuntimeError(
-            f"Triton replay does not support resize_mode={network_input.resize_mode}"
-        )
-
-    caller_mode = (
-        ColorMode(inputs["input_color_format"])
-        if inputs["input_color_format"] is not None
-        else ColorMode.BGR
+        _TRITON_REPLAY_RUNTIMES[str(target_device)] = runtime_state
+    runtime, stream = runtime_state
+
+    result = runtime.try_preprocess(
+        images=inputs["images"],
+        input_color_format=inputs["input_color_format"],
+        image_size=inputs["image_size_wh"],
+        image_pre_processing=inputs["image_pre_processing"],
+        network_input=inputs["network_input"],
+        stream=stream,
     )
-    swap_rb = caller_mode != network_input.color_mode
-
-    means, stds = network_input.normalization
-    means_t = (float(means[0]), float(means[1]), float(means[2]))
-    stds_t = (float(stds[0]), float(stds[1]), float(stds[2]))
-    target_h = network_input.training_input_size.height
-    target_w = network_input.training_input_size.width
-    orig_h, orig_w = int(candidate.shape[0]), int(candidate.shape[1])
-
-    state_key = (str(target_device), orig_h, orig_w, target_h, target_w)
-    state = _TRITON_REPLAY_STATE.get(state_key)
-    if state is None:
-        pinned_host = torch.empty(
-            (orig_h, orig_w, 3), dtype=torch.uint8, pin_memory=True
-        )
-        src_gpu = torch.empty(
-            (orig_h, orig_w, 3), dtype=torch.uint8, device=target_device
-        )
-        out_buffer = torch.empty(
-            (1, 3, target_h, target_w), dtype=torch.float32, device=target_device
-        )
-        tables = build_resample_tables(
-            src_h=orig_h,
-            src_w=orig_w,
-            target_h=target_h,
-            target_w=target_w,
-            device=target_device,
+    if result is None:
+        raise RuntimeError(
+            "Captured preprocess case is not supported by FastPreprocessRuntime; "
+            "run replay with --replay-implementation reference to benchmark the "
+            "reference path."
         )
-        state = {
-            "pinned_host": pinned_host,
-            "src_gpu": src_gpu,
-            "out_buffer": out_buffer,
-            "tables": tables,
-        }
-        _TRITON_REPLAY_STATE[state_key] = state
-
-    pinned_np = state["pinned_host"].numpy()
-    np.copyto(pinned_np, candidate, casting="no")
-    state["src_gpu"].copy_(state["pinned_host"], non_blocking=True)
-    triton_preprocess_rfdetr_stretch(
-        src=state["src_gpu"],
-        tables=state["tables"],
-        target_h=target_h,
-        target_w=target_w,
-        means=means_t,
-        stds=stds_t,
-        swap_rb=swap_rb,
-        out=state["out_buffer"],
-    )
-
-    meta = PreProcessingMetadata(
-        pad_left=0,
-        pad_top=0,
-        pad_right=0,
-        pad_bottom=0,
-        original_size=ImageDimensions(width=orig_w, height=orig_h),
-        size_after_pre_processing=ImageDimensions(width=orig_w, height=orig_h),
-        inference_size=ImageDimensions(width=target_w, height=target_h),
-        scale_width=target_w / orig_w,
-        scale_height=target_h / orig_h,
-        static_crop_offset=StaticCropOffset(
-            offset_x=0,
-            offset_y=0,
-            crop_width=orig_w,
-            crop_height=orig_h,
-        ),
-    )
-    return state["out_buffer"], [meta]
+    return result.tensor, result.metadata
 
 
 def _synchronize(device: torch.device) -> None:
diff --git a/inference_models/tests/unit_tests/models/rfdetr/test_trt_preprocess_fast_path.py b/inference_models/tests/unit_tests/models/rfdetr/test_trt_preprocess_fast_path.py
index a33f530cf7..072889c0b2 100644
--- a/inference_models/tests/unit_tests/models/rfdetr/test_trt_preprocess_fast_path.py
+++ b/inference_models/tests/unit_tests/models/rfdetr/test_trt_preprocess_fast_path.py
@@ -9,9 +9,13 @@
 from inference_models.entities import ImageDimensions
 from inference_models.models.common.roboflow.model_packages import (
     ColorMode,
+    Contrast,
+    ContrastType,
+    Grayscale,
     ImagePreProcessing,
     NetworkInputDefinition,
     ResizeMode,
+    StaticCrop,
     TrainingInputSize,
 )
 from inference_models.models.rfdetr import (
@@ -24,21 +28,53 @@
 
 _IMAGENET_MEAN = (0.485, 0.456, 0.406)
 _IMAGENET_STD = (0.229, 0.224, 0.225)
+_DEFAULT_NORMALIZATION = object()
 
 
 def _network_input(
     target_h: int = 64,
     target_w: int = 64,
+    dataset_version_resize_dimensions=None,
+    resize_mode: ResizeMode = ResizeMode.STRETCH_TO,
+    input_channels: int = 3,
+    scaling_factor=255,
+    normalization=_DEFAULT_NORMALIZATION,
 ) -> NetworkInputDefinition:
+    if normalization is _DEFAULT_NORMALIZATION:
+        normalization = [list(_IMAGENET_MEAN), list(_IMAGENET_STD)]
     return NetworkInputDefinition(
         training_input_size=TrainingInputSize(height=target_h, width=target_w),
-        dataset_version_resize_dimensions=None,
+        dataset_version_resize_dimensions=dataset_version_resize_dimensions,
         dynamic_spatial_size_supported=False,
         color_mode=ColorMode.RGB,
-        resize_mode=ResizeMode.STRETCH_TO,
-        input_channels=3,
-        scaling_factor=255,
-        normalization=[list(_IMAGENET_MEAN), list(_IMAGENET_STD)],
+        resize_mode=resize_mode,
+        input_channels=input_channels,
+        scaling_factor=scaling_factor,
+        normalization=normalization,
+    )
+
+
+def _call_fast_preprocess(
+    runtime: FastPreprocessRuntime,
+    *,
+    images=None,
+    image_size=None,
+    image_pre_processing=None,
+    network_input=None,
+):
+    if images is None:
+        images = np.zeros((8, 8, 3), dtype=np.uint8)
+    if image_pre_processing is None:
+        image_pre_processing = ImagePreProcessing()
+    if network_input is None:
+        network_input = _network_input()
+    return runtime.try_preprocess(
+        images=images,
+        input_color_format="bgr",
+        image_size=image_size,
+        image_pre_processing=image_pre_processing,
+        network_input=network_input,
+        stream=None,
     )
 
 
@@ -94,6 +130,162 @@ def test_trt_fast_preprocess_warns_once_for_unsupported_batch(
     assert recorded == []
 
 
+def test_trt_fast_preprocess_flag_disabled_returns_none_without_warning(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    monkeypatch.setattr(triton_preprocess_runtime, "_FAST_PATH_ENABLED", False)
+    monkeypatch.setattr(triton_preprocess_runtime, "_TRITON_AVAILABLE", True)
+    runtime = FastPreprocessRuntime(device=torch.device("cuda"))
+
+    with warnings.catch_warnings(record=True) as recorded:
+        warnings.simplefilter("always")
+        assert _call_fast_preprocess(runtime) is None
+    assert recorded == []
+
+
+@pytest.mark.parametrize(
+    ("runtime_device", "kwargs", "reason"),
+    [
+        (
+            torch.device("cuda"),
+            {},
+            "triton is not installed",
+        ),
+    ],
+)
+def test_trt_fast_preprocess_warns_for_unavailable_runtime(
+    monkeypatch: pytest.MonkeyPatch,
+    runtime_device: torch.device,
+    kwargs,
+    reason: str,
+) -> None:
+    monkeypatch.setattr(triton_preprocess_runtime, "_FAST_PATH_ENABLED", True)
+    monkeypatch.setattr(triton_preprocess_runtime, "_TRITON_AVAILABLE", False)
+    runtime = FastPreprocessRuntime(device=runtime_device)
+
+    with pytest.warns(RuntimeWarning, match=reason):
+        assert _call_fast_preprocess(runtime, **kwargs) is None
+
+
+@pytest.mark.parametrize(
+    ("runtime_device", "kwargs", "reason"),
+    [
+        (
+            torch.device("cpu"),
+            {},
+            "CUDA device is required",
+        ),
+        (
+            torch.device("cuda"),
+            {"image_size": (32, 32)},
+            "custom image_size overrides are not supported",
+        ),
+        (
+            torch.device("cuda"),
+            {
+                "image_pre_processing": ImagePreProcessing.model_validate(
+                    {
+                        "static-crop": StaticCrop(
+                            enabled=True,
+                            x_min=0,
+                            x_max=8,
+                            y_min=0,
+                            y_max=8,
+                        )
+                    }
+                )
+            },
+            "static crop, contrast, and grayscale preprocessing are unsupported",
+        ),
+        (
+            torch.device("cuda"),
+            {
+                "image_pre_processing": ImagePreProcessing(
+                    contrast=Contrast(
+                        enabled=True,
+                        type=ContrastType.CONTRAST_STRETCHING,
+                    )
+                )
+            },
+            "static crop, contrast, and grayscale preprocessing are unsupported",
+        ),
+        (
+            torch.device("cuda"),
+            {
+                "image_pre_processing": ImagePreProcessing(
+                    grayscale=Grayscale(enabled=True)
+                )
+            },
+            "static crop, contrast, and grayscale preprocessing are unsupported",
+        ),
+        (
+            torch.device("cuda"),
+            {
+                "network_input": _network_input(
+                    dataset_version_resize_dimensions=TrainingInputSize(
+                        height=8,
+                        width=8,
+                    )
+                )
+            },
+            "dataset-version resize is unsupported",
+        ),
+        (
+            torch.device("cuda"),
+            {"network_input": _network_input(input_channels=1)},
+            "only 3-channel inputs are supported",
+        ),
+        (
+            torch.device("cuda"),
+            {"network_input": _network_input(scaling_factor=1)},
+            "only scaling_factor None or 255 is supported",
+        ),
+        (
+            torch.device("cuda"),
+            {"network_input": _network_input(normalization=None)},
+            "normalization is required",
+        ),
+        (
+            torch.device("cuda"),
+            {"network_input": _network_input(resize_mode=ResizeMode.FIT_LONGER_EDGE)},
+            "resize mode",
+        ),
+        (
+            torch.device("cuda"),
+            {"images": torch.zeros((8, 8, 3), dtype=torch.uint8)},
+            "only numpy ndarray inputs are supported",
+        ),
+        (
+            torch.device("cuda"),
+            {"images": np.zeros((8, 8, 3), dtype=np.float32)},
+            "input must be uint8 HWC with 3 channels",
+        ),
+        (
+            torch.device("cuda"),
+            {"images": np.zeros((8, 8), dtype=np.uint8)},
+            "input must be uint8 HWC with 3 channels",
+        ),
+        (
+            torch.device("cuda"),
+            {"images": np.zeros((8, 8, 1), dtype=np.uint8)},
+            "input must be uint8 HWC with 3 channels",
+        ),
+    ],
+)
+def test_trt_fast_preprocess_warns_for_unsupported_requests(
+    monkeypatch: pytest.MonkeyPatch,
+    runtime_device: torch.device,
+    kwargs,
+    reason: str,
+) -> None:
+    monkeypatch.setattr(triton_preprocess_runtime, "_FAST_PATH_ENABLED", True)
+    monkeypatch.setattr(triton_preprocess_runtime, "_TRITON_AVAILABLE", True)
+    runtime = FastPreprocessRuntime(device=runtime_device)
+
+    with pytest.warns(RuntimeWarning, match=reason):
+        assert _call_fast_preprocess(runtime, **kwargs) is None
+
+
 @pytest.mark.skipif(
     not torch.cuda.is_available() or not triton_preprocess.TRITON_AVAILABLE,
     reason="CUDA and Triton are required",

From e3d8543587ac70b1c5fc1162c724323cf8e19dd0 Mon Sep 17 00:00:00 2001
From: Grzegorz Klimaszewski
 <166530809+grzegorz-roboflow@users.noreply.github.com>
Date: Wed, 3 Jun 2026 17:17:08 +0200
Subject: [PATCH 27/76] roboflow/inference-server-experimental for building
 images from feat/new-model-manager (#2406)

---
 .github/workflows/docker.inference_server.yml | 118 ++++++++++++++++++
 1 file changed, 118 insertions(+)
 create mode 100644 .github/workflows/docker.inference_server.yml

diff --git a/.github/workflows/docker.inference_server.yml b/.github/workflows/docker.inference_server.yml
new file mode 100644
index 0000000000..ab6238bbd9
--- /dev/null
+++ b/.github/workflows/docker.inference_server.yml
@@ -0,0 +1,118 @@
+name: Build inference_server images
+
+# Manual only. When you click "Run workflow", pick your branch as the ref —
+# it builds that ref's code (never main on its own). Tick the targets you want;
+# tick none = build all. (workflow_dispatch only appears in the UI once this
+# file is on the default branch — a one-time merge; it still never auto-runs.)
+permissions:
+  contents: read
+on:
+  workflow_dispatch:
+    inputs:
+      gpu:
+        type: boolean
+        description: "Build GPU (cu124, amd64)"
+        default: false
+      cpu:
+        type: boolean
+        description: "Build CPU (amd64)"
+        default: false
+      jetson_5_1_1:
+        type: boolean
+        description: "Build Jetson JP5.1.1 (arm64)"
+        default: false
+      jetson_6_0_0:
+        type: boolean
+        description: "Build Jetson JP6.0 (arm64)"
+        default: false
+      jetson_6_2_0:
+        type: boolean
+        description: "Build Jetson JP6.2 (arm64)"
+        default: false
+      tag_suffix:
+        type: string
+        description: "Postfix appended to image tag, e.g. -test → :gpu-0.1.0-test"
+        default: ""
+      push:
+        type: boolean
+        description: "Push images after build (otherwise build-only)"
+        default: false
+
+env:
+  IMAGE: roboflow/inference-server-experimental
+
+jobs:
+  setup:
+    runs-on: ubuntu-latest
+    outputs:
+      matrix: ${{ steps.set.outputs.matrix }}
+      version: ${{ steps.ver.outputs.version }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v6
+      - name: Read inference_server version
+        id: ver
+        run: |
+          v=$(python -c "import tomllib; print(tomllib.load(open('inference_server/pyproject.toml','rb'))['project']['version'])")
+          echo "version=$v" >> "$GITHUB_OUTPUT"
+          echo "inference_server version: $v"
+      - name: Compute build matrix (none selected = all)
+        id: set
+        env:
+          GPU: ${{ inputs.gpu }}
+          CPU: ${{ inputs.cpu }}
+          J511: ${{ inputs.jetson_5_1_1 }}
+          J600: ${{ inputs.jetson_6_0_0 }}
+          J620: ${{ inputs.jetson_6_2_0 }}
+        run: |
+          set -euo pipefail
+          all_json=$(cat <<EOF
+          [
+            {"name":"gpu","dockerfile":"inference_server/docker/Dockerfile.gpu","platform":"linux/amd64","depot_project":"grl7ffzxd7","sel":"$GPU"},
+            {"name":"cpu","dockerfile":"inference_server/docker/Dockerfile.cpu","platform":"linux/amd64","depot_project":"grl7ffzxd7","sel":"$CPU"},
+            {"name":"jetson-5.1.1","dockerfile":"inference_server/docker/Dockerfile.jetson.5.1.1","platform":"linux/arm64","depot_project":"2rp7mfjw7q","sel":"$J511"},
+            {"name":"jetson-6.0.0","dockerfile":"inference_server/docker/Dockerfile.jetson.6.0.0","platform":"linux/arm64","depot_project":"2rp7mfjw7q","sel":"$J600"},
+            {"name":"jetson-6.2.0","dockerfile":"inference_server/docker/Dockerfile.jetson.6.2.0","platform":"linux/arm64","depot_project":"2rp7mfjw7q","sel":"$J620"}
+          ]
+          EOF
+          )
+          selected=$(echo "$all_json" | jq -c '[.[] | select(.sel=="true")]')
+          if [ "$(echo "$selected" | jq 'length')" -eq 0 ]; then
+            selected="$all_json"
+          fi
+          matrix=$(echo "$selected" | jq -c '{include: [.[] | {name, dockerfile, platform, depot_project}]}')
+          echo "matrix=$matrix" >> "$GITHUB_OUTPUT"
+          echo "$matrix" | jq .
+
+  build:
+    needs: setup
+    runs-on: ubuntu-latest
+    timeout-minutes: 180
+    permissions:
+      id-token: write
+      contents: read
+    strategy:
+      fail-fast: false
+      matrix: ${{ fromJson(needs.setup.outputs.matrix) }}
+    name: build-${{ matrix.name }}
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v6
+      - name: Set up Depot CLI
+        uses: depot/setup-action@v1
+      # jp5.1.1 base (roboflow/l4t-ml) is private on Docker Hub.
+      - name: Login to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+      - name: Build (and optionally push)
+        uses: depot/build-push-action@v1
+        with:
+          project: ${{ matrix.depot_project }}
+          context: .
+          file: ${{ matrix.dockerfile }}
+          platforms: ${{ matrix.platform }}
+          pull: true
+          push: ${{ inputs.push }}
+          tags: ${{ env.IMAGE }}:${{ matrix.name }}-${{ needs.setup.outputs.version }}${{ inputs.tag_suffix }}

From d0e13c0c230d3405960d78ae4517a1d23b3f9c1c Mon Sep 17 00:00:00 2001
From: Rodrigo Barbosa <rodrigo@roboflow.com>
Date: Thu, 4 Jun 2026 12:14:15 -0300
Subject: [PATCH 28/76] fix batch processing (#2411)

* fix batch processing

* style: run black on asset_library_attributes v1
---
 .../roboflow/asset_library_attributes/v1.py   | 72 +++++++++++----
 .../asset_library_attributes/test_v1.py       | 88 +++++++++++++++++++
 2 files changed, 144 insertions(+), 16 deletions(-)

diff --git a/inference/core/workflows/core_steps/sinks/roboflow/asset_library_attributes/v1.py b/inference/core/workflows/core_steps/sinks/roboflow/asset_library_attributes/v1.py
index aad7050d88..8ec4d487ce 100644
--- a/inference/core/workflows/core_steps/sinks/roboflow/asset_library_attributes/v1.py
+++ b/inference/core/workflows/core_steps/sinks/roboflow/asset_library_attributes/v1.py
@@ -271,18 +271,34 @@ class EffectiveUpdates(NamedTuple):
     results: List[Optional[Dict[str, Any]]]
 
 
+def _resolve_element(v: Any, index: int) -> Any:
+    return v if not isinstance(v, Batch) else v[index]
+
+
+def _normalize_to_per_row(value: Any, n: int) -> List[Any]:
+    """Expand ``value`` into a per-row list, indexing nested Batch members by row."""
+    # A top-level Batch is converted with list() regardless of n.
+    if isinstance(value, Batch):
+        return list(value)
+    if isinstance(value, dict) and any(
+        isinstance(item, Batch) for item in value.values()
+    ):
+        return [{k: _resolve_element(v, i) for k, v in value.items()} for i in range(n)]
+    if isinstance(value, list) and any(isinstance(item, Batch) for item in value):
+        return [[_resolve_element(item, i) for item in value] for i in range(n)]
+    # None, scalars and containers without Batch members are repeated n times;
+    # every row then references the same object.
+    return [value] * n
+
+
 def build_effective_updates(
     source_ids: Batch[str],
     metadata: Optional[Union[Dict[str, Any], Batch[Optional[Dict[str, Any]]]]],
     tags: Optional[Union[List[str], Batch[Optional[List[str]]]]],
 ) -> EffectiveUpdates:
     n = len(source_ids)
-    metadata_values: List[Optional[Dict[str, Any]]] = (
-        list(metadata) if isinstance(metadata, Batch) else [metadata] * n
-    )
-    tag_values: List[Optional[List[str]]] = (
-        list(tags) if isinstance(tags, Batch) else [tags] * n
-    )
+    metadata_values = _normalize_to_per_row(metadata, n)
+    tag_values = _normalize_to_per_row(tags, n)
     results: List[Optional[Dict[str, Any]]] = [None] * n
     updates_by_source_id: Dict[str, Dict[str, Any]] = {}
     result_indices_by_id: Dict[str, List[int]] = {}
@@ -312,6 +328,30 @@ def build_effective_updates(
     )
 
 
+def _extract_response_body(error: Exception) -> str:
+    """Return the response body attached to `error` or its `__cause__`, if any."""
+    for exc in (error, getattr(error, "__cause__", None)):
+        response = getattr(exc, "response", None)
+        if response is None:
+            continue
+        # Prefer the parsed JSON payload; otherwise fall back to the first 500
+        # characters of the raw text. A reader that raises is simply skipped.
+        for read_body in (lambda r: str(r.json()), lambda r: r.text[:500]):
+            try:
+                return read_body(response)
+            except Exception:
+                continue
+    return ""
+
+
+def _format_api_error(prefix: str, error: Exception) -> str:
+    """Combine `prefix`, the error type and text, and any response body."""
+    body = _extract_response_body(error)
+    message = f"{prefix}. Error type: {type(error).__name__}. Details: {error}"
+    # The response-body suffix is appended only when a non-empty body was found.
+    return f"{message}. Response body: {body}" if body else message
+
+
 def call_single_image_endpoint(
     workspace_id: str,
     update: Dict[str, Any],
@@ -326,15 +366,15 @@ def call_single_image_endpoint(
             add_tags=update.get("addTags"),
         )
     except Exception as error:
+        message = _format_api_error(
+            "Error while updating Asset Library attributes", error
+        )
         logging.warning(
             "Could not update Asset Library attributes for image %s: %s",
             update["imageId"],
-            error,
+            message,
         )
-        return {
-            "error_status": True,
-            "message": f"Error while updating Asset Library attributes. Error type: {type(error)}. Details: {error}",
-        }
+        return {"error_status": True, "message": message}
     return {"error_status": False, "message": UPDATE_SUCCESS_MESSAGE}
 
 
@@ -352,13 +392,13 @@ def call_batch_endpoint(
         if not response.get("taskId"):
             raise ValueError("Malformed image metadata batch response: missing taskId")
     except Exception as error:
+        message = _format_api_error(
+            "Error while submitting Asset Library attributes update", error
+        )
         logging.warning(
-            "Could not submit Asset Library attributes batch update: %s", error
+            "Could not submit Asset Library attributes batch update: %s", message
         )
-        return {
-            "error_status": True,
-            "message": f"Error while submitting Asset Library attributes update. Error type: {type(error)}. Details: {error}",
-        }
+        return {"error_status": True, "message": message}
     logging.info(
         "Submitted Asset Library attributes batch update: updates=%d",
         len(updates),
diff --git a/tests/workflows/unit_tests/core_steps/sinks/roboflow/asset_library_attributes/test_v1.py b/tests/workflows/unit_tests/core_steps/sinks/roboflow/asset_library_attributes/test_v1.py
index 36c4f12bc3..81a535a1db 100644
--- a/tests/workflows/unit_tests/core_steps/sinks/roboflow/asset_library_attributes/test_v1.py
+++ b/tests/workflows/unit_tests/core_steps/sinks/roboflow/asset_library_attributes/test_v1.py
@@ -13,6 +13,9 @@
     UPDATE_SUCCESS_MESSAGE,
     BlockManifest,
     RoboflowAssetLibraryAttributesBlockV1,
+    _extract_response_body,
+    _format_api_error,
+    _normalize_to_per_row,
     build_effective_updates,
 )
 from inference.core.workflows.execution_engine.entities.base import Batch
@@ -205,6 +208,44 @@ def test_run_batch_endpoint_error_returns_per_image_error(
     assert "preflight error" in result[1]["message"]
 
 
+def test_error_message_includes_response_body_from_http_error(
+    block: RoboflowAssetLibraryAttributesBlockV1, mocked_v1
+) -> None:
+    """When the API returns a 400 with a JSON body, the error message should include it."""
+    fake_response = mock.MagicMock()
+    fake_response.json.return_value = {"error": "tags must be a list of strings"}
+    http_error = Exception("400 Bad Request")
+    http_error.response = fake_response
+    mocked_v1.update_single.side_effect = http_error
+
+    result = block.run(
+        source_id=make_batch(["img-1"]),
+        metadata={"color": "red"},
+    )
+
+    assert result[0]["error_status"] is True
+    assert "tags must be a list of strings" in result[0]["message"]
+
+
+def test_extract_response_body_from_chained_cause() -> None:
+    fake_response = mock.MagicMock()
+    fake_response.json.return_value = {"detail": "invalid field"}
+    inner = Exception("HTTP 400")
+    inner.response = fake_response
+    outer = RuntimeError("wrapped")
+    outer.__cause__ = inner
+
+    body = _extract_response_body(outer)
+    assert "invalid field" in body
+
+
+def test_format_api_error_without_response_body() -> None:
+    msg = _format_api_error("Something failed", ValueError("bad input"))
+    assert "ValueError" in msg
+    assert "bad input" in msg
+    assert "Response body" not in msg
+
+
 def test_run_raises_when_effective_update_count_exceeds_batch_limit(
     block: RoboflowAssetLibraryAttributesBlockV1, mocked_v1
 ) -> None:
@@ -217,6 +258,53 @@ def test_run_raises_when_effective_update_count_exceeds_batch_limit(
         )
 
 
+def test_normalize_to_per_row_none() -> None:
+    assert _normalize_to_per_row(None, 3) == [None, None, None]
+
+
+def test_normalize_to_per_row_batch() -> None:
+    assert _normalize_to_per_row(make_batch(["a", "b"]), 2) == ["a", "b"]
+
+
+def test_normalize_to_per_row_plain_scalar_broadcasts() -> None:
+    d = {"color": "red"}
+    result = _normalize_to_per_row(d, 2)
+    assert result == [d, d]
+
+
+def test_normalize_to_per_row_dict_with_batch_values() -> None:
+    result = _normalize_to_per_row(
+        {"color": make_batch(["red", "blue"]), "static": "abc"}, 2
+    )
+    assert result == [
+        {"color": "red", "static": "abc"},
+        {"color": "blue", "static": "abc"},
+    ]
+
+
+def test_normalize_to_per_row_list_with_batch_values() -> None:
+    result = _normalize_to_per_row(
+        ["static-tag", make_batch(["label-a", "label-b"])], 2
+    )
+    assert result == [
+        ["static-tag", "label-a"],
+        ["static-tag", "label-b"],
+    ]
+
+
+def test_build_effective_updates_resolves_batch_values_in_metadata_and_tags() -> None:
+    effective = build_effective_updates(
+        source_ids=make_batch(["img-1", "img-2"]),
+        metadata={"score": make_batch([0.9, 0.3])},
+        tags=[make_batch(["cat", "dog"])],
+    )
+
+    assert effective.updates == [
+        {"imageId": "img-1", "metadata": {"score": 0.9}, "addTags": ["cat"]},
+        {"imageId": "img-2", "metadata": {"score": 0.3}, "addTags": ["dog"]},
+    ]
+
+
 def test_run_uses_injected_offloader_instead_of_calling_api(mocked_v1) -> None:
     offloader = mock.MagicMock(
         return_value={"error_status": False, "message": "queued"}

From b54110a68b722ea48110493834885f7f4d7feca6 Mon Sep 17 00:00:00 2001
From: nkuneman <nikuneman@gmail.com>
Date: Thu, 4 Jun 2026 13:38:42 -0400
Subject: [PATCH 29/76] Nk/add volume support (#2413)

* Add volume mount support to inference server container

Adds a --volume / -v CLI flag to `inference server start` and a
corresponding `volumes` parameter to `start_inference_container()`,
allowing users to bind-mount host directories into the container.

* Apply black formatting

* Remove unused import

* Document --volume flag in server CLI docs
---
 docs/inference_helpers/cli_commands/server.md | 16 ++++++
 inference_cli/lib/container_adapter.py        |  3 +-
 inference_cli/server.py                       | 23 ++++++++-
 .../unit_tests/test_container_adapter.py      | 50 ++++++++++++++++++-
 4 files changed, 89 insertions(+), 3 deletions(-)

diff --git a/docs/inference_helpers/cli_commands/server.md b/docs/inference_helpers/cli_commands/server.md
index ee83da89c6..84eb73668e 100644
--- a/docs/inference_helpers/cli_commands/server.md
+++ b/docs/inference_helpers/cli_commands/server.md
@@ -37,6 +37,22 @@ in case that values of internal parameters needs to be adjusted. Any value passe
 is considered as more important and will shadow the value defined in `.env` file under the same target variable name.
 
 
+### Volume Mounts
+
+Use the `--volume` (or `-v`) flag to mount a host directory into the container. This is useful for persisting files written by workflows (e.g. via the `local_file_sink` block) to your local machine.
+
+```bash
+inference server start --volume /host/path:/container/path
+```
+
+You can mount multiple volumes by repeating the flag:
+
+```bash
+inference server start --volume /host/data:/data --volume /host/models:/models:ro
+```
+
+The optional `:ro` suffix mounts the volume as read-only. If omitted, the volume is mounted read-write.
+
 ### Development Mode
 
 Use the `--dev` flag to start the Inference Server in development mode. Development mode enables the Inference Server's built in notebook environment for easy testing and development.
diff --git a/inference_cli/lib/container_adapter.py b/inference_cli/lib/container_adapter.py
index be4cbe4d6e..4d24870690 100644
--- a/inference_cli/lib/container_adapter.py
+++ b/inference_cli/lib/container_adapter.py
@@ -212,6 +212,7 @@ def start_inference_container(
     env_file_path: Optional[str] = None,
     development: bool = False,
     use_local_images: bool = False,
+    volumes: Optional[Dict[str, dict]] = None,
 ) -> None:
     containers = find_running_inference_containers()
     if len(containers) > 0:
@@ -279,7 +280,7 @@ def start_inference_container(
             else None
         ),
         read_only=not is_jetson,
-        volumes={"/tmp": {"bind": "/tmp", "mode": "rw"}},
+        volumes={"/tmp": {"bind": "/tmp", "mode": "rw"}, **(volumes or {})},
         network_mode="bridge",
         ipc_mode="private" if not is_jetson else None,
         **docker_run_kwargs,
diff --git a/inference_cli/server.py b/inference_cli/server.py
index e1e18ece06..a197082876 100644
--- a/inference_cli/server.py
+++ b/inference_cli/server.py
@@ -1,4 +1,4 @@
-from typing import Optional
+from typing import List, Optional
 
 import typer
 from typing_extensions import Annotated
@@ -87,6 +87,14 @@ def start(
             help="Flag controlling if metrics are enabled (default is True)",
         ),
     ] = True,
+    volumes: Annotated[
+        Optional[List[str]],
+        typer.Option(
+            "--volume",
+            "-v",
+            help="Volume mount in the format /host/path:/container/path[:ro]. Can be specified multiple times.",
+        ),
+    ] = None,
 ) -> None:
 
     try:
@@ -95,6 +103,18 @@ def start(
         typer.echo(docker_error)
         raise typer.Exit(code=1) from docker_error
 
+    parsed_volumes = {}
+    for v in volumes or []:
+        # Accept only host:container[:mode]; empty or surplus fields are rejected.
+        parts = v.split(":")
+        if len(parts) not in (2, 3) or not all(parts):
+            typer.echo(
+                f"Invalid volume format: {v}. Expected /host/path:/container/path[:ro]"
+            )
+            raise typer.Exit(code=1)
+        host_path, container_path, mode = (parts + ["rw"])[:3]
+        parsed_volumes[host_path] = {"bind": container_path, "mode": mode}
+
     try:
         start_inference_container(
             image=image,
@@ -105,6 +125,7 @@ def start(
             api_key=api_key,
             use_local_images=use_local_images,
             metrics_enabled=metrics_enabled,
+            volumes=parsed_volumes or None,
         )
     except Exception as container_error:
         typer.echo(container_error)
diff --git a/tests/inference_cli/unit_tests/test_container_adapter.py b/tests/inference_cli/unit_tests/test_container_adapter.py
index 44cecd2508..9f297e8ad0 100644
--- a/tests/inference_cli/unit_tests/test_container_adapter.py
+++ b/tests/inference_cli/unit_tests/test_container_adapter.py
@@ -2,7 +2,10 @@
 from unittest.mock import MagicMock
 
 from inference_cli.lib import container_adapter
-from inference_cli.lib.container_adapter import prepare_container_environment
+from inference_cli.lib.container_adapter import (
+    prepare_container_environment,
+    start_inference_container,
+)
 
 
 @mock.patch.object(container_adapter, "read_env_file")
@@ -43,6 +46,51 @@ def test_prepare_container_environment_when_env_file_defined(
     read_env_file_mock.assert_called_once_with(path="my_env_file")
 
 
+@mock.patch.object(container_adapter, "pull_image")
+@mock.patch.object(
+    container_adapter, "find_running_inference_containers", return_value=[]
+)
+@mock.patch.object(container_adapter, "docker")
+def test_start_inference_container_default_tmp_volume_always_present(
+    docker_mock: MagicMock,
+    _find_containers_mock: MagicMock,
+    _pull_image_mock: MagicMock,
+) -> None:
+    # when
+    start_inference_container(image="roboflow/roboflow-inference-server-cpu:latest")
+
+    # then
+    _, kwargs = docker_mock.from_env.return_value.containers.run.call_args
+    assert "/tmp" in kwargs["volumes"]
+    assert kwargs["volumes"]["/tmp"] == {"bind": "/tmp", "mode": "rw"}
+
+
+@mock.patch.object(container_adapter, "pull_image")
+@mock.patch.object(
+    container_adapter, "find_running_inference_containers", return_value=[]
+)
+@mock.patch.object(container_adapter, "docker")
+def test_start_inference_container_user_volumes_merged_with_tmp(
+    docker_mock: MagicMock,
+    _find_containers_mock: MagicMock,
+    _pull_image_mock: MagicMock,
+) -> None:
+    # given
+    user_volumes = {"/home/user/data": {"bind": "/data", "mode": "rw"}}
+
+    # when
+    start_inference_container(
+        image="roboflow/roboflow-inference-server-cpu:latest",
+        volumes=user_volumes,
+    )
+
+    # then
+    _, kwargs = docker_mock.from_env.return_value.containers.run.call_args
+    assert "/tmp" in kwargs["volumes"]
+    assert "/home/user/data" in kwargs["volumes"]
+    assert kwargs["volumes"]["/home/user/data"] == {"bind": "/data", "mode": "rw"}
+
+
 def test_prepare_container_environment_when_env_file_not_defined() -> None:
     # when
     result = prepare_container_environment(

From 1e3d5a98b394463d34d8f3bfe98abce14dda563c Mon Sep 17 00:00:00 2001
From: Patrick Nihranz <patrick.nihranz@roboflow.com>
Date: Thu, 4 Jun 2026 14:00:52 -0400
Subject: [PATCH 30/76] Add roboflow_core/current_time@v1 Workflow block
 (#2410)

* Expose NumberInRange operator in workflow builder UI

The (Number) in range operator was already implemented in the evaluation
engine and BinaryStatement union but was not included in the introspection
export, making it invisible to the workflow builder UI.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* Implement NumberInRange operator in query language backend

Adds the NumberInRange BinaryOperator class and its evaluation lambda so
the operator exposed in the workflow builder UI has a working backend.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* Add roboflow_core/current_time@v1 Workflow block

New formatter block that outputs the current date/time for a user-selected
timezone. Inputs a curated IANA timezone (literal or selector, default UTC) and
returns a timezone-aware timestamp plus iso_string, date, and time strings.

Uses stdlib zoneinfo (backports.zoneinfo for py<3.9) and adds tzdata so the
timezone database is available on slim/Windows runtimes. Curated dropdown options
expose friendly UTC-offset labels via values_metadata. Registered in loader.py.
Includes unit tests and a full execution-engine integration test.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>

* Remove zoneinfo backport dependency

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../query_language/entities/operations.py     |  16 ++
 .../query_language/evaluation_engine/core.py  |   1 +
 .../query_language/introspection/core.py      |   2 +
 .../formatters/current_time/__init__.py       |   0
 .../core_steps/formatters/current_time/v1.py  | 159 +++++++++++++++
 inference/core/workflows/core_steps/loader.py |   4 +
 requirements/_requirements.txt                |   6 +-
 .../test_workflow_with_current_time.py        |  60 ++++++
 .../formatters/test_current_time.py           | 183 ++++++++++++++++++
 9 files changed, 428 insertions(+), 3 deletions(-)
 create mode 100644 inference/core/workflows/core_steps/formatters/current_time/__init__.py
 create mode 100644 inference/core/workflows/core_steps/formatters/current_time/v1.py
 create mode 100644 tests/workflows/integration_tests/execution/test_workflow_with_current_time.py
 create mode 100644 tests/workflows/unit_tests/core_steps/formatters/test_current_time.py

diff --git a/inference/core/workflows/core_steps/common/query_language/entities/operations.py b/inference/core/workflows/core_steps/common/query_language/entities/operations.py
index 078da5c0cc..64d5c92655 100644
--- a/inference/core/workflows/core_steps/common/query_language/entities/operations.py
+++ b/inference/core/workflows/core_steps/common/query_language/entities/operations.py
@@ -765,6 +765,21 @@ class NumberLowerEqual(BinaryOperator):
     type: Literal["(Number) <="]
 
 
+class NumberInRange(BinaryOperator):
+    model_config = ConfigDict(
+        json_schema_extra={
+            "description": "Checks if first value (number) is within the inclusive range given as second value (list of two numbers: [min, max])",
+            "operands_number": 2,
+            "operands_kinds": [
+                [INTEGER_KIND, FLOAT_KIND, FLOAT_ZERO_TO_ONE_KIND],
+                [LIST_OF_VALUES_KIND],
+            ],
+            "output_kind": [BOOLEAN_KIND],
+        },
+    )
+    type: Literal["(Number) in range"]
+
+
 class StringStartsWith(BinaryOperator):
     model_config = ConfigDict(
         json_schema_extra={
@@ -994,6 +1009,7 @@ class BinaryStatement(BaseModel):
             StringContains,
             StringEndsWith,
             StringStartsWith,
+            NumberInRange,
             NumberLowerEqual,
             NumberLower,
             NumberGreaterEqual,
diff --git a/inference/core/workflows/core_steps/common/query_language/evaluation_engine/core.py b/inference/core/workflows/core_steps/common/query_language/evaluation_engine/core.py
index 6b615f3054..ad8d37cefb 100644
--- a/inference/core/workflows/core_steps/common/query_language/evaluation_engine/core.py
+++ b/inference/core/workflows/core_steps/common/query_language/evaluation_engine/core.py
@@ -34,6 +34,7 @@
     "(Number) >=": lambda a, b: a >= b,
     "(Number) <": lambda a, b: a < b,
     "(Number) <=": lambda a, b: a <= b,
+    "(Number) in range": lambda a, b: b[0] <= a <= b[1],
     "(String) startsWith": lambda a, b: a.startswith(b),
     "(String) endsWith": lambda a, b: a.endswith(b),
     "(String) contains": lambda a, b: b in a,
diff --git a/inference/core/workflows/core_steps/common/query_language/introspection/core.py b/inference/core/workflows/core_steps/common/query_language/introspection/core.py
index c16009e8d8..71106d302b 100644
--- a/inference/core/workflows/core_steps/common/query_language/introspection/core.py
+++ b/inference/core/workflows/core_steps/common/query_language/introspection/core.py
@@ -16,6 +16,7 @@
     NotEquals,
     NumberGreater,
     NumberGreaterEqual,
+    NumberInRange,
     NumberLower,
     NumberLowerEqual,
     OperationsChain,
@@ -104,6 +105,7 @@ def prepare_operators_descriptions() -> List[OperatorDescription]:
         NumberLower,
         NumberGreaterEqual,
         NumberGreater,
+        NumberInRange,
         NotEquals,
         Equals,
         Exists,
diff --git a/inference/core/workflows/core_steps/formatters/current_time/__init__.py b/inference/core/workflows/core_steps/formatters/current_time/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/inference/core/workflows/core_steps/formatters/current_time/v1.py b/inference/core/workflows/core_steps/formatters/current_time/v1.py
new file mode 100644
index 0000000000..05693880c0
--- /dev/null
+++ b/inference/core/workflows/core_steps/formatters/current_time/v1.py
@@ -0,0 +1,159 @@
+from datetime import datetime
+from typing import Dict, List, Literal, Optional, Type, Union
+from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
+
+from pydantic import ConfigDict, Field
+
+from inference.core.workflows.execution_engine.entities.base import OutputDefinition
+from inference.core.workflows.execution_engine.entities.types import (
+    STRING_KIND,
+    TIMESTAMP_KIND,
+    Selector,
+)
+from inference.core.workflows.prototypes.block import (
+    BlockResult,
+    WorkflowBlock,
+    WorkflowBlockManifest,
+)
+
+LONG_DESCRIPTION = """
+Output the current date and time for a given timezone.
+
+Provide one of the curated timezone options (for example `America/New_York`,
+`Europe/Berlin`, or `UTC`) and the block returns the current moment in that
+timezone. The block produces a `timestamp` (a timezone-aware `datetime` object you
+can pass to other blocks), along with ready-to-use `iso_string`, `date`, and
+`time` strings.
+
+The timezone may be a literal value typed into the block, or a reference to a workflow
+input or another step's output.
+"""
+
+SHORT_DESCRIPTION = "Output the current date and time for a given timezone."
+TIMEZONE_OPTIONS = (
+    ("Etc/GMT+12", "UTC-12 International Date Line West"),
+    ("Pacific/Pago_Pago", "UTC-11 Samoa Time (SST)"),
+    ("Pacific/Honolulu", "UTC-10 Hawaii-Aleutian Time (HST/HAST)"),
+    ("Pacific/Marquesas", "UTC-9:30 Marquesas Time (MART)"),
+    ("America/Anchorage", "UTC-9/-8 Alaska Time (AKST/AKDT)"),
+    ("Pacific/Gambier", "UTC-9 Gambier Time (GAMT)"),
+    ("America/Los_Angeles", "UTC-8/-7 Pacific Time (PST/PDT)"),
+    ("America/Denver", "UTC-7/-6 Mountain Time (MST/MDT)"),
+    ("America/Phoenix", "UTC-7 Mountain Standard Time (MST)"),
+    ("America/Chicago", "UTC-6/-5 Central Time (CST/CDT)"),
+    ("America/Mexico_City", "UTC-6 Mexico / Central America Time (CST)"),
+    ("America/New_York", "UTC-5/-4 Eastern Time (EST/EDT)"),
+    ("America/Bogota", "UTC-5 Colombia / Peru Time (COT/PET)"),
+    ("America/Halifax", "UTC-4/-3 Atlantic Time (AST/ADT)"),
+    ("America/Puerto_Rico", "UTC-4 Atlantic Standard Time (AST)"),
+    ("America/St_Johns", "UTC-3:30/-2:30 Newfoundland Time (NST/NDT)"),
+    ("America/Sao_Paulo", "UTC-3 Brasilia Time (BRT)"),
+    ("Atlantic/South_Georgia", "UTC-2 Mid-Atlantic Time (GST)"),
+    ("Atlantic/Azores", "UTC-1/+0 Azores Time (AZOT/AZOST)"),
+    ("Atlantic/Cape_Verde", "UTC-1 Cape Verde Time (CVT)"),
+    ("UTC", "UTC+0 Greenwich Mean Time (GMT/WET)"),
+    ("Europe/London", "UTC+0/+1 UK / Western European Time (GMT/BST/WET/WEST)"),
+    ("Europe/Berlin", "UTC+1/+2 Central European Time (CET/CEST)"),
+    ("Africa/Lagos", "UTC+1 West Africa Time (WAT)"),
+    ("Europe/Kyiv", "UTC+2/+3 Eastern European Time (EET/EEST)"),
+    ("Africa/Cairo", "UTC+2/+3 Egypt Time (EET/EEST)"),
+    ("Africa/Johannesburg", "UTC+2 South Africa Time (SAST)"),
+    ("Europe/Moscow", "UTC+3 Moscow Time (MSK)"),
+    ("Europe/Istanbul", "UTC+3 Turkey Time (TRT)"),
+    ("Africa/Nairobi", "UTC+3 East Africa Time (EAT)"),
+    ("Asia/Tehran", "UTC+3:30 Iran Time (IRST)"),
+    ("Asia/Dubai", "UTC+4 Gulf Time (GST)"),
+    ("Asia/Kabul", "UTC+4:30 Afghanistan Time (AFT)"),
+    ("Asia/Karachi", "UTC+5 Pakistan Time (PKT)"),
+    ("Asia/Kolkata", "UTC+5:30 India Time (IST)"),
+    ("Asia/Kathmandu", "UTC+5:45 Nepal Time (NPT)"),
+    ("Asia/Dhaka", "UTC+6 Bangladesh Time (BST)"),
+    ("Asia/Yangon", "UTC+6:30 Myanmar Time (MMT)"),
+    ("Asia/Bangkok", "UTC+7 Indochina Time (ICT)"),
+    ("Asia/Shanghai", "UTC+8 China / Western Australia Time (CST/AWST/PHT)"),
+    ("Australia/Eucla", "UTC+8:45 Central Western Australia Time (ACWST)"),
+    ("Asia/Tokyo", "UTC+9 Japan / Korea Time (JST/KST)"),
+    ("Australia/Darwin", "UTC+9:30 Australian Central Standard Time (ACST)"),
+    ("Australia/Adelaide", "UTC+9:30/+10:30 Australian Central Time (ACST/ACDT)"),
+    ("Australia/Sydney", "UTC+10/+11 Australian Eastern Time (AEST/AEDT)"),
+    ("Pacific/Port_Moresby", "UTC+10 Papua New Guinea Time (PGT)"),
+    ("Australia/Lord_Howe", "UTC+10:30/+11 Lord Howe Time (LHST/LHDT)"),
+    ("Pacific/Guadalcanal", "UTC+11 Solomon Islands Time (SBT)"),
+    ("Pacific/Norfolk", "UTC+11/+12 Norfolk Island Time (NFT/NFDT)"),
+    ("Pacific/Fiji", "UTC+12 Fiji Time (FJT)"),
+    ("Pacific/Auckland", "UTC+12/+13 New Zealand Time (NZST/NZDT)"),
+    ("Pacific/Chatham", "UTC+12:45/+13:45 Chatham Time (CHAST/CHADT)"),
+    ("Pacific/Tongatapu", "UTC+13 Tonga Time (TOT)"),
+    ("Pacific/Apia", "UTC+13 Samoa Time (WSST)"),
+    ("Pacific/Kiritimati", "UTC+14 Line Islands Time (LINT)"),
+)
+ALLOWED_TIMEZONES = tuple(timezone for timezone, _ in TIMEZONE_OPTIONS)
+ALLOWED_TIMEZONE_SET = frozenset(ALLOWED_TIMEZONES)
+TIMEZONE_METADATA: Dict[str, Dict[str, str]] = {
+    timezone: {"name": label} for timezone, label in TIMEZONE_OPTIONS
+}
+
+
+class BlockManifest(WorkflowBlockManifest):
+    model_config = ConfigDict(
+        json_schema_extra={
+            "name": "Current Time",
+            "version": "v1",
+            "short_description": SHORT_DESCRIPTION,
+            "long_description": LONG_DESCRIPTION,
+            "license": "Apache-2.0",
+            "block_type": "formatter",
+            "ui_manifest": {
+                "section": "advanced",
+                "icon": "far fa-clock",
+                "blockPriority": 10,
+            },
+        }
+    )
+    type: Literal["roboflow_core/current_time@v1"]
+    timezone: Union[Literal[ALLOWED_TIMEZONES], Selector(kind=[STRING_KIND])] = Field(  # type: ignore
+        default="UTC",
+        description="Curated IANA timezone name to report the current time in.",
+        examples=["UTC", "America/New_York", "Europe/Berlin", "$inputs.timezone"],
+        json_schema_extra={"values_metadata": TIMEZONE_METADATA},
+    )
+
+    @classmethod
+    def describe_outputs(cls) -> List[OutputDefinition]:
+        return [
+            OutputDefinition(name="timestamp", kind=[TIMESTAMP_KIND]),
+            OutputDefinition(name="iso_string", kind=[STRING_KIND]),
+            OutputDefinition(name="date", kind=[STRING_KIND]),
+            OutputDefinition(name="time", kind=[STRING_KIND]),
+        ]
+
+    @classmethod
+    def get_execution_engine_compatibility(cls) -> Optional[str]:
+        return ">=1.3.0,<2.0.0"
+
+
+class CurrentTimeBlockV1(WorkflowBlock):
+    """Workflow block that outputs the current date and time for a timezone."""
+
+    @classmethod
+    def get_manifest(cls) -> Type[WorkflowBlockManifest]:
+        return BlockManifest
+
+    def run(self, timezone: str = "UTC") -> BlockResult:
+        """Return the current moment in `timezone` plus ISO, date and time strings."""
+        hint = "Provide one of the curated timezone options shown in the block dropdown."
+        head = "`roboflow_core/current_time@v1` received"
+        if timezone not in ALLOWED_TIMEZONE_SET:
+            raise ValueError(f"{head} unsupported timezone '{timezone}'. {hint}")
+        try:
+            now = datetime.now(ZoneInfo(timezone))
+        except ZoneInfoNotFoundError as error:
+            # A curated name can still be absent from the local timezone database.
+            raise ValueError(f"{head} unknown timezone '{timezone}'. {hint}") from error
+        date_part, time_part = now.strftime("%Y-%m-%d"), now.strftime("%H:%M:%S")
+        return {
+            "timestamp": now,
+            "iso_string": now.isoformat(),
+            "date": date_part,
+            "time": time_part,
+        }
diff --git a/inference/core/workflows/core_steps/loader.py b/inference/core/workflows/core_steps/loader.py
index 3a24b417ff..10a5137819 100644
--- a/inference/core/workflows/core_steps/loader.py
+++ b/inference/core/workflows/core_steps/loader.py
@@ -153,6 +153,9 @@
     RateLimiterBlockV1,
 )
 from inference.core.workflows.core_steps.formatters.csv.v1 import CSVFormatterBlockV1
+from inference.core.workflows.core_steps.formatters.current_time.v1 import (
+    CurrentTimeBlockV1,
+)
 from inference.core.workflows.core_steps.formatters.expression.v1 import (
     ExpressionBlockV1,
 )
@@ -815,6 +818,7 @@ def load_blocks() -> List[Type[WorkflowBlock]]:
         DimensionCollapseBlockV1,
         DetectionsListRollUpBlockV1,
         FirstNonEmptyOrDefaultBlockV1,
+        CurrentTimeBlockV1,
         AnthropicClaudeBlockV1,
         AnthropicClaudeBlockV2,
         AnthropicClaudeBlockV3,
diff --git a/requirements/_requirements.txt b/requirements/_requirements.txt
index 2326b8e285..a1352697d5 100644
--- a/requirements/_requirements.txt
+++ b/requirements/_requirements.txt
@@ -4,9 +4,9 @@ APScheduler>=3.10.1,<4.0.0
 asyncua~=1.1.5
 cachetools<6.0.0
 cython~=3.0.0
-python-dotenv~=1.2.2
-fastapi>=0.133.0,<0.140  # 0.133+ dropped the starlette<1.0 cap; be careful with upper pin - fastapi might remove support for on_event
-starlette>=1.0.1  # CVE-2026-48710 (BadHost) — Host-header path-injection fix
+python-dotenv~=1.0.0
+tzdata>=2024.1
+fastapi>=0.100,<0.116  # be careful with upper pin - fastapi might remove support for on_event
 numpy>=2.0.0,<2.4.0
 opencv-python>=4.8.1.78,<=4.10.0.84
 opencv-contrib-python>=4.8.1.78,<=4.10.0.84  # Note: opencv-python considers this as a bad practice, but since our dependencies rely on both we pin both here
diff --git a/tests/workflows/integration_tests/execution/test_workflow_with_current_time.py b/tests/workflows/integration_tests/execution/test_workflow_with_current_time.py
new file mode 100644
index 0000000000..6f952e9068
--- /dev/null
+++ b/tests/workflows/integration_tests/execution/test_workflow_with_current_time.py
@@ -0,0 +1,60 @@
+import datetime
+
+from inference.core.env import WORKFLOWS_MAX_CONCURRENT_STEPS
+from inference.core.managers.base import ModelManager
+from inference.core.workflows.core_steps.common.entities import StepExecutionMode
+from inference.core.workflows.execution_engine.core import ExecutionEngine
+
+WORKFLOW_WITH_CURRENT_TIME = {
+    "version": "1.0",
+    "inputs": [
+        {
+            "type": "WorkflowParameter",
+            "name": "timezone",
+            "default_value": "UTC",
+        },
+    ],
+    "steps": [
+        {
+            "type": "roboflow_core/current_time@v1",
+            "name": "now",
+            "timezone": "$inputs.timezone",
+        },
+    ],
+    "outputs": [
+        {"type": "JsonField", "name": "timestamp", "selector": "$steps.now.timestamp"},
+        {"type": "JsonField", "name": "iso_string", "selector": "$steps.now.iso_string"},
+        {"type": "JsonField", "name": "date", "selector": "$steps.now.date"},
+        {"type": "JsonField", "name": "time", "selector": "$steps.now.time"},
+    ],
+}
+
+
+def test_current_time_workflow(model_manager: ModelManager) -> None:
+    # given
+    execution_engine = ExecutionEngine.init(
+        workflow_definition=WORKFLOW_WITH_CURRENT_TIME,
+        init_parameters={
+            "workflows_core.model_manager": model_manager,
+            "workflows_core.step_execution_mode": StepExecutionMode.LOCAL,
+        },
+        max_concurrent_steps=WORKFLOWS_MAX_CONCURRENT_STEPS,
+    )
+
+    # when
+    result = execution_engine.run(
+        runtime_parameters={"timezone": "America/New_York"}
+    )
+
+    # then
+    assert len(result) == 1, "Single image/parameter batch expected"
+    row = result[0]
+    assert set(row.keys()) == {"timestamp", "iso_string", "date", "time"}
+    assert isinstance(row["timestamp"], datetime.datetime)
+    assert row["timestamp"].tzinfo is not None, "Timestamp must be timezone-aware"
+    assert str(row["timestamp"].tzinfo) == "America/New_York"
+    # derived strings agree with the datetime object
+    assert row["iso_string"] == row["timestamp"].isoformat()
+    assert row["iso_string"].startswith(row["date"])
+    assert row["date"] == row["timestamp"].strftime("%Y-%m-%d")
+    assert row["time"] == row["timestamp"].strftime("%H:%M:%S")
diff --git a/tests/workflows/unit_tests/core_steps/formatters/test_current_time.py b/tests/workflows/unit_tests/core_steps/formatters/test_current_time.py
new file mode 100644
index 0000000000..00197ac994
--- /dev/null
+++ b/tests/workflows/unit_tests/core_steps/formatters/test_current_time.py
@@ -0,0 +1,183 @@
+from datetime import datetime
+from zoneinfo import ZoneInfo
+
+import pytest
+from pydantic import ValidationError
+
+from inference.core.workflows.core_steps.formatters.current_time.v1 import (
+    ALLOWED_TIMEZONES,
+    BlockManifest,
+    CurrentTimeBlockV1,
+    TIMEZONE_METADATA,
+)
+
+
+def test_manifest_parsing_when_data_is_valid() -> None:
+    # given
+    data = {
+        "type": "roboflow_core/current_time@v1",
+        "name": "now",
+        "timezone": "America/New_York",
+    }
+
+    # when
+    result = BlockManifest.model_validate(data)
+
+    # then
+    assert result.type == "roboflow_core/current_time@v1"
+    assert result.name == "now"
+    assert result.timezone == "America/New_York"
+
+
+def test_manifest_parsing_defaults_to_utc() -> None:
+    # given
+    data = {"type": "roboflow_core/current_time@v1", "name": "now"}
+
+    # when
+    result = BlockManifest.model_validate(data)
+
+    # then
+    assert result.timezone == "UTC"
+
+
+def test_manifest_accepts_selector_for_timezone() -> None:
+    # given
+    data = {
+        "type": "roboflow_core/current_time@v1",
+        "name": "now",
+        "timezone": "$inputs.timezone",
+    }
+
+    # when
+    result = BlockManifest.model_validate(data)
+
+    # then
+    assert result.timezone == "$inputs.timezone"
+
+
+def test_manifest_schema_exposes_timezone_dropdown_options() -> None:
+    # when
+    timezone_schema = BlockManifest.model_json_schema()["properties"]["timezone"]
+    timezone_enum = next(
+        item["enum"] for item in timezone_schema["anyOf"] if "enum" in item
+    )
+
+    # then
+    assert timezone_enum == list(ALLOWED_TIMEZONES)
+    assert len(timezone_enum) == 55
+    assert "UTC" in timezone_enum
+    assert "America/New_York" in timezone_enum
+    assert "America/Bogota" in timezone_enum
+    assert "Europe/Berlin" in timezone_enum
+    assert "Europe/London" in timezone_enum
+    assert "Africa/Lagos" in timezone_enum
+    assert "Australia/Darwin" in timezone_enum
+    assert "Australia/Adelaide" in timezone_enum
+    assert "Europe/Paris" not in timezone_enum
+
+
+def test_manifest_schema_exposes_timezone_dropdown_metadata() -> None:
+    # when
+    timezone_schema = BlockManifest.model_json_schema()["properties"]["timezone"]
+
+    # then
+    assert timezone_schema["values_metadata"] == TIMEZONE_METADATA
+    assert (
+        timezone_schema["values_metadata"]["America/New_York"]["name"]
+        == "UTC-5/-4 Eastern Time (EST/EDT)"
+    )
+    assert "description" not in timezone_schema["values_metadata"]["America/New_York"]
+    assert "-8" in timezone_schema["values_metadata"]["America/Los_Angeles"]["name"]
+    assert "PST" in timezone_schema["values_metadata"]["America/Los_Angeles"]["name"]
+    assert timezone_schema["values_metadata"]["Africa/Lagos"]["name"] == (
+        "UTC+1 West Africa Time (WAT)"
+    )
+    assert timezone_schema["values_metadata"]["Australia/Darwin"]["name"] == (
+        "UTC+9:30 Australian Central Standard Time (ACST)"
+    )
+    assert timezone_schema["values_metadata"]["Australia/Adelaide"]["name"] == (
+        "UTC+9:30/+10:30 Australian Central Time (ACST/ACDT)"
+    )
+
+
+def test_manifest_parsing_when_type_is_invalid() -> None:
+    # given
+    data = {"type": "roboflow_core/not_current_time@v1", "name": "now"}
+
+    # when / then
+    with pytest.raises(ValidationError):
+        BlockManifest.model_validate(data)
+
+
+def test_manifest_parsing_rejects_timezone_outside_curated_options() -> None:
+    # given
+    data = {
+        "type": "roboflow_core/current_time@v1",
+        "name": "now",
+        "timezone": "Europe/Paris",
+    }
+
+    # when / then
+    with pytest.raises(ValidationError):
+        BlockManifest.model_validate(data)
+
+
+def test_run_returns_consistent_timestamp_for_valid_timezone() -> None:
+    # given
+    block = CurrentTimeBlockV1()
+
+    # when
+    result = block.run(timezone="America/New_York")
+
+    # then
+    assert set(result.keys()) == {"timestamp", "iso_string", "date", "time"}
+    assert isinstance(result["timestamp"], datetime)
+    assert result["timestamp"].tzinfo is not None
+    # the derived strings agree with the datetime object
+    assert result["iso_string"] == result["timestamp"].isoformat()
+    assert result["date"] == result["timestamp"].strftime("%Y-%m-%d")
+    assert result["time"] == result["timestamp"].strftime("%H:%M:%S")
+
+
+def test_curated_options_distinguish_dst_and_fixed_timezone_rules() -> None:
+    # given
+    january = datetime(2026, 1, 15, 12)
+    july = datetime(2026, 7, 15, 12)
+
+    # when / then
+    assert january.replace(tzinfo=ZoneInfo("America/New_York")).utcoffset() != (
+        july.replace(tzinfo=ZoneInfo("America/New_York")).utcoffset()
+    )
+    assert january.replace(tzinfo=ZoneInfo("America/Bogota")).utcoffset() == (
+        july.replace(tzinfo=ZoneInfo("America/Bogota")).utcoffset()
+    )
+    assert january.replace(tzinfo=ZoneInfo("Europe/Berlin")).utcoffset() != (
+        july.replace(tzinfo=ZoneInfo("Europe/Berlin")).utcoffset()
+    )
+    assert january.replace(tzinfo=ZoneInfo("Africa/Lagos")).utcoffset() == (
+        july.replace(tzinfo=ZoneInfo("Africa/Lagos")).utcoffset()
+    )
+    assert january.replace(tzinfo=ZoneInfo("Australia/Darwin")).utcoffset() == (
+        july.replace(tzinfo=ZoneInfo("Australia/Darwin")).utcoffset()
+    )
+    assert january.replace(tzinfo=ZoneInfo("Australia/Adelaide")).utcoffset() != (
+        july.replace(tzinfo=ZoneInfo("Australia/Adelaide")).utcoffset()
+    )
+
+
+def test_run_raises_for_unknown_timezone() -> None:
+    # given
+    block = CurrentTimeBlockV1()
+
+    # when / then
+    with pytest.raises(ValueError):
+        block.run(timezone="Mars/Olympus_Mons")
+
+
+def test_run_raises_for_timezone_outside_curated_options() -> None:
+    # given
+    block = CurrentTimeBlockV1()
+
+    # when / then
+    with pytest.raises(ValueError):
+        block.run(timezone="Europe/Paris")

From 347785e6f5d4be5d920a61ab38fec6e0f5e58c55 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pawe=C5=82=20P=C4=99czek?=
 <146137186+PawelPeczek-Roboflow@users.noreply.github.com>
Date: Thu, 4 Jun 2026 19:39:35 +0000
Subject: [PATCH 31/76] Update dependencies to fix main (#2415)

---
 inference_models/pyproject.toml            |   2 +-
 inference_models/uv.lock                   | 306 ++-------------------
 requirements/requirements.transformers.txt |   2 +-
 3 files changed, 29 insertions(+), 281 deletions(-)

diff --git a/inference_models/pyproject.toml b/inference_models/pyproject.toml
index c6e07b31c6..ea42d510cf 100644
--- a/inference_models/pyproject.toml
+++ b/inference_models/pyproject.toml
@@ -12,7 +12,7 @@ dependencies = [
   "requests>=2.33.0,<3.0.0",
   "supervision>=0.26.0",
   "backoff~=2.2.0",
-  "transformers~=5.5",
+  "transformers>=5.5.0,<=5.9.0",
   "timm>=1.0.0,<2.0.0",
   "accelerate>=1.0.0,<2.0.0",
   "einops>=0.7.0,<1.0.0",
diff --git a/inference_models/uv.lock b/inference_models/uv.lock
index c4e11ec010..3a49b3a0c3 100644
--- a/inference_models/uv.lock
+++ b/inference_models/uv.lock
@@ -1,5 +1,5 @@
 version = 1
-revision = 3
+revision = 2
 requires-python = ">=3.10, <3.13"
 resolution-markers = [
     "python_full_version >= '3.12' and platform_machine != 's390x' and sys_platform == 'darwin'",
@@ -139,7 +139,7 @@ name = "anyio"
 version = "4.12.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "exceptiongroup", marker = "python_full_version < '3.11' or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 
'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
+    { name = "exceptiongroup", marker = "python_full_version < '3.11' or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126')" },
     { name = "idna" },
     { name = "typing-extensions" },
 ]
@@ -390,7 +390,7 @@ name = "click"
 version = "8.3.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "colorama", marker = "sys_platform == 'win32' or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 
'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
+    { name = "colorama", marker = "sys_platform == 'win32' or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/3d/fa/656b739db8587d7b5dfa22e22ed02566950fbfbcdc20311993483657a5c0/click-8.3.1.tar.gz", hash = "sha256:12ff4785d337a1bb490bb7e9c2b1ee5da3112e94a8622f26a6c77f5d2fc6842a", size = 295065, upload-time = "2025-11-15T20:45:42.706Z" }
 wheels = [
@@ -411,7 +411,7 @@ name = "coloredlogs"
 version = "15.0.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "humanfriendly", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or 
(platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra == 
'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu130' and extra == 
'extra-16-inference-models-torch-jp6-cu126') or extra == 'extra-16-inference-models-onnx-cpu' or extra == 'extra-16-inference-models-onnx-cu118' or extra == 'extra-16-inference-models-onnx-cu12' or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra != 
'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
+    { name = "humanfriendly", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and extra == 
'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 
'extra-16-inference-models-torch-cu124') or (sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/cc/c7/eed8f27100517e8c0e6b923d5f0845d0cb99763da6fdee00478f91db7325/coloredlogs-15.0.1.tar.gz", hash = "sha256:7c991aa71a4577af2f82600d8f8f3a89f936baeaf9b50a9c197da014e5bf16b0", size = 278520, upload-time = "2021-06-11T10:22:45.202Z" }
 wheels = [
@@ -590,7 +590,7 @@ name = "exceptiongroup"
 version = "1.3.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "typing-extensions", marker = "python_full_version < '3.11' or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 
'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
+    { name = "typing-extensions", marker = "python_full_version < '3.11' or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/0b/9f/a65090624ecf468cdca03533906e7c69ed7588582240cfe7cc9e770b50eb/exceptiongroup-1.3.0.tar.gz", hash = "sha256:b241f5885f560bc56a59ee63ca4c6a8bfa46ae4ad651af316d4e81817bb9fd88", size = 29749, upload-time = "2025-05-10T17:42:51.123Z" }
 wheels = [
@@ -829,7 +829,7 @@ source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "filelock" },
     { name = "fsspec" },
-    { name = "hf-xet", marker = "platform_machine == 'AMD64' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64' or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or 
(extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
+    { name = "hf-xet", marker = "platform_machine == 'AMD64' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64' or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126')" },
     { name = "httpx" },
     { name = "packaging" },
     { name = "pyyaml" },
@@ -1184,7 +1184,7 @@ requires-dist = [
     { name = "torchvision", marker = "extra == 'torch-cu130'" },
     { name = "torchvision", marker = "extra == 'torch-jp6-cu126'", index = "https://pypi.jetson-ai-lab.io/jp6/cu126/+simple", conflict = { package = "inference-models", extra = "torch-jp6-cu126" } },
     { name = "tornado", marker = "extra == 'docs'", specifier = ">=6.5.5" },
-    { name = "transformers", specifier = "~=5.5" },
+    { name = "transformers", specifier = ">=5.5.0,<=5.9.0" },
     { name = "urllib3", specifier = ">=2.7.0,<3.0.0" },
 ]
 provides-extras = ["torch-cpu", "torch-cu118", "torch-cu124", "torch-cu126", "torch-cu128", "torch-cu130", "torch-jp6-cu126", "onnx-cpu", "onnx-cu118", "onnx-cu12", "onnx-jp6-cu126", "trt10", "test", "docs"]
@@ -1886,7 +1886,7 @@ resolution-markers = [
     "(python_full_version < '3.11' and platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux') or (python_full_version < '3.11' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux')",
 ]
 dependencies = [
-    { name = "numpy", marker = "platform_machine != 's390x' or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 
'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
+    { name = "numpy", marker = "platform_machine != 's390x' or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or 
(extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/32/49/6e67c334872d2c114df3020e579f3718c333198f8312290e09ec0216703a/ml_dtypes-0.5.1.tar.gz", hash = "sha256:ac5b58559bb84a95848ed6984eb8013249f90b6bab62aa5acbad876e256002c9", size = 698772, upload-time = "2025-01-07T03:34:55.613Z" }
 wheels = [
@@ -1917,7 +1917,7 @@ resolution-markers = [
     "python_full_version < '3.11' and platform_machine == 's390x' and sys_platform != 'darwin'",
 ]
 dependencies = [
-    { name = "numpy", marker = "platform_machine == 's390x' or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 
'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
+    { name = "numpy", marker = "platform_machine == 's390x' or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or 
(extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/0e/4a/c27b42ed9b1c7d13d9ba8b6905dece787d6259152f2309338aed29b2447b/ml_dtypes-0.5.4.tar.gz", hash = "sha256:8ab06a50fb9bf9666dd0fe5dfb4676fa2b0ac0f31ecff72a6c3af8e22c063453", size = 692314, upload-time = "2025-11-17T22:32:31.031Z" }
 wheels = [
@@ -2158,21 +2158,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/6c/54/fbfa3315b936d3358517f7da5f9f2557c279bf210e5261f0cf66cc0f9832/nvidia_cublas_cu12-12.8.3.14-py3-none-win_amd64.whl", hash = "sha256:9ae5eae500aead01fc4bdfc458209df638b1a3551557ce11a78eea9ece602ae9", size = 578387959, upload-time = "2025-01-23T18:08:00.662Z" },
 ]
 
-[[package]]
-name = "nvidia-cublas-cu12"
-version = "12.8.4.1"
-source = { registry = "https://pypi.org/simple" }
-resolution-markers = [
-    "(python_full_version >= '3.12' and platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux') or (python_full_version >= '3.12' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux')",
-    "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux') or (python_full_version == '3.11.*' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux')",
-    "(python_full_version < '3.11' and platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux') or (python_full_version < '3.11' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux')",
-]
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/29/99/db44d685f0e257ff0e213ade1964fc459b4a690a73293220e98feb3307cf/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:b86f6dd8935884615a0683b663891d43781b819ac4f2ba2b0c9604676af346d0", size = 590537124, upload-time = "2025-03-07T01:43:53.556Z" },
-    { url = "https://files.pythonhosted.org/packages/dc/61/e24b560ab2e2eaeb3c839129175fb330dfcfc29e5203196e5541a4c44682/nvidia_cublas_cu12-12.8.4.1-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:8ac4e771d5a348c551b2a426eda6193c19aa630236b418086020df5ba9667142", size = 594346921, upload-time = "2025-03-07T01:44:31.254Z" },
-    { url = "https://files.pythonhosted.org/packages/70/61/7d7b3c70186fb651d0fbd35b01dbfc8e755f69fd58f817f3d0f642df20c3/nvidia_cublas_cu12-12.8.4.1-py3-none-win_amd64.whl", hash = "sha256:47e9b82132fa8d2b4944e708049229601448aaad7e6f296f630f2d1a32de35af", size = 567544208, upload-time = "2025-03-07T01:53:30.535Z" },
-]
-
 [[package]]
 name = "nvidia-cuda-cupti-cu11"
 version = "11.8.87"
@@ -2214,21 +2199,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/3f/2a/cabe033045427beb042b70b394ac3fd7cfefe157c965268824011b16af67/nvidia_cuda_cupti_cu12-12.8.57-py3-none-win_amd64.whl", hash = "sha256:bbed719c52a476958a74cfc42f2b95a3fd6b3fd94eb40134acc4601feb4acac3", size = 7002337, upload-time = "2025-01-23T18:04:35.34Z" },
 ]
 
-[[package]]
-name = "nvidia-cuda-cupti-cu12"
-version = "12.8.90"
-source = { registry = "https://pypi.org/simple" }
-resolution-markers = [
-    "(python_full_version >= '3.12' and platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux') or (python_full_version >= '3.12' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux')",
-    "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux') or (python_full_version == '3.11.*' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux')",
-    "(python_full_version < '3.11' and platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux') or (python_full_version < '3.11' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux')",
-]
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/d5/1f/b3bd73445e5cb342727fd24fe1f7b748f690b460acadc27ea22f904502c8/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4412396548808ddfed3f17a467b104ba7751e6b58678a4b840675c56d21cf7ed", size = 9533318, upload-time = "2025-03-07T01:40:10.421Z" },
-    { url = "https://files.pythonhosted.org/packages/f8/02/2adcaa145158bf1a8295d83591d22e4103dbfd821bcaf6f3f53151ca4ffa/nvidia_cuda_cupti_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ea0cb07ebda26bb9b29ba82cda34849e73c166c18162d3913575b0c9db9a6182", size = 10248621, upload-time = "2025-03-07T01:40:21.213Z" },
-    { url = "https://files.pythonhosted.org/packages/41/bc/83f5426095d93694ae39fe1311431b5d5a9bb82e48bf0dd8e19be2765942/nvidia_cuda_cupti_cu12-12.8.90-py3-none-win_amd64.whl", hash = "sha256:bb479dcdf7e6d4f8b0b01b115260399bf34154a1a2e9fe11c85c517d87efd98e", size = 7015759, upload-time = "2025-03-07T01:51:11.355Z" },
-]
-
 [[package]]
 name = "nvidia-cuda-nvrtc-cu11"
 version = "11.8.89"
@@ -2270,21 +2240,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/f8/5b/052d05aa068e4752415ad03bac58e852ea8bc17c9321e08546b3f261e47e/nvidia_cuda_nvrtc_cu12-12.8.61-py3-none-win_amd64.whl", hash = "sha256:9c8887bf5e5dffc441018ba8c5dc59952372a6f4806819e8c1f03d62637dbeea", size = 73567440, upload-time = "2025-01-23T18:05:51.036Z" },
 ]
 
-[[package]]
-name = "nvidia-cuda-nvrtc-cu12"
-version = "12.8.93"
-source = { registry = "https://pypi.org/simple" }
-resolution-markers = [
-    "(python_full_version >= '3.12' and platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux') or (python_full_version >= '3.12' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux')",
-    "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux') or (python_full_version == '3.11.*' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux')",
-    "(python_full_version < '3.11' and platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux') or (python_full_version < '3.11' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux')",
-]
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/05/6b/32f747947df2da6994e999492ab306a903659555dddc0fbdeb9d71f75e52/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:a7756528852ef889772a84c6cd89d41dfa74667e24cca16bb31f8f061e3e9994", size = 88040029, upload-time = "2025-03-07T01:42:13.562Z" },
-    { url = "https://files.pythonhosted.org/packages/eb/d1/e50d0acaab360482034b84b6e27ee83c6738f7d32182b987f9c7a4e32962/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fc1fec1e1637854b4c0a65fb9a8346b51dd9ee69e61ebaccc82058441f15bce8", size = 43106076, upload-time = "2025-03-07T01:41:59.817Z" },
-    { url = "https://files.pythonhosted.org/packages/45/51/52a3d84baa2136cc8df15500ad731d74d3a1114d4c123e043cb608d4a32b/nvidia_cuda_nvrtc_cu12-12.8.93-py3-none-win_amd64.whl", hash = "sha256:7a4b6b2904850fe78e0bd179c4b655c404d4bb799ef03ddc60804247099ae909", size = 73586838, upload-time = "2025-03-07T01:52:13.483Z" },
-]
-
 [[package]]
 name = "nvidia-cuda-runtime-cu11"
 version = "11.8.89"
@@ -2326,21 +2281,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/16/ee/52508c74bee2a3de8d59c6fd9af4ca2f216052fa2bc916da3a6a7bb998af/nvidia_cuda_runtime_cu12-12.8.57-py3-none-win_amd64.whl", hash = "sha256:89be637e3ee967323865b85e0f147d75f9a5bd98360befa37481b02dd57af8f5", size = 944309, upload-time = "2025-01-23T18:04:23.143Z" },
 ]
 
-[[package]]
-name = "nvidia-cuda-runtime-cu12"
-version = "12.8.90"
-source = { registry = "https://pypi.org/simple" }
-resolution-markers = [
-    "(python_full_version >= '3.12' and platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux') or (python_full_version >= '3.12' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux')",
-    "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux') or (python_full_version == '3.11.*' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux')",
-    "(python_full_version < '3.11' and platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux') or (python_full_version < '3.11' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux')",
-]
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/7c/75/f865a3b236e4647605ea34cc450900854ba123834a5f1598e160b9530c3a/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:52bf7bbee900262ffefe5e9d5a2a69a30d97e2bc5bb6cc866688caa976966e3d", size = 965265, upload-time = "2025-03-07T01:39:43.533Z" },
-    { url = "https://files.pythonhosted.org/packages/0d/9b/a997b638fcd068ad6e4d53b8551a7d30fe8b404d6f1804abf1df69838932/nvidia_cuda_runtime_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adade8dcbd0edf427b7204d480d6066d33902cab2a4707dcfc48a2d0fd44ab90", size = 954765, upload-time = "2025-03-07T01:40:01.615Z" },
-    { url = "https://files.pythonhosted.org/packages/30/a5/a515b7600ad361ea14bfa13fb4d6687abf500adc270f19e89849c0590492/nvidia_cuda_runtime_cu12-12.8.90-py3-none-win_amd64.whl", hash = "sha256:c0c6027f01505bfed6c3b21ec546f69c687689aad5f1a377554bc6ca4aa993a8", size = 944318, upload-time = "2025-03-07T01:51:01.794Z" },
-]
-
 [[package]]
 name = "nvidia-cuda-runtime-cu12"
 version = "12.9.79"
@@ -2409,24 +2349,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/d0/ea/636cda41b3865caa0d43c34f558167304acde3d2c5f6c54c00a550e69ecd/nvidia_cudnn_cu12-9.7.1.26-py3-none-win_amd64.whl", hash = "sha256:7b805b9a4cf9f3da7c5f4ea4a9dff7baf62d1a612d6154a7e0d2ea51ed296241", size = 715962100, upload-time = "2025-02-06T22:21:32.431Z" },
 ]
 
-[[package]]
-name = "nvidia-cudnn-cu12"
-version = "9.10.2.21"
-source = { registry = "https://pypi.org/simple" }
-resolution-markers = [
-    "(python_full_version >= '3.12' and platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux') or (python_full_version >= '3.12' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux')",
-    "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux') or (python_full_version == '3.11.*' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux')",
-    "(python_full_version < '3.11' and platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux') or (python_full_version < '3.11' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux')",
-]
-dependencies = [
-    { name = "nvidia-cublas-cu12", version = "12.8.4.1", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' 
and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 
'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (platform_machine 
!= 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or 
(platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 
'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 
'extra-16-inference-models-onnx-cu12') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 
'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or 
(platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 
'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform 
== 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 
'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 
'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 
'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 
'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 
'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 
'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
-]
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/fa/41/e79269ce215c857c935fd86bcfe91a451a584dfc27f1e068f568b9ad1ab7/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:c9132cc3f8958447b4910a1720036d9eff5928cc3179b0a51fb6d167c6cc87d8", size = 705026878, upload-time = "2025-06-06T21:52:51.348Z" },
-    { url = "https://files.pythonhosted.org/packages/ba/51/e123d997aa098c61d029f76663dedbfb9bc8dcf8c60cbd6adbe42f76d049/nvidia_cudnn_cu12-9.10.2.21-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:949452be657fa16687d0930933f032835951ef0892b37d2d53824d1a84dc97a8", size = 706758467, upload-time = "2025-06-06T21:54:08.597Z" },
-    { url = "https://files.pythonhosted.org/packages/3d/90/0bd6e586701b3a890fd38aa71c387dab4883d619d6e5ad912ccbd05bfd67/nvidia_cudnn_cu12-9.10.2.21-py3-none-win_amd64.whl", hash = "sha256:c6288de7d63e6cf62988f0923f96dc339cea362decb1bf5b3141883392a7d65e", size = 692992268, upload-time = "2025-06-06T21:55:18.114Z" },
-]
-
 [[package]]
 name = "nvidia-cufft-cu11"
 version = "10.9.0.58"
@@ -2474,52 +2396,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/32/f3/f6248aa119c2726b1bdd02d472332cae274133bd32ca5fa8822efb0c308c/nvidia_cufft_cu12-11.3.3.41-py3-none-win_amd64.whl", hash = "sha256:f9760612886786601d27a0993bb29ce1f757e6b8b173499d0ecfa850d31b50f8", size = 192216738, upload-time = "2025-01-23T18:08:51.102Z" },
 ]
 
-[[package]]
-name = "nvidia-cufft-cu12"
-version = "11.3.3.83"
-source = { registry = "https://pypi.org/simple" }
-resolution-markers = [
-    "(python_full_version >= '3.12' and platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux') or (python_full_version >= '3.12' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux')",
-    "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux') or (python_full_version == '3.11.*' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux')",
-    "(python_full_version < '3.11' and platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux') or (python_full_version < '3.11' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux')",
-]
-dependencies = [
-    { name = "nvidia-nvjitlink-cu12", version = "12.8.93", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' 
and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 
'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (platform_machine 
!= 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or 
(platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 
'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 
'extra-16-inference-models-onnx-cu12') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 
'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or 
(platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 
'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform 
== 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 
'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 
'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 
'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 
'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 
'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 
'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
-]
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/60/bc/7771846d3a0272026c416fbb7e5f4c1f146d6d80704534d0b187dd6f4800/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:848ef7224d6305cdb2a4df928759dca7b1201874787083b6e7550dd6765ce69a", size = 193109211, upload-time = "2025-03-07T01:44:56.873Z" },
-    { url = "https://files.pythonhosted.org/packages/1f/13/ee4e00f30e676b66ae65b4f08cb5bcbb8392c03f54f2d5413ea99a5d1c80/nvidia_cufft_cu12-11.3.3.83-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:4d2dd21ec0b88cf61b62e6b43564355e5222e4a3fb394cac0db101f2dd0d4f74", size = 193118695, upload-time = "2025-03-07T01:45:27.821Z" },
-    { url = "https://files.pythonhosted.org/packages/7d/ec/ce1629f1e478bb5ccd208986b5f9e0316a78538dd6ab1d0484f012f8e2a1/nvidia_cufft_cu12-11.3.3.83-py3-none-win_amd64.whl", hash = "sha256:7a64a98ef2a7c47f905aaf8931b69a3a43f27c55530c698bb2ed7c75c0b42cb7", size = 192216559, upload-time = "2025-03-07T01:53:57.106Z" },
-]
-
 [[package]]
 name = "nvidia-cufile-cu12"
 version = "1.13.0.11"
 source = { registry = "https://pypi.org/simple" }
-resolution-markers = [
-    "(python_full_version >= '3.12' and platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux') or (python_full_version >= '3.12' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux')",
-    "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux') or (python_full_version == '3.11.*' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux')",
-    "(python_full_version < '3.11' and platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux') or (python_full_version < '3.11' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux')",
-]
 wheels = [
     { url = "https://files.pythonhosted.org/packages/e5/9c/1f3264d0a84c8a031487fb7f59780fc78fa6f1c97776233956780e3dc3ac/nvidia_cufile_cu12-1.13.0.11-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:483f434c541806936b98366f6d33caef5440572de8ddf38d453213729da3e7d4", size = 1197801, upload-time = "2025-01-23T17:57:07.247Z" },
     { url = "https://files.pythonhosted.org/packages/35/80/f6a0fc90ab6fa4ac916f3643e5b620fd19724626c59ae83b74f5efef0349/nvidia_cufile_cu12-1.13.0.11-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:2acbee65dc2eaf58331f0798c5e6bcdd790c4acb26347530297e63528c9eba5d", size = 1120660, upload-time = "2025-01-23T17:56:56.608Z" },
 ]
 
-[[package]]
-name = "nvidia-cufile-cu12"
-version = "1.13.1.3"
-source = { registry = "https://pypi.org/simple" }
-resolution-markers = [
-    "(python_full_version >= '3.12' and platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux') or (python_full_version >= '3.12' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux')",
-    "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux') or (python_full_version == '3.11.*' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux')",
-    "(python_full_version < '3.11' and platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux') or (python_full_version < '3.11' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux')",
-]
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/bb/fe/1bcba1dfbfb8d01be8d93f07bfc502c93fa23afa6fd5ab3fc7c1df71038a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1d069003be650e131b21c932ec3d8969c1715379251f8d23a1860554b1cb24fc", size = 1197834, upload-time = "2025-03-07T01:45:50.723Z" },
-    { url = "https://files.pythonhosted.org/packages/1e/f5/5607710447a6fe9fd9b3283956fceeee8a06cda1d2f56ce31371f595db2a/nvidia_cufile_cu12-1.13.1.3-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:4beb6d4cce47c1a0f1013d72e02b0994730359e17801d395bdcbf20cfb3bb00a", size = 1120705, upload-time = "2025-03-07T01:45:41.434Z" },
-]
-
 [[package]]
 name = "nvidia-curand-cu11"
 version = "10.3.0.86"
@@ -2561,21 +2446,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/d6/f0/91252f3cffe3f3c233a8e17262c21b41534652edfe783c1e58ea1c92c115/nvidia_curand_cu12-10.3.9.55-py3-none-win_amd64.whl", hash = "sha256:570d82475fe7f3d8ed01ffbe3b71796301e0e24c98762ca018ff8ce4f5418e1f", size = 62761446, upload-time = "2025-01-23T18:09:21.663Z" },
 ]
 
-[[package]]
-name = "nvidia-curand-cu12"
-version = "10.3.9.90"
-source = { registry = "https://pypi.org/simple" }
-resolution-markers = [
-    "(python_full_version >= '3.12' and platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux') or (python_full_version >= '3.12' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux')",
-    "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux') or (python_full_version == '3.11.*' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux')",
-    "(python_full_version < '3.11' and platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux') or (python_full_version < '3.11' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux')",
-]
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/45/5e/92aa15eca622a388b80fbf8375d4760738df6285b1e92c43d37390a33a9a/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:dfab99248034673b779bc6decafdc3404a8a6f502462201f2f31f11354204acd", size = 63625754, upload-time = "2025-03-07T01:46:10.735Z" },
-    { url = "https://files.pythonhosted.org/packages/fb/aa/6584b56dc84ebe9cf93226a5cde4d99080c8e90ab40f0c27bda7a0f29aa1/nvidia_curand_cu12-10.3.9.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:b32331d4f4df5d6eefa0554c565b626c7216f87a06a4f56fab27c3b68a830ec9", size = 63619976, upload-time = "2025-03-07T01:46:23.323Z" },
-    { url = "https://files.pythonhosted.org/packages/b9/75/70c05b2f3ed5be3bb30b7102b6eb78e100da4bbf6944fd6725c012831cab/nvidia_curand_cu12-10.3.9.90-py3-none-win_amd64.whl", hash = "sha256:f149a8ca457277da854f89cf282d6ef43176861926c7ac85b2a0fbd237c587ec", size = 62765309, upload-time = "2025-03-07T01:54:20.478Z" },
-]
-
 [[package]]
 name = "nvidia-cusolver-cu11"
 version = "11.4.1.48"
@@ -2630,26 +2500,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/c4/f9/e0e6f8b7aecd13e0f9e937d116fb3211329a0a92b9bea9624b1368de307a/nvidia_cusolver_cu12-11.7.2.55-py3-none-win_amd64.whl", hash = "sha256:a5a516c55da5c5aba98420d9bc9bcab18245f21ec87338cc1f930eb18dd411ac", size = 249600787, upload-time = "2025-01-23T18:10:07.641Z" },
 ]
 
-[[package]]
-name = "nvidia-cusolver-cu12"
-version = "11.7.3.90"
-source = { registry = "https://pypi.org/simple" }
-resolution-markers = [
-    "(python_full_version >= '3.12' and platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux') or (python_full_version >= '3.12' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux')",
-    "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux') or (python_full_version == '3.11.*' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux')",
-    "(python_full_version < '3.11' and platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux') or (python_full_version < '3.11' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux')",
-]
-dependencies = [
-    { name = "nvidia-cublas-cu12", version = "12.8.4.1", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' 
and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 
'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (platform_machine 
!= 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or 
(platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 
'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 
'extra-16-inference-models-onnx-cu12') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 
'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or 
(platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 
'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform 
== 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 
'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 
'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 
'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 
'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 
'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 
'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
-    { name = "nvidia-cusparse-cu12", version = "12.5.8.93", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 
'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra 
!= 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or 
(platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') 
or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 
'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 
'extra-16-inference-models-onnx-cu12') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 
'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or 
(platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 
'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform 
== 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 
'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 
'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 
'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 
'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 
'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 
'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
-    { name = "nvidia-nvjitlink-cu12", version = "12.8.93", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' 
and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 
'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (platform_machine 
!= 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or 
(platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 
'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 
'extra-16-inference-models-onnx-cu12') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 
'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or 
(platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 
'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform 
== 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 
'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 
'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 
'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 
'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 
'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 
'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
-]
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/c8/32/f7cd6ce8a7690544d084ea21c26e910a97e077c9b7f07bf5de623ee19981/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_aarch64.whl", hash = "sha256:db9ed69dbef9715071232caa9b69c52ac7de3a95773c2db65bdba85916e4e5c0", size = 267229841, upload-time = "2025-03-07T01:46:54.356Z" },
-    { url = "https://files.pythonhosted.org/packages/85/48/9a13d2975803e8cf2777d5ed57b87a0b6ca2cc795f9a4f59796a910bfb80/nvidia_cusolver_cu12-11.7.3.90-py3-none-manylinux_2_27_x86_64.whl", hash = "sha256:4376c11ad263152bd50ea295c05370360776f8c3427b30991df774f9fb26c450", size = 267506905, upload-time = "2025-03-07T01:47:16.273Z" },
-    { url = "https://files.pythonhosted.org/packages/13/c0/76ca8551b8a84146ffa189fec81c26d04adba4bc0dbe09cd6e6fd9b7de04/nvidia_cusolver_cu12-11.7.3.90-py3-none-win_amd64.whl", hash = "sha256:4a550db115fcabc4d495eb7d39ac8b58d4ab5d8e63274d3754df1c0ad6a22d34", size = 256720438, upload-time = "2025-03-07T01:54:39.898Z" },
-]
-
 [[package]]
 name = "nvidia-cusparse-cu11"
 version = "11.7.5.86"
@@ -2697,24 +2547,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/7c/48/64b01653919a3d1d9b5117c156806ab0db8312c7496ff646477a5c1545bf/nvidia_cusparse_cu12-12.5.7.53-py3-none-win_amd64.whl", hash = "sha256:82c201d6781bacf6bb7c654f0446728d0fe596dfdd82ef4a04c204ce3e107441", size = 288767123, upload-time = "2025-01-23T18:11:01.543Z" },
 ]
 
-[[package]]
-name = "nvidia-cusparse-cu12"
-version = "12.5.8.93"
-source = { registry = "https://pypi.org/simple" }
-resolution-markers = [
-    "(python_full_version >= '3.12' and platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux') or (python_full_version >= '3.12' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux')",
-    "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux') or (python_full_version == '3.11.*' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux')",
-    "(python_full_version < '3.11' and platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux') or (python_full_version < '3.11' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux')",
-]
-dependencies = [
-    { name = "nvidia-nvjitlink-cu12", version = "12.8.93", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' 
and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 
'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (platform_machine 
!= 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or 
(platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 
'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 
'extra-16-inference-models-onnx-cu12') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 
'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or 
(platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 
'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform 
== 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 
'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 
'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 
'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 
'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 
'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 
'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
-]
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/bc/f7/cd777c4109681367721b00a106f491e0d0d15cfa1fd59672ce580ce42a97/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9b6c161cb130be1a07a27ea6923df8141f3c295852f4b260c65f18f3e0a091dc", size = 288117129, upload-time = "2025-03-07T01:47:40.407Z" },
-    { url = "https://files.pythonhosted.org/packages/c2/f5/e1854cb2f2bcd4280c44736c93550cc300ff4b8c95ebe370d0aa7d2b473d/nvidia_cusparse_cu12-12.5.8.93-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1ec05d76bbbd8b61b06a80e1eaf8cf4959c3d4ce8e711b65ebd0443bb0ebb13b", size = 288216466, upload-time = "2025-03-07T01:48:13.779Z" },
-    { url = "https://files.pythonhosted.org/packages/62/07/f3b2ad63f8e3d257a599f422ae34eb565e70c41031aecefa3d18b62cabd1/nvidia_cusparse_cu12-12.5.8.93-py3-none-win_amd64.whl", hash = "sha256:9a33604331cb2cac199f2e7f5104dfbb8a5a898c367a53dfda9ff2acb6b6b4dd", size = 284937404, upload-time = "2025-03-07T01:55:07.742Z" },
-]
-
 [[package]]
 name = "nvidia-cusparselt-cu12"
 version = "0.6.2"
@@ -2745,21 +2577,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/46/3e/9e1e394a02a06f694be2c97bbe47288bb7c90ea84c7e9cf88f7b28afe165/nvidia_cusparselt_cu12-0.6.3-py3-none-win_amd64.whl", hash = "sha256:3b325bcbd9b754ba43df5a311488fca11a6b5dc3d11df4d190c000cf1a0765c7", size = 155595972, upload-time = "2024-10-15T22:58:35.426Z" },
 ]
 
-[[package]]
-name = "nvidia-cusparselt-cu12"
-version = "0.7.1"
-source = { registry = "https://pypi.org/simple" }
-resolution-markers = [
-    "(python_full_version >= '3.12' and platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux') or (python_full_version >= '3.12' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux')",
-    "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux') or (python_full_version == '3.11.*' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux')",
-    "(python_full_version < '3.11' and platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux') or (python_full_version < '3.11' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux')",
-]
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/73/b9/598f6ff36faaece4b3c50d26f50e38661499ff34346f00e057760b35cc9d/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_aarch64.whl", hash = "sha256:8878dce784d0fac90131b6817b607e803c36e629ba34dc5b433471382196b6a5", size = 283835557, upload-time = "2025-02-26T00:16:54.265Z" },
-    { url = "https://files.pythonhosted.org/packages/56/79/12978b96bd44274fe38b5dde5cfb660b1d114f70a65ef962bcbbed99b549/nvidia_cusparselt_cu12-0.7.1-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f1bb701d6b930d5a7cea44c19ceb973311500847f81b634d802b7b539dc55623", size = 287193691, upload-time = "2025-02-26T00:15:44.104Z" },
-    { url = "https://files.pythonhosted.org/packages/2f/d8/a6b0d0d0c2435e9310f3e2bb0d9c9dd4c33daef86aa5f30b3681defd37ea/nvidia_cusparselt_cu12-0.7.1-py3-none-win_amd64.whl", hash = "sha256:f67fbb5831940ec829c9117b7f33807db9f9678dc2a617fbe781cac17b4e1075", size = 271020911, upload-time = "2025-02-26T00:14:47.204Z" },
-]
-
 [[package]]
 name = "nvidia-nccl-cu11"
 version = "2.21.5"
@@ -2795,20 +2612,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/67/ca/f42388aed0fddd64ade7493dbba36e1f534d4e6fdbdd355c6a90030ae028/nvidia_nccl_cu12-2.26.2-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:694cf3879a206553cc9d7dbda76b13efaf610fdb70a50cba303de1b0d1530ac6", size = 201319755, upload-time = "2025-03-13T00:29:55.296Z" },
 ]
 
-[[package]]
-name = "nvidia-nccl-cu12"
-version = "2.27.3"
-source = { registry = "https://pypi.org/simple" }
-resolution-markers = [
-    "(python_full_version >= '3.12' and platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux') or (python_full_version >= '3.12' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux')",
-    "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux') or (python_full_version == '3.11.*' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux')",
-    "(python_full_version < '3.11' and platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux') or (python_full_version < '3.11' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux')",
-]
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/4b/7b/8354b784cf73b0ba51e566b4baba3ddd44fe8288a3d39ef1e06cd5417226/nvidia_nccl_cu12-2.27.3-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:9ddf1a245abc36c550870f26d537a9b6087fb2e2e3d6e0ef03374c6fd19d984f", size = 322397768, upload-time = "2025-06-03T21:57:30.234Z" },
-    { url = "https://files.pythonhosted.org/packages/5c/5b/4e4fff7bad39adf89f735f2bc87248c81db71205b62bcc0d5ca5b606b3c3/nvidia_nccl_cu12-2.27.3-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:adf27ccf4238253e0b826bce3ff5fa532d65fc42322c8bfdfaf28024c0fbe039", size = 322364134, upload-time = "2025-06-03T21:58:04.013Z" },
-]
-
 [[package]]
 name = "nvidia-nvjitlink-cu12"
 version = "12.4.127"
@@ -2839,21 +2642,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/7f/c6/0d1b2bfeb2ef42c06db0570c4d081e5cde4450b54c09e43165126cfe6ff6/nvidia_nvjitlink_cu12-12.8.61-py3-none-win_amd64.whl", hash = "sha256:1166a964d25fdc0eae497574d38824305195a5283324a21ccb0ce0c802cbf41c", size = 268514099, upload-time = "2025-01-23T18:12:33.874Z" },
 ]
 
-[[package]]
-name = "nvidia-nvjitlink-cu12"
-version = "12.8.93"
-source = { registry = "https://pypi.org/simple" }
-resolution-markers = [
-    "(python_full_version >= '3.12' and platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux') or (python_full_version >= '3.12' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux')",
-    "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux') or (python_full_version == '3.11.*' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux')",
-    "(python_full_version < '3.11' and platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux') or (python_full_version < '3.11' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux')",
-]
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/f6/74/86a07f1d0f42998ca31312f998bd3b9a7eff7f52378f4f270c8679c77fb9/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:81ff63371a7ebd6e6451970684f916be2eab07321b73c9d244dc2b4da7f73b88", size = 39254836, upload-time = "2025-03-07T01:49:55.661Z" },
-    { url = "https://files.pythonhosted.org/packages/2a/a2/8cee5da30d13430e87bf99bb33455d2724d0a4a9cb5d7926d80ccb96d008/nvidia_nvjitlink_cu12-12.8.93-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:adccd7161ace7261e01bb91e44e88da350895c270d23f744f0820c818b7229e7", size = 38386204, upload-time = "2025-03-07T01:49:43.612Z" },
-    { url = "https://files.pythonhosted.org/packages/ed/d7/34f02dad2e30c31b10a51f6b04e025e5dd60e5f936af9045a9b858a05383/nvidia_nvjitlink_cu12-12.8.93-py3-none-win_amd64.whl", hash = "sha256:bd93fbeeee850917903583587f4fc3a4eafa022e34572251368238ab5e6bd67f", size = 268553710, upload-time = "2025-03-07T01:56:24.13Z" },
-]
-
 [[package]]
 name = "nvidia-nvtx-cu11"
 version = "11.8.86"
@@ -2895,21 +2683,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/e5/14/84d46e62bfde46dd20cfb041e0bb5c2ec454fd6a384696e7fa3463c5bb59/nvidia_nvtx_cu12-12.8.55-py3-none-win_amd64.whl", hash = "sha256:9022681677aef1313458f88353ad9c0d2fbbe6402d6b07c9f00ba0e3ca8774d3", size = 56435, upload-time = "2025-01-23T18:06:06.268Z" },
 ]
 
-[[package]]
-name = "nvidia-nvtx-cu12"
-version = "12.8.90"
-source = { registry = "https://pypi.org/simple" }
-resolution-markers = [
-    "(python_full_version >= '3.12' and platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux') or (python_full_version >= '3.12' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux')",
-    "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux') or (python_full_version == '3.11.*' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux')",
-    "(python_full_version < '3.11' and platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux') or (python_full_version < '3.11' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux')",
-]
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/10/c0/1b303feea90d296f6176f32a2a70b5ef230f9bdeb3a72bddb0dc922dc137/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d7ad891da111ebafbf7e015d34879f7112832fc239ff0d7d776b6cb685274615", size = 91161, upload-time = "2025-03-07T01:42:23.922Z" },
-    { url = "https://files.pythonhosted.org/packages/a2/eb/86626c1bbc2edb86323022371c39aa48df6fd8b0a1647bc274577f72e90b/nvidia_nvtx_cu12-12.8.90-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5b17e2001cc0d751a5bc2c6ec6d26ad95913324a4adb86788c944f8ce9ba441f", size = 89954, upload-time = "2025-03-07T01:42:44.131Z" },
-    { url = "https://files.pythonhosted.org/packages/9f/99/4c9c0c329bf9fc125008c3b54c7c94c0023518d06fc025ae36431375e1fe/nvidia_nvtx_cu12-12.8.90-py3-none-win_amd64.whl", hash = "sha256:619c8304aedc69f02ea82dd244541a83c3d9d40993381b3b590f1adaed3db41e", size = 56492, upload-time = "2025-03-07T01:52:24.69Z" },
-]
-
 [[package]]
 name = "omegaconf"
 version = "2.3.0"
@@ -3249,6 +3022,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/bc/60/5382c03e1970de634027cee8e1b7d39776b778b81812aaf45b694dfe9e28/pillow-12.2.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:bfa9c230d2fe991bed5318a5f119bd6780cda2915cca595393649fc118ab895e", size = 7080946, upload-time = "2026-04-01T14:46:11.734Z" },
 ]
 
+[[package]]
+name = "pkgconfig"
+version = "1.6.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/52/fd/0adde075cd3bfecd557bc7d757e00e231d34d8a6edb4c8d1642759254c21/pkgconfig-1.6.0.tar.gz", hash = "sha256:4a5a6631ce937fafac457104a40d558785a658bbdca5c49b6295bc3fd651907f", size = 5691, upload-time = "2026-03-06T11:26:01.194Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/75/6f/f7ec07fba48f07c555cc4099481df644fbbc12067879072c17ac229f6556/pkgconfig-1.6.0-py3-none-any.whl", hash = "sha256:98e71754855e9563838d952a160eb577edabb57782e49853edb5381927e6bea1", size = 7086, upload-time = "2026-03-06T11:26:07.688Z" },
+]
+
 [[package]]
 name = "platformdirs"
 version = "4.3.8"
@@ -3272,7 +3054,7 @@ name = "portalocker"
 version = "3.2.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "pywin32", marker = "sys_platform == 'win32' or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 
'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
+    { name = "pywin32", marker = "sys_platform == 'win32' or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/5e/77/65b857a69ed876e1951e88aaba60f5ce6120c33703f7cb61a3c894b8c1b6/portalocker-3.2.0.tar.gz", hash = "sha256:1f3002956a54a8c3730586c5c77bf18fae4149e07eaf1c29fc3faf4d5a3f89ac", size = 95644, upload-time = "2025-06-14T13:20:40.03Z" }
 wheels = [
@@ -3747,6 +3529,7 @@ version = "2.2.3"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "cffi" },
+    { name = "pkgconfig" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/7b/88/f73dae807ec68b228fba72507105e3ba80a561dc0bade0004ce24fd118fc/pyvips-2.2.3.tar.gz", hash = "sha256:43bceced0db492654c93008246a58a508e0373ae1621116b87b322f2ac72212f", size = 56626, upload-time = "2024-04-28T11:19:58.158Z" }
 
@@ -4631,8 +4414,7 @@ source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "nvidia-cuda-runtime-cu12", version = "12.4.127", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or 
(platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or 
(platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' 
and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 
'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 'aarch64' 
and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra 
== 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu128' and extra == 
'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 
'extra-16-inference-models-onnx-cu12') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' 
and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 
'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 
'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 
'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 
'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 
'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
     { name = "nvidia-cuda-runtime-cu12", version = "12.8.57", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 
'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 
'extra-16-inference-models-torch-cu128') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and 
extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') 
or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 
'aarch64' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 's390x' and extra == 
'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu126' and 
extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 
'extra-16-inference-models-onnx-cu12') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'linux' and extra == 
'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 
'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 
'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 
'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 
'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 
'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
-    { name = "nvidia-cuda-runtime-cu12", version = "12.8.90", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 
'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra 
!= 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or 
(platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') 
or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 
'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 
'extra-16-inference-models-onnx-cu12') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 
'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or 
(platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 
'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform 
== 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 
'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 
'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 
'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 
'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 
'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 
'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
-    { name = "nvidia-cuda-runtime-cu12", version = "12.9.79", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine != 'aarch64' and platform_machine != 's390x' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (platform_machine != 'aarch64' and platform_machine != 's390x' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 'aarch64' and platform_machine != 's390x' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 'aarch64' and platform_machine != 's390x' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 'aarch64' and platform_machine != 's390x' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 'aarch64' and platform_machine != 's390x' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 'aarch64' and platform_machine != 's390x' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and platform_machine != 's390x' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and platform_machine != 's390x' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or 
(platform_machine != 'aarch64' and platform_machine != 's390x' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and platform_machine != 's390x' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and platform_machine != 's390x' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and platform_machine != 's390x' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and platform_machine != 's390x' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and platform_machine != 's390x' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and sys_platform != 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 's390x' and sys_platform == 'linux') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 
'extra-16-inference-models-onnx-cu118') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and 
sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra != 
'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 
'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu126' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu126' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu126' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu126' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform == 'darwin' and 
extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 
'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 
'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 
'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 
'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or 
(sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 
'extra-16-inference-models-torch-cpu') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu126' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu126' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' 
and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu126' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu126' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
+    { name = "nvidia-cuda-runtime-cu12", version = "12.9.79", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine != 'aarch64' and platform_machine != 's390x' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (platform_machine != 'aarch64' and platform_machine != 's390x' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 'aarch64' and platform_machine != 's390x' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 'aarch64' and platform_machine != 's390x' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 'aarch64' and platform_machine != 's390x' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 'aarch64' and platform_machine != 's390x' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 'aarch64' and platform_machine != 's390x' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and platform_machine != 's390x' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and platform_machine != 's390x' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or 
(platform_machine != 'aarch64' and platform_machine != 's390x' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and platform_machine != 's390x' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and platform_machine != 's390x' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and sys_platform != 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 's390x' and sys_platform == 'linux') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 's390x' and 
sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 
'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu') 
or (sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 
'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 
'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 
'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 
'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 
'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 
'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 
'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 
'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra 
!= 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/1e/bd/31d7dbd2d23a3558f7f4b3447586b4ef9141a0f3a0748c41c730e99cf6a6/tensorrt_cu12_libs-10.12.0.36.tar.gz", hash = "sha256:d26af485ad452599016bde631f4cd223b97f240afb647162ab0c5e8f89708934", size = 709, upload-time = "2025-06-12T21:22:47.972Z" }
 
@@ -4669,8 +4451,7 @@ source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "nvidia-cuda-runtime-cu12", version = "12.4.127", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or 
(platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or 
(platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' 
and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 
'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 'aarch64' 
and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra 
== 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu128' and extra == 
'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 
'extra-16-inference-models-onnx-cu12') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' 
and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 
'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 
'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 
'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 
'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 
'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
     { name = "nvidia-cuda-runtime-cu12", version = "12.8.57", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 
'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 
'extra-16-inference-models-torch-cu128') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and 
extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') 
or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 
'aarch64' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 's390x' and extra == 
'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu126' and 
extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 
'extra-16-inference-models-onnx-cu12') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'linux' and extra == 
'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 
'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 
'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 
'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 
'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 
'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
-    { name = "nvidia-cuda-runtime-cu12", version = "12.8.90", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 
'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra 
!= 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or 
(platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') 
or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 
'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 
'extra-16-inference-models-onnx-cu12') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 
'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or 
(platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 
'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform 
== 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 
'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 
'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 
'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 
'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 
'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 
'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
-    { name = "nvidia-cuda-runtime-cu12", version = "12.9.79", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine != 'aarch64' and platform_machine != 's390x' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (platform_machine != 'aarch64' and platform_machine != 's390x' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 'aarch64' and platform_machine != 's390x' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 'aarch64' and platform_machine != 's390x' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 'aarch64' and platform_machine != 's390x' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 'aarch64' and platform_machine != 's390x' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 'aarch64' and platform_machine != 's390x' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and platform_machine != 's390x' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and platform_machine != 's390x' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or 
(platform_machine != 'aarch64' and platform_machine != 's390x' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and platform_machine != 's390x' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and platform_machine != 's390x' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and platform_machine != 's390x' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and platform_machine != 's390x' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and platform_machine != 's390x' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and sys_platform != 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 's390x' and sys_platform == 'linux') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 
'extra-16-inference-models-onnx-cu118') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and 
sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra != 
'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 
'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu126' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu126' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu126' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu126' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform == 'darwin' and 
extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 
'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 
'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 
'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 
'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or 
(sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 
'extra-16-inference-models-torch-cpu') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu126' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu126' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' 
and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu126' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu126' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
+    { name = "nvidia-cuda-runtime-cu12", version = "12.9.79", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine != 'aarch64' and platform_machine != 's390x' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (platform_machine != 'aarch64' and platform_machine != 's390x' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 'aarch64' and platform_machine != 's390x' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 'aarch64' and platform_machine != 's390x' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 'aarch64' and platform_machine != 's390x' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 'aarch64' and platform_machine != 's390x' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 'aarch64' and platform_machine != 's390x' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and platform_machine != 's390x' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and platform_machine != 's390x' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or 
(platform_machine != 'aarch64' and platform_machine != 's390x' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and platform_machine != 's390x' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and platform_machine != 's390x' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and sys_platform != 'darwin') or (platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 's390x' and sys_platform == 'linux') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 's390x' and 
sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 
'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu') 
or (sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 
'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 
'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 
'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 
'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 
'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 
'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 
'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 
'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra 
!= 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/25/13/f623c012b4a5933dd7384ac68631342f89cd659dc67742841687baa26c78/tensorrt_lean_cu12_libs-10.12.0.36.tar.gz", hash = "sha256:3ac037cce0ba06e6bafb2d9b88bf5dba87356a1a11df66754e940cfea15305d8", size = 716, upload-time = "2025-06-12T21:25:02.299Z" }
 
@@ -4769,9 +4550,9 @@ resolution-markers = [
     "python_full_version < '3.11' and platform_machine == 's390x' and sys_platform != 'darwin'",
 ]
 dependencies = [
-    { name = "huggingface-hub", marker = "sys_platform != 'darwin' or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 
'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
-    { name = "pyyaml", marker = "sys_platform != 'darwin' or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 
'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
-    { name = "safetensors", marker = "sys_platform != 'darwin' or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 
'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
+    { name = "huggingface-hub", marker = "sys_platform != 'darwin' or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126')" },
+    { name = "pyyaml", marker = "sys_platform != 'darwin' or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126')" },
+    { name = "safetensors", marker = "sys_platform != 'darwin' or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126')" },
     { name = "torch", version = "2.6.0+cu124", source = { registry = "https://download.pytorch.org/whl/cu124" }, marker = "(sys_platform != 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform != 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 
'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
     { name = "torch", version = "2.7.1+cpu", source = { registry = "https://download.pytorch.org/whl/cpu" }, marker = "(sys_platform != 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu') or (sys_platform != 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 
'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
     { name = "torch", version = "2.7.1+cu118", source = { registry = "https://download.pytorch.org/whl/cu118" }, marker = "(sys_platform != 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform != 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 
'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
@@ -5083,7 +4864,7 @@ dependencies = [
     { name = "nvidia-cuda-runtime-cu12", version = "12.8.57", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 
'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu118' 
and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
     { name = "nvidia-cudnn-cu12", version = "9.7.1.26", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 
'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu118' 
and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
     { name = "nvidia-cufft-cu12", version = "11.3.3.41", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 
'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu118' 
and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
-    { name = "nvidia-cufile-cu12", version = "1.13.0.11", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 
'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu118' 
and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
+    { name = "nvidia-cufile-cu12", marker = "(platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or 
(sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 
'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
     { name = "nvidia-curand-cu12", version = "10.3.9.55", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 
'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu118' 
and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
     { name = "nvidia-cusolver-cu12", version = "11.7.2.55", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 
'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu118' 
and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
     { name = "nvidia-cusparse-cu12", version = "12.5.7.53", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 
'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu118' 
and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
@@ -5170,23 +4951,8 @@ dependencies = [
     { name = "jinja2", marker = "(extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 
'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126') or (extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu130') or (extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or 
(extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu126' and extra != 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and 
extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu126' and extra != 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126')" },
     { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.11' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126') or (python_full_version < '3.11' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (python_full_version < '3.11' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (python_full_version < '3.11' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (python_full_version < '3.11' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (python_full_version < '3.11' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra 
!= 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 
'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
     { name = "networkx", version = "3.5", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.11' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126') or (python_full_version >= '3.11' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (python_full_version >= '3.11' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126') or (python_full_version >= '3.11' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu130') or (python_full_version >= '3.11' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (python_full_version >= '3.11' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 
'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 
'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
-    { name = "nvidia-cublas-cu12", version = "12.8.4.1", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 
'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 
'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 
'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 
'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
-    { name = "nvidia-cuda-cupti-cu12", version = "12.8.90", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 
'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 
'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 
'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 
'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
-    { name = "nvidia-cuda-nvrtc-cu12", version = "12.8.93", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 
'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 
'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 
'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 
'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
-    { name = "nvidia-cuda-runtime-cu12", version = "12.8.90", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 
'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 
'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 
'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 
'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
-    { name = "nvidia-cudnn-cu12", version = "9.10.2.21", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 
'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 
'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 
'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 
'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
-    { name = "nvidia-cufft-cu12", version = "11.3.3.83", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 
'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 
'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 
'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 
'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
-    { name = "nvidia-cufile-cu12", version = "1.13.1.3", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 
'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 
'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 
'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 
'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
-    { name = "nvidia-curand-cu12", version = "10.3.9.90", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 
'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 
'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 
'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 
'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
-    { name = "nvidia-cusolver-cu12", version = "11.7.3.90", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 
'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 
'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 
'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 
'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
-    { name = "nvidia-cusparse-cu12", version = "12.5.8.93", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 
'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 
'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 
'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 
'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
-    { name = "nvidia-cusparselt-cu12", version = "0.7.1", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 
'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 
'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 
'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 
'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
-    { name = "nvidia-nccl-cu12", version = "2.27.3", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 
'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 
'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 
'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 
'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
-    { name = "nvidia-nvjitlink-cu12", version = "12.8.93", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 
'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 
'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 
'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 
'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
-    { name = "nvidia-nvtx-cu12", version = "12.8.90", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 
'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 
'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 
'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 
'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
     { name = "setuptools", marker = "(python_full_version >= '3.12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126') or (python_full_version >= '3.12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (python_full_version >= '3.12' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126') or (python_full_version >= '3.12' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu130') or (python_full_version >= '3.12' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (python_full_version >= '3.12' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu130' and 
extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 
'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
     { name = "sympy", version = "1.14.0", source = { registry = "https://pypi.org/simple" }, marker = "(extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 
'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126') or (extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu130') or (extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and 
extra == 'extra-16-inference-models-torch-cu128') or (extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu126' and extra != 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-onnx-cpu' 
and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu126' and extra != 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126')" },
-    { name = "triton", version = "3.4.0", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'x86_64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' 
and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'x86_64' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 
'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu130' and extra == 
'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 
'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
     { name = "typing-extensions", marker = "(extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 
'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126') or (extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu130') or (extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or 
(extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu126' and extra != 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and 
extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu126' and extra != 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126')" },
 ]
 wheels = [
@@ -5475,7 +5241,7 @@ name = "tqdm"
 version = "4.67.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "colorama", marker = "sys_platform == 'win32' or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (extra == 
'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
+    { name = "colorama", marker = "sys_platform == 'win32' or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126')" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/a8/4b/29b4ef32e036bb34e4ab51796dd745cdba7ed47ad142a9f4a1eb8e0c744d/tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2", size = 169737, upload-time = "2024-11-24T20:12:22.481Z" }
 wheels = [
@@ -5550,24 +5316,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/24/5f/950fb373bf9c01ad4eb5a8cd5eaf32cdf9e238c02f9293557a2129b9c4ac/triton-3.3.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9999e83aba21e1a78c1f36f21bce621b77bcaa530277a50484a7cb4a822f6e43", size = 155669138, upload-time = "2025-05-29T23:39:51.771Z" },
 ]
 
-[[package]]
-name = "triton"
-version = "3.4.0"
-source = { registry = "https://pypi.org/simple" }
-resolution-markers = [
-    "(python_full_version >= '3.12' and platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux') or (python_full_version >= '3.12' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux')",
-    "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux') or (python_full_version == '3.11.*' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux')",
-    "(python_full_version < '3.11' and platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux') or (python_full_version < '3.11' and platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux')",
-]
-dependencies = [
-    { name = "setuptools", marker = "(platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and platform_machine != 's390x' and sys_platform == 'linux' and extra != 
'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 
'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'darwin' and sys_platform != 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra != 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 
'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 's390x' and sys_platform != 'linux' and extra 
== 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 's390x' and sys_platform != 'linux' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 
'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 'aarch64' and sys_platform == 'linux' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra != 'extra-16-inference-models-torch-cpu' and extra != 'extra-16-inference-models-torch-cu118' and extra != 'extra-16-inference-models-torch-cu124' and extra != 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 'aarch64' and extra == 
'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra == 
'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 
'extra-16-inference-models-torch-cu128') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') 
or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine == 's390x' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (platform_machine != 'aarch64' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (platform_machine != 'aarch64' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'darwin' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu118') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cpu' and extra == 
'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-cu12') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cu118' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-onnx-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra == 
'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'linux' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 
'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 
'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-jp6-cu126' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'linux' and extra != 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu118') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 
'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cpu' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu124') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 
'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu118' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu124' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu128') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 
'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu126' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-cu130') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu128' and extra == 'extra-16-inference-models-torch-jp6-cu126') or (sys_platform == 'darwin' and extra != 'extra-16-inference-models-onnx-cpu' and extra != 'extra-16-inference-models-onnx-cu118' and extra != 'extra-16-inference-models-onnx-cu12' and extra == 'extra-16-inference-models-torch-cu130' and extra == 'extra-16-inference-models-torch-jp6-cu126')" },
-]
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/62/ee/0ee5f64a87eeda19bbad9bc54ae5ca5b98186ed00055281fd40fb4beb10e/triton-3.4.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7ff2785de9bc02f500e085420273bb5cc9c9bb767584a4aa28d6e360cec70128", size = 155430069, upload-time = "2025-07-30T19:58:21.715Z" },
-    { url = "https://files.pythonhosted.org/packages/7d/39/43325b3b651d50187e591eefa22e236b2981afcebaefd4f2fc0ea99df191/triton-3.4.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7b70f5e6a41e52e48cfc087436c8a28c17ff98db369447bcaff3b887a3ab4467", size = 155531138, upload-time = "2025-07-30T19:58:29.908Z" },
-    { url = "https://files.pythonhosted.org/packages/d0/66/b1eb52839f563623d185f0927eb3530ee4d5ffe9d377cdaf5346b306689e/triton-3.4.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:31c1d84a5c0ec2c0f8e8a072d7fd150cab84a9c239eaddc6706c081bfae4eb04", size = 155560068, upload-time = "2025-07-30T19:58:37.081Z" },
-]
-
 [[package]]
 name = "typer"
 version = "0.24.1"
diff --git a/requirements/requirements.transformers.txt b/requirements/requirements.transformers.txt
index 446e59c65d..5984aab5cd 100644
--- a/requirements/requirements.transformers.txt
+++ b/requirements/requirements.transformers.txt
@@ -1,6 +1,6 @@
 torch>=2.0.1,<3.0.0
 torchvision>=0.15.0
-transformers>=4.57.3
+transformers>=4.57.3,<=5.9.0
 timm>=1.0.0
 accelerate>=1.0.0,<2.0.0
 einops>=0.7.0

From d82595f2e288a168f4cda0a4b58fef9f5a27efe2 Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Thu, 4 Jun 2026 19:56:37 +0000
Subject: [PATCH 32/76] Gate RF-DETR fast preprocess by pipeline depth

---
 .../inference_models/configuration.py         | 38 +++++++++++++++
 .../rfdetr_instance_segmentation_trt.py       | 40 +++++++++++-----
 .../tests/unit_tests/test_configuration.py    | 48 +++++++++++++++++++
 3 files changed, 114 insertions(+), 12 deletions(-)
 create mode 100644 inference_models/tests/unit_tests/test_configuration.py

diff --git a/inference_models/inference_models/configuration.py b/inference_models/inference_models/configuration.py
index 5b10d0daf3..6c89d889cd 100644
--- a/inference_models/inference_models/configuration.py
+++ b/inference_models/inference_models/configuration.py
@@ -1,8 +1,10 @@
 import os
 import warnings
+from typing import Optional
 
 import torch
 
+from inference_models.errors import InvalidEnvVariable
 from inference_models.utils.environment import (
     get_boolean_from_env,
     get_comma_separated_list_of_integers_from_env,
@@ -299,6 +301,42 @@
     variable_name="INFERENCE_MODELS_RFDETR_TRITON_PREPROC_ENABLED",
     default=DEFAULT_INFERENCE_MODELS_RFDETR_TRITON_PREPROC_ENABLED,
 )
+RFDETR_PIPELINE_DEPTH_ENV_NAME = "RFDETR_PIPELINE_DEPTH"
+DEFAULT_RFDETR_PIPELINE_DEPTH = 1
+MIN_RFDETR_PIPELINE_DEPTH = 1
+
+
+def parse_rfdetr_pipeline_depth(value: Optional[str]) -> int:
+    """Parse and validate the RF-DETR streaming pipeline depth."""
+    if value is None:
+        return DEFAULT_RFDETR_PIPELINE_DEPTH
+    try:
+        parsed = int(value)
+    except (TypeError, ValueError):
+        raise InvalidEnvVariable(
+            message=(
+                f"Expected environment variable `{RFDETR_PIPELINE_DEPTH_ENV_NAME}` "
+                f"to be an integer but got '{value}'"
+            ),
+            help_url="https://inference-models.roboflow.com/errors/runtime-environment/#invalidenvvariable",
+        )
+    if parsed < MIN_RFDETR_PIPELINE_DEPTH:
+        raise InvalidEnvVariable(
+            message=(
+                f"Expected environment variable `{RFDETR_PIPELINE_DEPTH_ENV_NAME}` "
+                f"to be >= {MIN_RFDETR_PIPELINE_DEPTH} but got '{value}'"
+            ),
+            help_url="https://inference-models.roboflow.com/errors/runtime-environment/#invalidenvvariable",
+        )
+    return parsed
+
+
+def get_rfdetr_pipeline_depth() -> int:
+    """Read and validate ``RFDETR_PIPELINE_DEPTH`` from the environment."""
+    return parse_rfdetr_pipeline_depth(os.getenv(RFDETR_PIPELINE_DEPTH_ENV_NAME))
+
+
+RFDETR_PIPELINE_DEPTH = get_rfdetr_pipeline_depth()
 INFERENCE_MODELS_ROBOFLOW_INSTANT_DEFAULT_CONFIDENCE = get_float_from_env(
     variable_name="INFERENCE_MODELS_ROBOFLOW_INSTANT_DEFAULT_CONFIDENCE",
     default=0.99,
diff --git a/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py b/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py
index 1e570e3f9f..7c0fea07ff 100644
--- a/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py
+++ b/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py
@@ -13,6 +13,7 @@
 from inference_models.configuration import (
     DEFAULT_DEVICE,
     INFERENCE_MODELS_RFDETR_DEFAULT_CONFIDENCE,
+    get_rfdetr_pipeline_depth,
 )
 from inference_models.entities import ColorFormat, Confidence
 from inference_models.errors import (
@@ -221,10 +222,12 @@ def __init__(
         self._trt_cuda_graph_cache = trt_cuda_graph_cache
         self._lock = threading.Lock()
         self._inference_stream = torch.cuda.Stream(device=self._device)
-        self._pre_process_cuda_stream = torch.cuda.Stream(device=self._device)
         self._thread_local_storage = threading.local()
         self.recommended_parameters = recommended_parameters
-        self._fast_preprocess_runtime = FastPreprocessRuntime(device=self._device)
+        self._stream_pipeline_enabled = get_rfdetr_pipeline_depth() > 1
+        if self._stream_pipeline_enabled:
+            self._pre_process_cuda_stream = torch.cuda.Stream(device=self._device)
+            self._fast_preprocess_runtime = FastPreprocessRuntime(device=self._device)
 
     @property
     def class_names(self) -> List[str]:
@@ -242,14 +245,16 @@ def pre_process(
         pre_processing_overrides: Optional[PreProcessingOverrides] = None,
         **kwargs,
     ) -> Tuple[torch.Tensor, List[PreProcessingMetadata]]:
-        fast = self._fast_preprocess_runtime.try_preprocess(
-            images=images,
-            input_color_format=input_color_format,
-            image_size=image_size,
-            image_pre_processing=self._inference_config.image_pre_processing,
-            network_input=self._inference_config.network_input,
-            stream=self._pre_process_stream,
-        )
+        fast = None
+        if self._stream_pipeline_enabled:
+            fast = self._fast_preprocess_runtime.try_preprocess(
+                images=images,
+                input_color_format=input_color_format,
+                image_size=image_size,
+                image_pre_processing=self._inference_config.image_pre_processing,
+                network_input=self._inference_config.network_input,
+                stream=self._pre_process_stream,
+            )
         if fast is not None:
             self._fast_preproc_event = fast.ready_event
             return fast.tensor, fast.metadata
@@ -264,7 +269,12 @@ def pre_process(
                 pre_processing_overrides=pre_processing_overrides,
             )
         self._pre_process_stream.synchronize()
-        pre_processed_images._pre_processing_meta = pre_processing_meta  # type: ignore[attr-defined]
+        if self._stream_pipeline_enabled:
+            setattr(
+                pre_processed_images,
+                "_pre_processing_meta",
+                pre_processing_meta,
+            )
         return pre_processed_images, pre_processing_meta
 
     def forward(
@@ -345,7 +355,13 @@ def post_process(
 
     @property
     def _pre_process_stream(self) -> torch.cuda.Stream:
-        return self._pre_process_cuda_stream
+        if self._stream_pipeline_enabled:
+            return self._pre_process_cuda_stream
+        if not hasattr(self._thread_local_storage, "pre_process_stream"):
+            self._thread_local_storage.pre_process_stream = torch.cuda.Stream(
+                device=self._device
+            )
+        return self._thread_local_storage.pre_process_stream
 
     @property
     def _post_process_stream(self) -> torch.cuda.Stream:
diff --git a/inference_models/tests/unit_tests/test_configuration.py b/inference_models/tests/unit_tests/test_configuration.py
new file mode 100644
index 0000000000..e1c5f55355
--- /dev/null
+++ b/inference_models/tests/unit_tests/test_configuration.py
@@ -0,0 +1,48 @@
+import pytest
+
+from inference_models.configuration import (
+    DEFAULT_RFDETR_PIPELINE_DEPTH,
+    get_rfdetr_pipeline_depth,
+    parse_rfdetr_pipeline_depth,
+)
+from inference_models.errors import InvalidEnvVariable
+
+
+def test_parse_rfdetr_pipeline_depth_uses_default_when_env_missing() -> None:
+    assert parse_rfdetr_pipeline_depth(None) == DEFAULT_RFDETR_PIPELINE_DEPTH
+
+
+@pytest.mark.parametrize(
+    "value, expected",
+    [
+        ("1", 1),
+        ("2", 2),
+        (" 3 ", 3),
+    ],
+)
+def test_parse_rfdetr_pipeline_depth_accepts_positive_integers(
+    value: str,
+    expected: int,
+) -> None:
+    assert parse_rfdetr_pipeline_depth(value) == expected
+
+
+@pytest.mark.parametrize("value", ["invalid", "1.5", "", "0", "-1"])
+def test_parse_rfdetr_pipeline_depth_rejects_invalid_values(value: str) -> None:
+    with pytest.raises(InvalidEnvVariable):
+        parse_rfdetr_pipeline_depth(value)
+
+
+def test_get_rfdetr_pipeline_depth_reads_environment(monkeypatch) -> None:
+    monkeypatch.setenv("RFDETR_PIPELINE_DEPTH", "2")
+    assert get_rfdetr_pipeline_depth() == 2
+
+
+@pytest.mark.parametrize("value", ["0", "-4", "invalid"])
+def test_get_rfdetr_pipeline_depth_rejects_invalid_environment(
+    monkeypatch,
+    value: str,
+) -> None:
+    monkeypatch.setenv("RFDETR_PIPELINE_DEPTH", value)
+    with pytest.raises(InvalidEnvVariable):
+        get_rfdetr_pipeline_depth()

From d6d131c5cf02d373b4e74a642ca17150bcb60300 Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Thu, 4 Jun 2026 20:06:32 +0000
Subject: [PATCH 33/76] Gate RF-DETR preprocess by preproc flag

---
 .../inference_models/configuration.py         | 38 ---------------
 .../rfdetr_instance_segmentation_trt.py       | 13 ++---
 .../tests/unit_tests/test_configuration.py    | 48 -------------------
 3 files changed, 5 insertions(+), 94 deletions(-)
 delete mode 100644 inference_models/tests/unit_tests/test_configuration.py

diff --git a/inference_models/inference_models/configuration.py b/inference_models/inference_models/configuration.py
index 6c89d889cd..5b10d0daf3 100644
--- a/inference_models/inference_models/configuration.py
+++ b/inference_models/inference_models/configuration.py
@@ -1,10 +1,8 @@
 import os
 import warnings
-from typing import Optional
 
 import torch
 
-from inference_models.errors import InvalidEnvVariable
 from inference_models.utils.environment import (
     get_boolean_from_env,
     get_comma_separated_list_of_integers_from_env,
@@ -301,42 +299,6 @@
     variable_name="INFERENCE_MODELS_RFDETR_TRITON_PREPROC_ENABLED",
     default=DEFAULT_INFERENCE_MODELS_RFDETR_TRITON_PREPROC_ENABLED,
 )
-RFDETR_PIPELINE_DEPTH_ENV_NAME = "RFDETR_PIPELINE_DEPTH"
-DEFAULT_RFDETR_PIPELINE_DEPTH = 1
-MIN_RFDETR_PIPELINE_DEPTH = 1
-
-
-def parse_rfdetr_pipeline_depth(value: Optional[str]) -> int:
-    """Parse and validate the RF-DETR streaming pipeline depth."""
-    if value is None:
-        return DEFAULT_RFDETR_PIPELINE_DEPTH
-    try:
-        parsed = int(value)
-    except (TypeError, ValueError):
-        raise InvalidEnvVariable(
-            message=(
-                f"Expected environment variable `{RFDETR_PIPELINE_DEPTH_ENV_NAME}` "
-                f"to be an integer but got '{value}'"
-            ),
-            help_url="https://inference-models.roboflow.com/errors/runtime-environment/#invalidenvvariable",
-        )
-    if parsed < MIN_RFDETR_PIPELINE_DEPTH:
-        raise InvalidEnvVariable(
-            message=(
-                f"Expected environment variable `{RFDETR_PIPELINE_DEPTH_ENV_NAME}` "
-                f"to be >= {MIN_RFDETR_PIPELINE_DEPTH} but got '{value}'"
-            ),
-            help_url="https://inference-models.roboflow.com/errors/runtime-environment/#invalidenvvariable",
-        )
-    return parsed
-
-
-def get_rfdetr_pipeline_depth() -> int:
-    """Read and validate ``RFDETR_PIPELINE_DEPTH`` from the environment."""
-    return parse_rfdetr_pipeline_depth(os.getenv(RFDETR_PIPELINE_DEPTH_ENV_NAME))
-
-
-RFDETR_PIPELINE_DEPTH = get_rfdetr_pipeline_depth()
 INFERENCE_MODELS_ROBOFLOW_INSTANT_DEFAULT_CONFIDENCE = get_float_from_env(
     variable_name="INFERENCE_MODELS_ROBOFLOW_INSTANT_DEFAULT_CONFIDENCE",
     default=0.99,
diff --git a/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py b/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py
index 7c0fea07ff..4ec56543e2 100644
--- a/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py
+++ b/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py
@@ -13,7 +13,7 @@
 from inference_models.configuration import (
     DEFAULT_DEVICE,
     INFERENCE_MODELS_RFDETR_DEFAULT_CONFIDENCE,
-    get_rfdetr_pipeline_depth,
+    INFERENCE_MODELS_RFDETR_TRITON_PREPROC_ENABLED,
 )
 from inference_models.entities import ColorFormat, Confidence
 from inference_models.errors import (
@@ -224,9 +224,8 @@ def __init__(
         self._inference_stream = torch.cuda.Stream(device=self._device)
         self._thread_local_storage = threading.local()
         self.recommended_parameters = recommended_parameters
-        self._stream_pipeline_enabled = get_rfdetr_pipeline_depth() > 1
-        if self._stream_pipeline_enabled:
-            self._pre_process_cuda_stream = torch.cuda.Stream(device=self._device)
+        self._fast_preprocess_enabled = INFERENCE_MODELS_RFDETR_TRITON_PREPROC_ENABLED
+        if self._fast_preprocess_enabled:
             self._fast_preprocess_runtime = FastPreprocessRuntime(device=self._device)
 
     @property
@@ -246,7 +245,7 @@ def pre_process(
         **kwargs,
     ) -> Tuple[torch.Tensor, List[PreProcessingMetadata]]:
         fast = None
-        if self._stream_pipeline_enabled:
+        if self._fast_preprocess_enabled:
             fast = self._fast_preprocess_runtime.try_preprocess(
                 images=images,
                 input_color_format=input_color_format,
@@ -269,7 +268,7 @@ def pre_process(
                 pre_processing_overrides=pre_processing_overrides,
             )
         self._pre_process_stream.synchronize()
-        if self._stream_pipeline_enabled:
+        if self._fast_preprocess_enabled:
             setattr(
                 pre_processed_images,
                 "_pre_processing_meta",
@@ -355,8 +354,6 @@ def post_process(
 
     @property
     def _pre_process_stream(self) -> torch.cuda.Stream:
-        if self._stream_pipeline_enabled:
-            return self._pre_process_cuda_stream
         if not hasattr(self._thread_local_storage, "pre_process_stream"):
             self._thread_local_storage.pre_process_stream = torch.cuda.Stream(
                 device=self._device
diff --git a/inference_models/tests/unit_tests/test_configuration.py b/inference_models/tests/unit_tests/test_configuration.py
deleted file mode 100644
index e1c5f55355..0000000000
--- a/inference_models/tests/unit_tests/test_configuration.py
+++ /dev/null
@@ -1,48 +0,0 @@
-import pytest
-
-from inference_models.configuration import (
-    DEFAULT_RFDETR_PIPELINE_DEPTH,
-    get_rfdetr_pipeline_depth,
-    parse_rfdetr_pipeline_depth,
-)
-from inference_models.errors import InvalidEnvVariable
-
-
-def test_parse_rfdetr_pipeline_depth_uses_default_when_env_missing() -> None:
-    assert parse_rfdetr_pipeline_depth(None) == DEFAULT_RFDETR_PIPELINE_DEPTH
-
-
-@pytest.mark.parametrize(
-    "value, expected",
-    [
-        ("1", 1),
-        ("2", 2),
-        (" 3 ", 3),
-    ],
-)
-def test_parse_rfdetr_pipeline_depth_accepts_positive_integers(
-    value: str,
-    expected: int,
-) -> None:
-    assert parse_rfdetr_pipeline_depth(value) == expected
-
-
-@pytest.mark.parametrize("value", ["invalid", "1.5", "", "0", "-1"])
-def test_parse_rfdetr_pipeline_depth_rejects_invalid_values(value: str) -> None:
-    with pytest.raises(InvalidEnvVariable):
-        parse_rfdetr_pipeline_depth(value)
-
-
-def test_get_rfdetr_pipeline_depth_reads_environment(monkeypatch) -> None:
-    monkeypatch.setenv("RFDETR_PIPELINE_DEPTH", "2")
-    assert get_rfdetr_pipeline_depth() == 2
-
-
-@pytest.mark.parametrize("value", ["0", "-4", "invalid"])
-def test_get_rfdetr_pipeline_depth_rejects_invalid_environment(
-    monkeypatch,
-    value: str,
-) -> None:
-    monkeypatch.setenv("RFDETR_PIPELINE_DEPTH", value)
-    with pytest.raises(InvalidEnvVariable):
-        get_rfdetr_pipeline_depth()

From 80fe467b6b6501af6cc076f94dfcd196284dca18 Mon Sep 17 00:00:00 2001
From: Sergii Bondariev <sergii@roboflow.com>
Date: Thu, 4 Jun 2026 13:21:01 -0700
Subject: [PATCH 34/76] Support RF-DETR Keypoints model (#2401)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* init rfdetr keypoints

* post process keypoints

* bump version

* update inference-model version to 0.29.0-rc1

* update requirements cpu and gpu with inference_models 0.29.0rc1

* upd uv lock

* add inf adapter for rfdetr keypoint preview

* normalize kp scores

* upd version

* Bump inference-models version

* Update inference dependencies

---------

Co-authored-by: Paweł Pęczek <146137186+PawelPeczek-Roboflow@users.noreply.github.com>
Co-authored-by: Paweł Pęczek <pawel@roboflow.com>
---
 inference/core/version.py                     |   2 +-
 inference/models/utils.py                     |   6 +
 .../inference_models/configuration.py         |   4 +
 .../models/auto_loaders/models_registry.py    |  14 +
 .../models/common/roboflow/model_packages.py  |  20 +-
 .../inference_models/models/rfdetr/common.py  | 216 +++++++++++++-
 .../rfdetr_key_points_detection_onnx.py       | 269 ++++++++++++++++++
 inference_models/pyproject.toml               |   2 +-
 inference_models/uv.lock                      |   2 +-
 requirements/requirements.cpu.txt             |   2 +-
 requirements/requirements.gpu.txt             |   2 +-
 requirements/requirements.jetson.txt          |   2 +-
 requirements/requirements.vino.txt            |   2 +-
 13 files changed, 533 insertions(+), 10 deletions(-)
 create mode 100644 inference_models/inference_models/models/rfdetr/rfdetr_key_points_detection_onnx.py

diff --git a/inference/core/version.py b/inference/core/version.py
index 8099760fb0..692e5cca3f 100644
--- a/inference/core/version.py
+++ b/inference/core/version.py
@@ -1,4 +1,4 @@
-__version__ = "1.2.13"
+__version__ = "1.3.0"
 
 
 if __name__ == "__main__":
diff --git a/inference/models/utils.py b/inference/models/utils.py
index 188f01e657..4ee4516e71 100644
--- a/inference/models/utils.py
+++ b/inference/models/utils.py
@@ -996,6 +996,12 @@ def get_roboflow_model(*args, **kwargs):
         InferenceModelsSemanticSegmentationAdapter
     )
 
+    # RFDETR keypoint detection is inference_models-only (no legacy implementation),
+    # so we add entries directly rather than swapping existing ones.
+    ROBOFLOW_MODEL_TYPES[("keypoint-detection", "rfdetr-keypoint-preview")] = (
+        InferenceModelsKeyPointsDetectionAdapter
+    )
+
     # YOLOLite is inference_models-only (no legacy implementation),
     # so we add entries directly rather than swapping existing ones.
     for variant in [
diff --git a/inference_models/inference_models/configuration.py b/inference_models/inference_models/configuration.py
index 3b7fb89bce..5b8c7dd699 100644
--- a/inference_models/inference_models/configuration.py
+++ b/inference_models/inference_models/configuration.py
@@ -289,6 +289,17 @@
     variable_name="INFERENCE_MODELS_RFDETR_DEFAULT_CONFIDENCE",
     default=INFERENCE_MODELS_DEFAULT_CONFIDENCE,
 )
+# Read the threshold from the RFDETR-prefixed variable that matches this
+# constant's name; the name without the "RF" prefix is still honored as the
+# fallback default so configurations using that spelling keep working.
+_LEGACY_RFDETR_DEFAULT_KEY_POINTS_THRESHOLD = get_float_from_env(
+    variable_name="INFERENCE_MODELS_DETR_DEFAULT_KEY_POINTS_THRESHOLD",
+    default=0.3,
+)
+INFERENCE_MODELS_RFDETR_DEFAULT_KEY_POINTS_THRESHOLD = get_float_from_env(
+    variable_name="INFERENCE_MODELS_RFDETR_DEFAULT_KEY_POINTS_THRESHOLD",
+    default=_LEGACY_RFDETR_DEFAULT_KEY_POINTS_THRESHOLD,
+)
 INFERENCE_MODELS_ROBOFLOW_INSTANT_DEFAULT_CONFIDENCE = get_float_from_env(
     variable_name="INFERENCE_MODELS_ROBOFLOW_INSTANT_DEFAULT_CONFIDENCE",
     default=0.99,
diff --git a/inference_models/inference_models/models/auto_loaders/models_registry.py b/inference_models/inference_models/models/auto_loaders/models_registry.py
index bd3567ae65..43aa78fc15 100644
--- a/inference_models/inference_models/models/auto_loaders/models_registry.py
+++ b/inference_models/inference_models/models/auto_loaders/models_registry.py
@@ -362,6 +362,20 @@ class RegistryEntry:
         module_name="inference_models.models.rfdetr.rfdetr_instance_segmentation_trt",
         class_name="RFDetrForInstanceSegmentationTRT",
     ),
+    ("rfdetr", KEYPOINT_DETECTION_TASK, BackendType.ONNX): RegistryEntry(
+        model_class=LazyClass(
+            module_name="inference_models.models.rfdetr.rfdetr_key_points_detection_onnx",
+            class_name="RFDetrForKeyPointsONNX",
+        ),
+        supported_model_features={
+            "resolution",
+            "patch_size",
+            "num_windows",
+            "dec_layers",
+            "num_queries",
+            "num_select",
+        },
+    ),
     ("moondream2", VLM_TASK, BackendType.HF): LazyClass(
         module_name="inference_models.models.moondream2.moondream2_hf",
         class_name="MoonDream2HF",
diff --git a/inference_models/inference_models/models/common/roboflow/model_packages.py b/inference_models/inference_models/models/common/roboflow/model_packages.py
index 80ee469ff2..cc45a31f25 100644
--- a/inference_models/inference_models/models/common/roboflow/model_packages.py
+++ b/inference_models/inference_models/models/common/roboflow/model_packages.py
@@ -86,6 +86,7 @@ class via negative indexing in downstream consumers (`class_names[-1]`,
 
 def parse_key_points_metadata(
     key_points_metadata_path: str,
+    classes_re_mapping=None,
 ) -> Tuple[List[List[str]], List[List[Tuple[int, int]]]]:
     try:
         parsed_config = read_json(path=key_points_metadata_path)
@@ -93,24 +94,39 @@ def parse_key_points_metadata(
             raise ValueError(
                 "config should contain list of key points descriptions for each instance"
             )
-        class_names: List[Optional[List[str]]] = [None] * len(parsed_config)
-        skeletons: List[Optional[List[Tuple[int, int]]]] = [None] * len(parsed_config)
+        if classes_re_mapping is not None:
+            class_names = [None] * len(classes_re_mapping.remaining_class_ids)
+            skeletons = [None] * len(classes_re_mapping.remaining_class_ids)
+        else:
+            class_names: List[Optional[List[str]]] = [None] * len(parsed_config)
+            skeletons: List[Optional[List[Tuple[int, int]]]] = [None] * len(parsed_config)
+
         for instance_key_point_description in parsed_config:
             if "object_class_id" not in instance_key_point_description:
                 raise ValueError(
                     "instance key point description lack 'object_class_id' key"
                 )
             object_class_id: int = instance_key_point_description["object_class_id"]
+
+            if classes_re_mapping is not None:
+                object_class_id = int(classes_re_mapping.class_mapping[object_class_id])
+
+                if object_class_id == -1:
+                    continue
+
             if not 0 <= object_class_id < len(class_names):
                 raise ValueError("`object_class_id` field point invalid class")
+
             if "keypoints" not in instance_key_point_description:
                 raise ValueError(
                     f"`keypoints` field not available in config for class with id {object_class_id}"
                 )
+
             class_names[object_class_id] = _retrieve_key_points_names(
                 key_points=instance_key_point_description["keypoints"],
             )
             key_points_count = len(class_names[object_class_id])
+
             if "edges" not in instance_key_point_description:
                 raise ValueError(
                     f"`edges` field not available in config for class with id {object_class_id}"
diff --git a/inference_models/inference_models/models/rfdetr/common.py b/inference_models/inference_models/models/rfdetr/common.py
index a3ae26cd29..d353b33955 100644
--- a/inference_models/inference_models/models/rfdetr/common.py
+++ b/inference_models/inference_models/models/rfdetr/common.py
@@ -3,7 +3,7 @@
 import torch
 from torchvision.transforms import functional
 
-from inference_models import Detections, InstanceDetections, InstancesRLEMasks
+from inference_models import Detections, InstanceDetections, InstancesRLEMasks, KeyPoints
 from inference_models.entities import ImageDimensions
 from inference_models.errors import CorruptedModelPackageError
 from inference_models.models.common.roboflow.model_packages import (
@@ -335,3 +335,217 @@ def post_process_instance_segmentation_results_to_rle_masks(
                 )
             )
     return final_results
+
+
+def cxcywh_to_xyxy(boxes):  # last axis (cx, cy, w, h) -> (x1, y1, x2, y2)
+    boxes = boxes.clone()  # work on a copy; the caller's tensor is not modified
+    boxes[..., 0] = boxes[..., 0] - boxes[..., 2] / 2  # x1 = cx - w / 2
+    boxes[..., 1] = boxes[..., 1] - boxes[..., 3] / 2  # y1 = cy - h / 2
+    boxes[..., 2] = boxes[..., 0] + boxes[..., 2]  # x2 = x1 + w (slot 0 already holds x1)
+    boxes[..., 3] = boxes[..., 1] + boxes[..., 3]  # y2 = y1 + h (slot 1 already holds y1)
+    return boxes
+
+
+def post_process_keypoint_detection_results(
+    bboxes: torch.Tensor,     # [B, N_q, 4] cxcywh, normalized [0, 1]
+    out_logits: torch.Tensor,     # [B, N_q, C]
+    out_keypoints: torch.Tensor,  # [B, N_q, K_padded, D]
+    pre_processing_meta: List[PreProcessingMetadata],  # indexed by batch position
+    threshold: Union[float, torch.Tensor],  # scalar, or tensor indexed by class id
+    key_points_threshold: float,  # keypoints with findable score below this are zeroed out
+    num_classes: int,  # not read inside this function (C is taken from out_logits)
+    classes_re_mapping: Optional[ClassesReMapping],  # None is rejected with ValueError
+    key_points_classes_for_instances,  # tensor indexed by class id -> count of valid keypoint slots
+    key_points_slots_in_prediction,  # passed to torch.arange: number of slots checked per instance
+    device: torch.device,  # device for the threshold and image-size tensors created here
+) -> Tuple[List[KeyPoints], Optional[List[Detections]]]:
+    # Decodes raw RF-DETR keypoint outputs into per-image KeyPoints and Detections. The head emits
+    # one slot per (class, max_keypoint), padded with zeros for classes with fewer keypoints. The preview
+    # model is trained with per-class keypoint counts [0, 17] (background included), so K_padded = 17 * 2 = 34.
+    B, N_q, C = out_logits.shape
+    K_padded = out_keypoints.shape[2]
+    D = out_keypoints.shape[3]
+    assert K_padded % C == 0, f"K_padded={K_padded} not divisible by num_classes={C}"
+    K_per_class = K_padded // C
+
+    scores = out_logits.sigmoid()
+    flat_scores = scores.view(B, -1)
+    num_select = flat_scores.shape[1]  # N_q * C: every (query, class) pair is kept and ranked
+
+    topk_values, topk_indexes = torch.topk(flat_scores, num_select, dim=1)
+    scores = topk_values
+    topk_boxes = topk_indexes // C  # [B, num_select] query indices
+    labels = topk_indexes % C       # [B, num_select] class indices
+
+    bboxes = torch.gather(bboxes, 1, topk_boxes.unsqueeze(-1).repeat(1, 1, 4))
+    bboxes = cxcywh_to_xyxy(bboxes)
+
+    # Gather keypoints per top-k query, then select the per-class slot.
+    # Keep all D=8 dims: [x, y, findable_logit, visible_logit, log_l11, l21, log_l22, class_logit]
+    # The model trains a 2D Gaussian per keypoint with precision matrix P = L L^T;
+    # log(sqrt(det P)) = log_l11 + log_l22 — the model's own predicted localization sharpness.
+    kp_gather_idx = topk_boxes.unsqueeze(-1).unsqueeze(-1).expand(B, num_select, K_padded, D)
+    keypoints_g = torch.gather(out_keypoints, 1, kp_gather_idx)  # [B, num_select, K_padded, D]
+    keypoints_g = keypoints_g.view(B, num_select, C, K_per_class, D)
+
+    batch_idx = torch.arange(B, device=labels.device).unsqueeze(-1).expand_as(labels)
+    query_idx = torch.arange(num_select, device=labels.device).unsqueeze(0).expand_as(labels)
+    keypoints_sel = keypoints_g[batch_idx, query_idx, labels]  # [B, num_select, K_per_class, D=8]
+
+    keypoints_xy = keypoints_sel[..., :2]
+    keypoints_conf = keypoints_sel[..., 2:3].sigmoid()  # findable [0,1] per kp
+
+    # Score fusion: object confidence × inverse mean expected squared error of findable kps.
+    #   score = cs · (Σ_k w_k · trace(Σ_k) / Σ_k w_k)^(-α)
+    # where Σ_k = (L_k L_k^T)^{-1} (model's predicted per-keypoint covariance) and
+    # trace(Σ_k) = 1/l11² + 1/l22² + l21²/(l11·l22)² = E[(x-μ_x)² + (y-μ_y)²].
+    # Note: σ_k (COCO bandwidths) NOT used — model's L already encodes per-kp difficulty
+    # implicitly. Sigma-free form transfers to any keypoint domain. α=0.20 seems to work well.
+    log_l11 = keypoints_sel[..., 4]
+    l21     = keypoints_sel[..., 5]
+    log_l22 = keypoints_sel[..., 6]
+    # log(trace) per kp via logsumexp over the three log-terms (numerical stability)
+    log_t1 = -2.0 * log_l11                                                   # log(1/l11²)
+    log_t2 = -2.0 * log_l22                                                   # log(1/l22²)
+    log_t3 = 2.0 * torch.log(l21.abs().clamp(min=1e-12)) + log_t1 + log_t2    # log(l21²/(l11·l22)²)
+    log_trace = torch.logsumexp(torch.stack([log_t1, log_t2, log_t3], dim=-1), dim=-1)
+    # Findable-weighted arithmetic mean of trace, in log space
+    w_find = keypoints_conf.squeeze(-1)
+    log_w = torch.log(w_find.clamp(min=1e-12))
+    log_mean_trace = torch.logsumexp(log_trace + log_w, dim=-1) - torch.logsumexp(log_w, dim=-1)
+    scores = scores * torch.exp(-0.20 * log_mean_trace)  # cs · (mean trace)^(-α) with α = 0.20
+
+    # squash the fused score with s / (1 + s) before it is compared against the thresholds
+    scores = scores / (1 + scores)
+
+    keypoints_final = torch.cat([keypoints_xy, keypoints_conf], dim=-1)  # [B, num_select, K_per_class, 3]
+
+    # iterate over batch and collect detections above thresholds
+    all_key_points, detections = [], []
+
+    if isinstance(threshold, torch.Tensor):
+        threshold = threshold.to(device=device, dtype=keypoints_final.dtype)
+
+    for bidx in range(len(keypoints_final)):
+        predicted_confidence = scores[bidx]
+        top_classes = labels[bidx]
+        image_bboxes = bboxes[bidx]
+        image_keypoints = keypoints_final[bidx]
+        image_meta = pre_processing_meta[bidx]
+
+        if classes_re_mapping is not None:
+            remapping_mask = torch.isin(
+                top_classes, classes_re_mapping.remaining_class_ids
+            )
+            top_classes = classes_re_mapping.class_mapping[top_classes[remapping_mask]]
+            predicted_confidence = predicted_confidence[remapping_mask]
+            image_bboxes = image_bboxes[remapping_mask]
+            image_keypoints = image_keypoints[remapping_mask]
+        else:
+            # No-remapping path is not implemented; author's note: similar 'else' block for object detection is not correct
+            raise ValueError("Not implemented")
+
+        confidence_mask = predicted_confidence > (
+            threshold[top_classes.long()]
+            if isinstance(threshold, torch.Tensor)
+            else threshold
+        )
+
+        predicted_confidence = predicted_confidence[confidence_mask]
+        top_classes = top_classes[confidence_mask]
+        selected_boxes = image_bboxes[confidence_mask]
+        selected_keypoints = image_keypoints[confidence_mask]  # rows that passed the confidence threshold
+        predicted_confidence, sorted_indices = torch.sort(
+            predicted_confidence, descending=True
+        )
+        top_classes = top_classes[sorted_indices]
+        selected_boxes_xyxy_pct = selected_boxes[sorted_indices]
+        selected_keypoints_xy_pct_conf = selected_keypoints[sorted_indices]
+        selected_keypoints_xy_pct = selected_keypoints_xy_pct_conf[:, :, :2]  # (x, y); scaled by the image size below
+        selected_keypoints_conf = selected_keypoints_xy_pct_conf[:, :, 2]
+
+        # use the non-square intermediate size when the metadata provides one, else the inference size
+        denorm_size = (
+            image_meta.nonsquare_intermediate_size or image_meta.inference_size
+        )
+        inference_size_whwh = torch.tensor(
+            [
+                denorm_size.width,
+                denorm_size.height,
+                denorm_size.width,
+                denorm_size.height,
+            ],
+            device=device,
+        )
+        selected_boxes_xyxy = selected_boxes_xyxy_pct * inference_size_whwh
+        selected_keypoints_xy = selected_keypoints_xy_pct * inference_size_whwh[:2]
+
+        selected_boxes_xyxy = rescale_image_detections(
+            image_detections=selected_boxes_xyxy,
+            image_metadata=image_meta,
+        )
+        detections.append(
+            Detections(
+                xyxy=selected_boxes_xyxy.round().int(),
+                confidence=predicted_confidence,
+                class_id=top_classes.int(),
+            )
+        )
+
+        # Keypoint counterpart of rescale_image_detections: remove padding, undo scaling, re-add crop offset, clamp.
+        offsets = torch.as_tensor([image_meta.pad_left, image_meta.pad_top],
+            dtype=selected_keypoints_xy.dtype,
+            device=selected_keypoints_xy.device,
+        )
+        selected_keypoints_xy.sub_(offsets)
+        scale = torch.as_tensor([image_meta.scale_width, image_meta.scale_height],
+            dtype=selected_keypoints_xy.dtype,
+            device=selected_keypoints_xy.device,
+        )
+        selected_keypoints_xy.div_(scale)
+
+        if (
+            image_meta.static_crop_offset.offset_x != 0
+            or image_meta.static_crop_offset.offset_y != 0
+        ):
+            static_crop_offsets = torch.as_tensor(
+                [
+                    image_meta.static_crop_offset.offset_x,
+                    image_meta.static_crop_offset.offset_y,
+                ],
+                dtype=selected_keypoints_xy.dtype,
+                device=selected_keypoints_xy.device,
+            )
+            selected_keypoints_xy.add_(static_crop_offsets)
+
+        xy_max = torch.as_tensor(
+            [image_meta.original_size.width, image_meta.original_size.height],
+            dtype=selected_keypoints_xy.dtype,
+            device=selected_keypoints_xy.device,
+        )
+        selected_keypoints_xy.clamp_(min=torch.zeros_like(xy_max), max=xy_max)
+
+        # zero out slots beyond each class's keypoint count and keypoints below the threshold
+        # (this is similar to the end of yolo26 keypoint postprocessing)
+        key_points_classes_for_instance_class = (
+            (key_points_classes_for_instances[top_classes])
+            .unsqueeze(1)
+            .to(device=selected_keypoints_xy.device)
+        )
+        invalid_slot_keypoints = (
+            torch.arange(key_points_slots_in_prediction, device=selected_keypoints_xy.device)
+            .unsqueeze(0)
+            .repeat(selected_keypoints_xy.shape[0], 1)
+            >= key_points_classes_for_instance_class
+        )
+        keypoints_below_threshold = selected_keypoints_conf < key_points_threshold
+        mask = invalid_slot_keypoints | keypoints_below_threshold
+        selected_keypoints_xy[mask] = 0.0
+        selected_keypoints_conf[mask] = 0.0
+        all_key_points.append(
+            KeyPoints(
+                xy=selected_keypoints_xy.round().int(),  # rounded to integer coordinates
+                class_id=top_classes.int(),
+                confidence=selected_keypoints_conf,
+            )
+        )
+
+    return all_key_points, detections
diff --git a/inference_models/inference_models/models/rfdetr/rfdetr_key_points_detection_onnx.py b/inference_models/inference_models/models/rfdetr/rfdetr_key_points_detection_onnx.py
new file mode 100644
index 0000000000..9c6e9596c0
--- /dev/null
+++ b/inference_models/inference_models/models/rfdetr/rfdetr_key_points_detection_onnx.py
@@ -0,0 +1,269 @@
+import threading
+from typing import List, Optional, Tuple, Union
+
+import numpy as np
+import torch
+
+from inference_models import (
+    Detections,
+    KeyPoints,
+    KeyPointsDetectionModel,
+    PreProcessingOverrides,
+)
+from inference_models.configuration import (
+    DEFAULT_DEVICE,
+    INFERENCE_MODELS_RFDETR_DEFAULT_CONFIDENCE,
+    INFERENCE_MODELS_RFDETR_DEFAULT_KEY_POINTS_THRESHOLD,
+)
+from inference_models.entities import ColorFormat, Confidence
+from inference_models.errors import (
+    EnvironmentConfigurationError,
+    MissingDependencyError,
+)
+from inference_models.models.common.model_packages import get_model_package_contents
+from inference_models.models.common.onnx import (
+    run_onnx_session_with_batch_size_limit,
+    set_onnx_execution_provider_defaults,
+)
+from inference_models.models.common.roboflow.model_packages import (
+    InferenceConfig,
+    PreProcessingMetadata,
+    ResizeMode,
+    parse_class_names_file,
+    parse_inference_config,
+    parse_key_points_metadata,
+)
+from inference_models.models.common.roboflow.post_processing import ConfidenceFilter
+from inference_models.models.rfdetr.class_remapping import (
+    ClassesReMapping,
+    prepare_class_remapping,
+)
+from inference_models.models.rfdetr.common import post_process_keypoint_detection_results
+from inference_models.models.rfdetr.pre_processing import pre_process_network_input
+from inference_models.utils.onnx_introspection import (
+    get_selected_onnx_execution_providers,
+)
+from inference_models.weights_providers.entities import RecommendedParameters
+
+# onnxruntime comes with the `onnx-*` extras; report its absence as an actionable MissingDependencyError.
+try:
+    import onnxruntime
+except ImportError as import_error:
+    raise MissingDependencyError(
+        message="Running RFDETR model with ONNX backend requires onnxruntime installation, which is brought with "
+        "`onnx-*` extras of `inference-models` library. If you see this error running locally, "
+        "please follow our installation guide: https://inference-models.roboflow.com/getting-started/installation/"
+        " If you see this error using Roboflow infrastructure, make sure the service you use does support the "
+        "model, You can also contact Roboflow to get support. Additionally - if AutoModel.from_pretrained(...) "
+        f"automatically selects model package which does not match your environment - that's a serious problem and "
+        f"we will really appreciate letting us know - https://github.com/roboflow/inference/issues",
+        help_url="https://inference-models.roboflow.com/errors/runtime-environment/#missingdependencyerror",
+    ) from import_error
+
+
+class RFDetrForKeyPointsONNX(
+    KeyPointsDetectionModel[
+        torch.Tensor, PreProcessingMetadata, Tuple[torch.Tensor, torch.Tensor, torch.Tensor]
+    ]
+):
+    """RF-DETR key-points detection model executed through an ONNX Runtime session."""
+
+    @classmethod
+    def from_pretrained(
+        cls,
+        model_name_or_path: str,
+        onnx_execution_providers: Optional[List[Union[str, tuple]]] = None,
+        default_onnx_trt_options: bool = True,
+        device: torch.device = DEFAULT_DEVICE,
+        rf_detr_max_input_resolution: Optional[Union[int, Tuple[int, int]]] = None,
+        recommended_parameters: Optional[RecommendedParameters] = None,
+        **kwargs,
+    ) -> "RFDetrForKeyPointsONNX":
+        """Load weights, configs and key-points metadata from a package dir and build the model."""
+        if onnx_execution_providers is None:
+            onnx_execution_providers = get_selected_onnx_execution_providers()
+        if not onnx_execution_providers:
+            raise EnvironmentConfigurationError(
+                message=f"Could not initialize model - selected backend is ONNX which requires execution provider to "
+                f"be specified - explicitly in `from_pretrained(...)` method or via env variable "
+                f"`ONNXRUNTIME_EXECUTION_PROVIDERS`. If you run model locally - adjust your setup, otherwise "
+                f"contact the platform support.",
+                help_url="https://inference-models.roboflow.com/errors/runtime-environment/#environmentconfigurationerror",
+            )
+        onnx_execution_providers = set_onnx_execution_provider_defaults(
+            providers=onnx_execution_providers,
+            model_package_path=model_name_or_path,
+            device=device,
+            default_onnx_trt_options=default_onnx_trt_options,
+        )
+        model_package_content = get_model_package_contents(
+            model_package_dir=model_name_or_path,
+            elements=[
+                "class_names.txt",
+                "inference_config.json",
+                "weights.onnx",
+                "keypoints_metadata.json",
+            ],
+        )
+        class_names = parse_class_names_file(
+            class_names_path=model_package_content["class_names.txt"]
+        )
+        inference_config = parse_inference_config(
+            config_path=model_package_content["inference_config.json"],
+            allowed_resize_modes={
+                ResizeMode.STRETCH_TO,
+                ResizeMode.LETTERBOX,
+                ResizeMode.CENTER_CROP,
+                ResizeMode.LETTERBOX_REFLECT_EDGES,
+            },
+            implicit_resize_mode_substitutions={
+                ResizeMode.FIT_LONGER_EDGE: (
+                    ResizeMode.STRETCH_TO,
+                    None,
+                    "RFDetr Keypoint Detection model running with ONNX backend was trained with "
+                    "`fit-longer-edge` input resize mode. This transform cannot be applied properly for "
+                    "RFDetr models. To ensure interoperability, `stretch` "
+                    "resize mode will be used instead. If model was trained on Roboflow platform, "
+                    "we recommend using preprocessing method different that `fit-longer-edge`.",
+                )
+            },
+            max_allowed_input_size=rf_detr_max_input_resolution,
+        )
+        classes_re_mapping = None
+        if inference_config.class_names_operations:
+            class_names, classes_re_mapping = prepare_class_remapping(
+                class_names=class_names,
+                class_names_operations=inference_config.class_names_operations,
+                device=device,
+            )
+        session = onnxruntime.InferenceSession(
+            path_or_bytes=model_package_content["weights.onnx"],
+            providers=onnx_execution_providers,
+        )
+        input_batch_size = session.get_inputs()[0].shape[0]
+        if isinstance(input_batch_size, str):  # a string here is treated as "no fixed batch size"
+            input_batch_size = None
+        input_name = session.get_inputs()[0].name
+        parsed_key_points_metadata, skeletons = parse_key_points_metadata(
+            key_points_metadata_path=model_package_content["keypoints_metadata.json"],
+            classes_re_mapping=classes_re_mapping,
+        )
+        return cls(
+            session=session,
+            input_name=input_name,
+            class_names=class_names,
+            classes_re_mapping=classes_re_mapping,
+            inference_config=inference_config,
+            device=device,
+            input_batch_size=input_batch_size,
+            parsed_key_points_metadata=parsed_key_points_metadata,
+            skeletons=skeletons,
+            recommended_parameters=recommended_parameters,
+        )
+
+    def __init__(
+        self,
+        session: onnxruntime.InferenceSession,
+        input_name: str,
+        class_names: List[str],
+        classes_re_mapping: Optional[ClassesReMapping],
+        inference_config: InferenceConfig,
+        device: torch.device,
+        input_batch_size: Optional[int],
+        parsed_key_points_metadata: List[List[str]],
+        skeletons: List[List[Tuple[int, int]]],
+        recommended_parameters: Optional[RecommendedParameters] = None,
+    ):
+        """Store the session, configs and key-points metadata; derive batch limits and slot counts."""
+        self._session = session
+        self._input_name = input_name
+        self._inference_config = inference_config
+        self._class_names = class_names
+        self._classes_re_mapping = classes_re_mapping
+        self._skeletons = skeletons
+        self._parsed_key_points_metadata = parsed_key_points_metadata
+        self._device = device
+        self._min_batch_size = input_batch_size
+        self._max_batch_size = (
+            input_batch_size
+            if input_batch_size is not None
+            else inference_config.forward_pass.max_dynamic_batch_size
+        )
+        self._session_thread_lock = threading.Lock()
+        self.recommended_parameters = recommended_parameters
+        self._key_points_classes_for_instances = torch.tensor(
+            [len(e) for e in self._parsed_key_points_metadata], device=device
+        )
+        self._key_points_slots_in_prediction = max(map(len, parsed_key_points_metadata))
+
+    @property
+    def class_names(self) -> List[str]:
+        return self._class_names
+
+    @property
+    def key_points_classes(self) -> List[List[str]]:
+        return self._parsed_key_points_metadata
+
+    @property
+    def skeletons(self) -> List[List[Tuple[int, int]]]:
+        return self._skeletons
+
+    def pre_process(
+        self,
+        images: Union[torch.Tensor, List[torch.Tensor], np.ndarray, List[np.ndarray]],
+        input_color_format: Optional[ColorFormat] = None,
+        pre_processing_overrides: Optional[PreProcessingOverrides] = None,
+        **kwargs,
+    ) -> Tuple[torch.Tensor, List[PreProcessingMetadata]]:
+        """Delegate to `pre_process_network_input` with this model's inference config and device."""
+        return pre_process_network_input(
+            images=images,
+            image_pre_processing=self._inference_config.image_pre_processing,
+            network_input=self._inference_config.network_input,
+            target_device=self._device,
+            input_color_format=input_color_format,
+            pre_processing_overrides=pre_processing_overrides,
+        )
+
+    def forward(
+        self, pre_processed_images: torch.Tensor, **kwargs
+    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+        """Run the ONNX session while holding the session lock; return (bboxes, logits, keypoints)."""
+        with self._session_thread_lock:
+            bboxes, logits, keypoints = run_onnx_session_with_batch_size_limit(
+                session=self._session,
+                inputs={self._input_name: pre_processed_images},
+                min_batch_size=self._min_batch_size,
+                max_batch_size=self._max_batch_size,
+            )
+            return bboxes, logits, keypoints
+
+    def post_process(
+        self,
+        model_results: Tuple[torch.Tensor, torch.Tensor, torch.Tensor],
+        pre_processing_meta: List[PreProcessingMetadata],
+        confidence: Confidence = "default",
+        key_points_threshold: float = INFERENCE_MODELS_RFDETR_DEFAULT_KEY_POINTS_THRESHOLD,
+        **kwargs,
+    ) -> Tuple[List[KeyPoints], Optional[List[Detections]]]:
+        """Resolve the confidence threshold and decode raw outputs into key points and detections."""
+        confidence_filter = ConfidenceFilter(
+            confidence=confidence,
+            recommended_parameters=self.recommended_parameters,
+            default_confidence=INFERENCE_MODELS_RFDETR_DEFAULT_CONFIDENCE,
+        )
+        bboxes, logits, keypoints = model_results
+        return post_process_keypoint_detection_results(
+            bboxes=bboxes,
+            out_logits=logits,
+            out_keypoints=keypoints,
+            pre_processing_meta=pre_processing_meta,
+            threshold=confidence_filter.get_threshold(self.class_names),
+            key_points_threshold=key_points_threshold,
+            num_classes=len(self.class_names),
+            classes_re_mapping=self._classes_re_mapping,
+            key_points_classes_for_instances=self._key_points_classes_for_instances,
+            key_points_slots_in_prediction=self._key_points_slots_in_prediction,
+            device=self._device,
+        )
+
diff --git a/inference_models/pyproject.toml b/inference_models/pyproject.toml
index ea42d510cf..6ddf576025 100644
--- a/inference_models/pyproject.toml
+++ b/inference_models/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "inference-models"
-version = "0.28.7"
+version = "0.29.0-rc3"
 description = "The new inference engine for Computer Vision models"
 readme = "README.md"
 requires-python = ">=3.10,<3.13"
diff --git a/inference_models/uv.lock b/inference_models/uv.lock
index 3a49b3a0c3..ecff7942cd 100644
--- a/inference_models/uv.lock
+++ b/inference_models/uv.lock
@@ -913,7 +913,7 @@ wheels = [
 
 [[package]]
 name = "inference-models"
-version = "0.28.7"
+version = "0.29.0rc3"
 source = { virtual = "." }
 dependencies = [
     { name = "accelerate" },
diff --git a/requirements/requirements.cpu.txt b/requirements/requirements.cpu.txt
index c5cbd242e4..88a212affa 100644
--- a/requirements/requirements.cpu.txt
+++ b/requirements/requirements.cpu.txt
@@ -1,3 +1,3 @@
 onnxruntime>=1.15.1,<1.22.0
 nvidia-ml-py<13.0.0
-inference-models[torch-cpu,onnx-cpu]~=0.28.7  # keep in sync between requirements.gpu.txt, requirements.cpu.txt, requirements.vino.txt
+inference-models[torch-cpu,onnx-cpu]~=0.29.0rc3  # keep in sync between requirements.gpu.txt, requirements.cpu.txt, requirements.vino.txt
diff --git a/requirements/requirements.gpu.txt b/requirements/requirements.gpu.txt
index 67751b65bf..1c85c7d48a 100644
--- a/requirements/requirements.gpu.txt
+++ b/requirements/requirements.gpu.txt
@@ -1,2 +1,2 @@
 onnxruntime-gpu>=1.15.1,<1.22.0
-inference-models[torch-cu124,onnx-cu12]~=0.28.7  # keep in sync between requirements.jetson requirements.gpu.txt, requirements.cpu.txt, requirements.vino.txt
+inference-models[torch-cu124,onnx-cu12]~=0.29.0rc3 # keep in sync between requirements.jetson requirements.gpu.txt, requirements.cpu.txt, requirements.vino.txt
diff --git a/requirements/requirements.jetson.txt b/requirements/requirements.jetson.txt
index 82133591c2..92ce0fc514 100644
--- a/requirements/requirements.jetson.txt
+++ b/requirements/requirements.jetson.txt
@@ -1,4 +1,4 @@
 pypdfium2>=4.11.0,<5.0.0
 jupyterlab>=4.3.0,<5.0.0
 PyYAML~=6.0.0
-inference-models~=0.28.7  # keep in sync between requirements.jetson requirements.gpu.txt, requirements.cpu.txt, requirements.vino.txt
+inference-models~=0.29.0rc3  # keep in sync between requirements.jetson requirements.gpu.txt, requirements.cpu.txt, requirements.vino.txt
diff --git a/requirements/requirements.vino.txt b/requirements/requirements.vino.txt
index 16f3a4006f..1adf27d5e2 100644
--- a/requirements/requirements.vino.txt
+++ b/requirements/requirements.vino.txt
@@ -1,2 +1,2 @@
 onnxruntime-openvino>=1.15.0,<1.22.0
-inference-models[torch-cpu]~=0.28.7  # keep in sync between requirements.jetson requirements.gpu.txt, requirements.cpu.txt, requirements.vino.txt
+inference-models[torch-cpu]~=0.29.0rc3  # keep in sync between requirements.jetson requirements.gpu.txt, requirements.cpu.txt, requirements.vino.txt

From cf7726cc05d58760bf08a96c17a2f8db82439d5b Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Sat, 30 May 2026 01:16:12 +0000
Subject: [PATCH 35/76] Add sparse RF-DETR RLE polygon conversion

---
 .../rfdetr_rle_to_poly_microbenchmark.py      | 442 ++++++++++++++++++
 .../core/models/inference_models_adapters.py  |  19 +-
 inference/core/utils/nsight.py                |  96 ++++
 inference/core/utils/rle_to_polygon.py        | 209 +++++++++
 .../core/utils/test_rle_to_polygon.py         | 194 ++++++++
 5 files changed, 946 insertions(+), 14 deletions(-)
 create mode 100644 development/stream_interface/rfdetr_rle_to_poly_microbenchmark.py
 create mode 100644 inference/core/utils/nsight.py
 create mode 100644 inference/core/utils/rle_to_polygon.py
 create mode 100644 tests/inference/unit_tests/core/utils/test_rle_to_polygon.py

diff --git a/development/stream_interface/rfdetr_rle_to_poly_microbenchmark.py b/development/stream_interface/rfdetr_rle_to_poly_microbenchmark.py
new file mode 100644
index 0000000000..2554cce763
--- /dev/null
+++ b/development/stream_interface/rfdetr_rle_to_poly_microbenchmark.py
@@ -0,0 +1,442 @@
+"""Capture/replay benchmark for RF-DETR RLE-to-polygon conversion.
+
+This targets the CPU path in
+``inference.core.models.inference_models_adapters.rle_masks2poly``:
+
+    COCO RLE counts -> dense mask -> cv2.findContours -> polygon arrays
+
+Default usage captures 100 invocations from the 1080p workflow and immediately
+replays them with exact output checks:
+
+    python development/stream_interface/rfdetr_rle_to_poly_microbenchmark.py \
+        --video_reference vehicles_1080p.mp4
+
+Replay-only usage:
+
+    python development/stream_interface/rfdetr_rle_to_poly_microbenchmark.py \
+        --mode replay --cases-dir temp/rfdetr_rle_to_poly_cases
+
+For Nsight Systems CPU/NVTX tracing, pass ``--nvtx`` during replay and profile
+with ``nsys profile --trace=nvtx,osrt --sample=process-tree``.
+"""
+
+import argparse
+import functools
+import importlib.util
+import json
+import os
+from contextlib import contextmanager
+from pathlib import Path
+import pickle
+import sys
+import threading
+from time import perf_counter, time
+from typing import Any, Dict, List, Optional
+
+import numpy as np
+
+_REPO_ROOT = Path(__file__).resolve().parents[2]  # this script sits at <repo>/development/stream_interface/
+_INFERENCE_MODELS_ROOT = _REPO_ROOT / "inference_models"
+_WORKFLOW_PATH = (
+    _REPO_ROOT / "development" / "stream_interface" / "rfdetr_nano_seg_trt_workflow.py"
+)
+_TARGET_FUNCTION = "rle_masks2poly"  # attribute of inference_models_adapters wrapped by the capture hook
+_SCHEMA_VERSION = 1  # written into every captured case and into the manifest
+
+
+def _ensure_local_import_paths() -> None:  # put the repo root and inference_models dir on sys.path
+    for path in (str(_INFERENCE_MODELS_ROOT), str(_REPO_ROOT)):
+        if path not in sys.path:
+            sys.path.insert(0, path)  # front of sys.path; the repo root, handled last, ends up first
+
+
+def _load_workflow_module() -> Any:  # import the workflow script at _WORKFLOW_PATH by file path
+    spec = importlib.util.spec_from_file_location(
+        "rfdetr_nano_seg_trt_workflow_for_rle_to_poly_microbenchmark",
+        _WORKFLOW_PATH,
+    )
+    if spec is None or spec.loader is None:
+        raise RuntimeError(f"Could not load workflow module from {_WORKFLOW_PATH}")
+    module = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(module)  # executes the script's top-level code; not registered in sys.modules here
+    return module
+
+
+def _snapshot_masks(masks: Any) -> dict:  # copy image_size and masks into builtin containers
+    return {
+        "image_size": tuple(masks.image_size),
+        "masks": list(masks.masks),  # shallow copy: new list, elements are shared with the input
+        "mask_count": len(masks.masks),
+    }
+
+
+def _snapshot_output(output: List[np.ndarray]) -> List[np.ndarray]:
+    return [np.array(poly, copy=True) for poly in output]  # copy=True: snapshot does not alias the caller's arrays
+
+
+def _write_pickle(path: Path, payload: dict) -> None:  # pickle to a sibling ".tmp" file, then rename
+    tmp_path = path.with_suffix(path.suffix + ".tmp")  # e.g. case_0000.pkl -> case_0000.pkl.tmp
+    with tmp_path.open("wb") as f:
+        pickle.dump(payload, f, protocol=pickle.HIGHEST_PROTOCOL)
+    os.replace(tmp_path, path)  # the final name appears only after the dump has completed
+
+
+class _CaptureState:  # lock-guarded counter that pickles up to `limit` captured calls into `cases_dir`
+    def __init__(self, cases_dir: Path, limit: int) -> None:
+        self.cases_dir = cases_dir
+        self.limit = limit
+        self.count = 0  # number of cases written so far
+        self.total_masks = 0  # sum of mask_count over the written cases
+        self.lock = threading.Lock()
+
+    def maybe_save(self, masks: Any, output: List[np.ndarray]) -> None:
+        with self.lock:  # held for the whole save, including the file write
+            if self.count >= self.limit:
+                return  # enough cases captured; ignore further calls
+            case_index = self.count
+            mask_snapshot = _snapshot_masks(masks=masks)
+            payload = {
+                "schema_version": _SCHEMA_VERSION,
+                "case_index": case_index,
+                "inputs": {"masks": mask_snapshot},
+                "expected_output": _snapshot_output(output=output),
+            }
+            _write_pickle(
+                self.cases_dir / f"case_{case_index:04d}.pkl",
+                payload,
+            )
+            self.count += 1
+            self.total_masks += mask_snapshot["mask_count"]
+            if self.count == 1 or self.count % 10 == 0 or self.count == self.limit:  # first, every 10th, last
+                print(
+                    f"[capture] saved {self.count}/{self.limit} "
+                    f"rle-to-poly calls masks={self.total_masks}",
+                    flush=True,
+                )
+
+
+def _install_capture_hook(state: _CaptureState) -> None:  # replace adapters.rle_masks2poly with a recording wrapper
+    _ensure_local_import_paths()
+    from inference.core.models import inference_models_adapters as adapters
+
+    original = getattr(adapters, _TARGET_FUNCTION)
+
+    @functools.wraps(original)
+    def wrapper(masks: Any) -> List[np.ndarray]:
+        result = original(masks)  # run the real conversion first
+        state.maybe_save(masks=masks, output=result)  # then record the input/output pair
+        return result
+
+    setattr(adapters, _TARGET_FUNCTION, wrapper)  # only lookups through the adapters module see the wrapper
+
+
+def _prepare_cases_dir(cases_dir: Path, overwrite: bool) -> None:  # create the dir; refuse or clear old captures
+    cases_dir.mkdir(parents=True, exist_ok=True)
+    existing = list(cases_dir.glob("case_*.pkl"))
+    manifest_path = cases_dir / "manifest.json"
+    if not overwrite and (existing or manifest_path.exists()):
+        raise RuntimeError(
+            f"{cases_dir} already contains captured cases; pass --overwrite "
+            "or choose a different --cases-dir."
+        )
+    if overwrite:  # delete previous case files and the manifest; other files are left alone
+        for path in existing:
+            path.unlink()
+        if manifest_path.exists():
+            manifest_path.unlink()
+
+
+def _write_manifest(cases_dir: Path, payload: dict) -> None:  # write payload to <cases_dir>/manifest.json
+    with (cases_dir / "manifest.json").open("w") as f:
+        json.dump(payload, f, indent=2, sort_keys=True)  # sorted keys, 2-space indent
+        f.write("\n")  # json.dump writes no trailing newline
+
+
+def _run_capture(args: argparse.Namespace) -> int:  # returns the number of captured invocations
+    cases_dir = args.cases_dir.resolve()
+    _prepare_cases_dir(cases_dir=cases_dir, overwrite=args.overwrite)
+
+    workflow = _load_workflow_module()
+    model_id = workflow._resolve_model_id(args.model_id, args.backend)
+    workflow._prepare_local_workflow_model_bundle(model_id)
+    if model_id != args.model_id:
+        print(
+            f"[model] using local TRT package via workflow model id: {model_id}",
+            flush=True,
+        )
+
+    state = _CaptureState(cases_dir=cases_dir, limit=args.capture_count)
+    _install_capture_hook(state=state)  # must run before the pipeline starts so every call is observed
+
+    frame_count = 0
+    start_time: Optional[float] = None
+    pipeline_ref: Dict[str, Any] = {}  # filled in after construction so the sink closure can reach the pipeline
+
+    def sink(predictions: Any, video_frames: Any) -> None:
+        nonlocal frame_count, start_time
+        del video_frames  # unused; only predictions are counted
+        if not isinstance(predictions, list):
+            predictions = [predictions]
+        frame_count += sum(p is not None for p in predictions)  # count only non-None predictions
+        if start_time is None:
+            start_time = perf_counter()  # the clock starts at the first sink call
+        if frame_count % args.progress_every == 0:  # NOTE(review): also true while frame_count is 0; batches can step over a multiple
+            elapsed = perf_counter() - start_time
+            fps = frame_count / elapsed if elapsed > 0 else 0.0
+            print(
+                f"[progress] frames={frame_count} fps={fps:.2f} "
+                f"captures={state.count}/{state.limit}",
+                flush=True,
+            )
+        if state.count >= state.limit and "pipeline" in pipeline_ref:
+            pipeline_ref["pipeline"].terminate()  # stop the stream once enough invocations were captured
+
+    pipeline = workflow.InferencePipeline.init_with_workflow(
+        video_reference=args.video_reference,
+        workflow_specification=workflow.build_workflow(model_id, args.confidence),
+        on_prediction=sink,
+    )
+    pipeline_ref["pipeline"] = pipeline
+    pipeline.start()
+    pipeline.join()
+
+    if state.count < args.capture_count:  # the video ended before the requested number of captures
+        raise RuntimeError(
+            f"Captured only {state.count}/{args.capture_count} invocations. "
+            "Use a longer video or lower --capture-count."
+        )
+
+    elapsed = perf_counter() - start_time if start_time else 0.0  # NOTE(review): truthiness test; `is not None` would be stricter
+    _write_manifest(
+        cases_dir=cases_dir,
+        payload={
+            "schema_version": _SCHEMA_VERSION,
+            "function": "inference.core.models.inference_models_adapters.rle_masks2poly",
+            "case_count": state.count,
+            "total_masks": state.total_masks,
+            "video_reference": args.video_reference,
+            "backend": args.backend,
+            "model_id": model_id,
+            "confidence": args.confidence,
+            "frames_seen_by_sink": frame_count,
+            "capture_elapsed_seconds": elapsed,
+            "created_at_unix": time(),
+        },
+    )
+    print(
+        f"[capture] wrote {state.count} cases to {cases_dir} "
+        f"total_masks={state.total_masks}",
+        flush=True,
+    )
+    return state.count
+
+
+def _load_case(path: Path) -> dict:  # load one captured case and verify its schema version
+    with path.open("rb") as f:
+        payload = pickle.load(f)  # NOTE(review): unpickling can execute arbitrary code; only replay case files you captured yourself
+    if payload.get("schema_version") != _SCHEMA_VERSION:  # reject cases written by an incompatible capture format
+        raise RuntimeError(
+            f"{path} has schema_version={payload.get('schema_version')}; "
+            f"expected {_SCHEMA_VERSION}."
+        )
+    return payload
+
+
+def _materialize_masks(case: dict) -> Any:
+    _ensure_local_import_paths()
+    from inference_models.models.base.types import InstancesRLEMasks
+
+    payload = case["inputs"]["masks"]
+    return InstancesRLEMasks(
+        image_size=tuple(payload["image_size"]),
+        masks=list(payload["masks"]),
+    )
+
+
+def _assert_outputs_equal(
+    *,
+    actual: List[np.ndarray],
+    expected: List[np.ndarray],
+    case_index: int,
+) -> None:
+    if len(actual) != len(expected):
+        raise AssertionError(
+            f"case {case_index}: output length differs: "
+            f"{len(actual)} != {len(expected)}"
+        )
+    for poly_index, (actual_poly, expected_poly) in enumerate(zip(actual, expected)):
+        if actual_poly.shape != expected_poly.shape:
+            raise AssertionError(
+                f"case {case_index} polygon {poly_index}: shape differs "
+                f"{actual_poly.shape} != {expected_poly.shape}"
+            )
+        if actual_poly.dtype != expected_poly.dtype:
+            raise AssertionError(
+                f"case {case_index} polygon {poly_index}: dtype differs "
+                f"{actual_poly.dtype} != {expected_poly.dtype}"
+            )
+        if not np.array_equal(actual_poly, expected_poly):
+            raise AssertionError(
+                f"case {case_index} polygon {poly_index}: values differ"
+            )
+
+
+@contextmanager
+def _nvtx_range(enabled: bool, message: str):
+    """Wrap the with-body in an NVTX range; no-op if disabled or NVTX is unusable."""
+    pushed = enabled
+    if enabled:
+        try:  # guard only the import/push, never the with-body
+            import torch
+            torch.cuda.nvtx.range_push(message)
+        except Exception:  # torch missing or NVTX unavailable: run the body unannotated
+            pushed = False
+    try:
+        yield  # the only yield, so an exception from the body propagates unchanged
+    finally:
+        if pushed:  # pop only a range that was actually opened
+            torch.cuda.nvtx.range_pop()
+
+
+def _run_one_replay_case(*, case_path: Path, nvtx: bool) -> float:  # returns seconds spent inside rle_masks2poly
+    from inference.core.models.inference_models_adapters import rle_masks2poly  # NOTE(review): after _install_capture_hook this presumably resolves to the capture wrapper — confirm that is intended for timing
+
+    case = _load_case(case_path)
+    masks = _materialize_masks(case=case)
+    label = f"rfdetr.rle_to_poly.case={case['case_index']}" f".masks={len(masks.masks)}"
+    start = perf_counter()  # timed region covers only the conversion, not loading or the equality check
+    with _nvtx_range(nvtx, label):
+        actual = rle_masks2poly(masks)
+    elapsed = perf_counter() - start
+    _assert_outputs_equal(  # raises AssertionError when the replayed polygons differ from the captured ones
+        actual=actual,
+        expected=case["expected_output"],
+        case_index=case["case_index"],
+    )
+    return elapsed
+
+
+def _summarize_timings(timings: List[float]) -> dict:
+    """Return count, total seconds and mean/min/p50/p90/p99/max (ms) of *timings*."""
+    ordered = sorted(timings)
+    n = len(ordered)
+    total = sum(ordered)
+
+    def rank_ms(p: float) -> float:  # nearest-rank sample in milliseconds
+        if not n:
+            return 0.0
+        return ordered[min(n - 1, int(round((n - 1) * p)))] * 1000
+
+    return {
+        "count": n,
+        "total_seconds": total,
+        "mean_ms": (total / n) * 1000 if n else 0.0,
+        "min_ms": rank_ms(0.0),
+        "p50_ms": rank_ms(0.50),
+        "p90_ms": rank_ms(0.90),
+        "p99_ms": rank_ms(0.99),
+        "max_ms": rank_ms(1.0),
+    }
+
+
+def _print_timing_summary(summary: dict) -> None:
+    print(
+        "[replay] "
+        f"calls={summary['count']} "
+        f"total={summary['total_seconds']:.3f}s "
+        f"mean={summary['mean_ms']:.3f}ms "
+        f"p50={summary['p50_ms']:.3f}ms "
+        f"p90={summary['p90_ms']:.3f}ms "
+        f"p99={summary['p99_ms']:.3f}ms "
+        f"min={summary['min_ms']:.3f}ms "
+        f"max={summary['max_ms']:.3f}ms",
+        flush=True,
+    )
+
+
+def _run_replay(args: argparse.Namespace) -> dict:  # returns the timing summary built by _summarize_timings
+    _ensure_local_import_paths()
+    cases_dir = args.cases_dir.resolve()
+    case_paths = sorted(cases_dir.glob("case_*.pkl"))  # sorted so --max-cases always keeps the same prefix
+    if args.max_cases is not None:
+        case_paths = case_paths[: args.max_cases]
+    if not case_paths:
+        raise RuntimeError(f"No case_*.pkl files found in {cases_dir}")
+
+    print(
+        f"[replay] cases={len(case_paths)} repeats={args.repeats} "
+        f"warmup_repeats={args.warmup_repeats} nvtx={args.nvtx}",
+        flush=True,
+    )
+    for _ in range(args.warmup_repeats):  # warm-up passes still validate outputs but their timings are discarded
+        for case_path in case_paths:
+            _run_one_replay_case(case_path=case_path, nvtx=args.nvtx)
+
+    timings = []  # one entry per (repeat, case) call
+    for repeat_index in range(args.repeats):
+        for case_path in case_paths:
+            timings.append(_run_one_replay_case(case_path=case_path, nvtx=args.nvtx))
+        print(
+            f"[replay] completed repeat {repeat_index + 1}/{args.repeats}",
+            flush=True,
+        )
+
+    summary = _summarize_timings(timings)
+    _print_timing_summary(summary)
+    print("[replay] all polygons matched captured e2e outputs", flush=True)  # reached only if no case raised
+    return summary
+
+
+def _parse_args() -> argparse.Namespace:
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--mode",
+        choices=("capture", "replay", "capture-and-replay"),
+        default="capture-and-replay",
+    )
+    parser.add_argument("--video_reference", default="vehicles_1080p.mp4")
+    parser.add_argument("--model_id", default="rfdetr-seg-nano")
+    parser.add_argument("--confidence", type=float, default=0.4)
+    parser.add_argument("--backend", choices=("trt", "onnx", "torch"), default="trt")
+    parser.add_argument(
+        "--cases-dir",
+        type=Path,
+        default=Path("temp/rfdetr_rle_to_poly_cases"),
+    )
+    parser.add_argument("--capture-count", type=int, default=100)
+    parser.add_argument("--progress-every", type=int, default=50)
+    parser.add_argument(
+        "--overwrite",
+        action=argparse.BooleanOptionalAction,
+        default=True,
+    )
+    parser.add_argument("--repeats", type=int, default=1)
+    parser.add_argument("--warmup-repeats", type=int, default=0)
+    parser.add_argument("--max-cases", type=int, default=None)
+    parser.add_argument(
+        "--nvtx",
+        action="store_true",
+        help="Add NVTX ranges around each replayed rle_masks2poly call.",
+    )
+    args = parser.parse_args()
+    if args.capture_count <= 0:
+        raise ValueError("--capture-count must be positive")
+    if args.repeats <= 0:
+        raise ValueError("--repeats must be positive")
+    if args.warmup_repeats < 0:
+        raise ValueError("--warmup-repeats must be non-negative")
+    if args.progress_every <= 0:
+        raise ValueError("--progress-every must be positive")
+    return args
+
+
+def main() -> None:
+    args = _parse_args()
+    if args.mode in {"capture", "capture-and-replay"}:
+        _run_capture(args=args)
+    if args.mode in {"replay", "capture-and-replay"}:
+        _run_replay(args=args)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/inference/core/models/inference_models_adapters.py b/inference/core/models/inference_models_adapters.py
index 28e4f7cbd9..310f0a1ff3 100644
--- a/inference/core/models/inference_models_adapters.py
+++ b/inference/core/models/inference_models_adapters.py
@@ -7,7 +7,6 @@
 import numpy as np
 import torch
 from PIL import Image, ImageDraw, ImageFont
-from pycocotools import mask as mask_utils
 
 from inference.core.entities.requests import (
     ClassificationInferenceRequest,
@@ -43,7 +42,9 @@
 from inference.core.models.base import Model
 from inference.core.roboflow_api import get_extra_weights_provider_headers
 from inference.core.utils.image_utils import load_image_bgr, load_image_rgb
-from inference.core.utils.postprocess import mask2poly, masks2poly
+from inference.core.utils.nsight import nsight_range
+from inference.core.utils.postprocess import bitpacked_masks2poly, masks2poly
+from inference.core.utils.rle_to_polygon import rle_masks_to_polygons
 from inference.core.utils.visualisation import draw_detection_predictions
 from inference.models.aliases import resolve_roboflow_model_alias
 from inference_models import (
@@ -446,18 +447,8 @@ def draw_predictions(
 
 
 def rle_masks2poly(masks: InstancesRLEMasks) -> List[np.ndarray]:
-    segments = []
-    h, w = masks.image_size
-    for counts in masks.masks:
-        rle_dict = {"size": [h, w], "counts": counts}
-        decoded_rle = np.ascontiguousarray(
-            mask_utils.decode(rle_dict)
-        )  # (H, W) uint8, already C-contiguous
-        if not np.any(decoded_rle):
-            segments.append(np.zeros((0, 2), dtype=np.float32))
-            continue
-        segments.append(mask2poly(decoded_rle))
-    return segments
+    with nsight_range("rfdetr.rle_masks2poly"):
+        return rle_masks_to_polygons(masks=masks)
 
 
 class InferenceModelsKeyPointsDetectionAdapter(Model):
diff --git a/inference/core/utils/nsight.py b/inference/core/utils/nsight.py
new file mode 100644
index 0000000000..d5c1157bfb
--- /dev/null
+++ b/inference/core/utils/nsight.py
@@ -0,0 +1,96 @@
+import os
+import threading
+from contextlib import contextmanager
+from typing import Optional
+
+_TRACE_CONTEXT = threading.local()
+_NVTX = None
+_NVTX_INIT_ATTEMPTED = False
+
+
+def nsight_markers_enabled() -> bool:  # the environment is re-read on every call; nothing is cached here
+    return os.getenv("RFDETR_NSIGHT_MARKERS", "").lower() in {  # case-insensitive; unset counts as disabled
+        "1",
+        "true",
+        "yes",
+        "on",
+    }
+
+
+def _get_nvtx():  # returns torch.cuda.nvtx, or None when it could not be obtained
+    global _NVTX, _NVTX_INIT_ATTEMPTED
+    if _NVTX_INIT_ATTEMPTED:
+        return _NVTX  # cached outcome of the first attempt (may be None)
+    _NVTX_INIT_ATTEMPTED = True  # set before importing so a failed import is never retried
+    try:
+        import torch
+
+        _NVTX = torch.cuda.nvtx
+    except Exception:  # any failure while importing or resolving nvtx leaves markers disabled
+        _NVTX = None
+    return _NVTX
+
+
+def nsight_mark(message: str) -> None:
+    if not nsight_markers_enabled():
+        return
+    nvtx = _get_nvtx()
+    if nvtx is None:
+        return
+    try:
+        nvtx.mark(message)
+    except Exception:
+        return
+
+
+def nsight_range_push(message: str) -> None:
+    if not nsight_markers_enabled():
+        return
+    nvtx = _get_nvtx()
+    if nvtx is None:
+        return
+    try:
+        nvtx.range_push(message)
+    except Exception:
+        return
+
+
+def nsight_range_pop() -> None:
+    if not nsight_markers_enabled():
+        return
+    nvtx = _get_nvtx()
+    if nvtx is None:
+        return
+    try:
+        nvtx.range_pop()
+    except Exception:
+        return
+
+
+@contextmanager
+def nsight_range(message: str):  # context manager: push an NVTX range on entry, pop it on exit
+    nsight_range_push(message)  # no-op unless markers are enabled and NVTX is available
+    try:
+        yield
+    finally:
+        nsight_range_pop()  # always attempted on exit, even when the body raises
+
+
+def nsight_current_frame_id() -> Optional[str]:
+    return getattr(_TRACE_CONTEXT, "frame_id", None)
+
+
+@contextmanager
+def nsight_frame_context(frame_id: Optional[str]):  # sets the current thread's frame id for the duration of the with-body
+    previous = getattr(_TRACE_CONTEXT, "frame_id", None)  # remember the outer value so nested contexts unwind correctly
+    _TRACE_CONTEXT.frame_id = frame_id  # _TRACE_CONTEXT is threading.local, so other threads are unaffected
+    try:
+        yield
+    finally:
+        _TRACE_CONTEXT.frame_id = previous  # restored even when the body raises
+
+
+def nsight_frame_label(frame_id: Optional[str], event: str) -> str:
+    if frame_id is None:
+        return f"rfdetr.{event}"
+    return f"rfdetr.frame={frame_id}.{event}"
diff --git a/inference/core/utils/rle_to_polygon.py b/inference/core/utils/rle_to_polygon.py
new file mode 100644
index 0000000000..f15c4a2d90
--- /dev/null
+++ b/inference/core/utils/rle_to_polygon.py
@@ -0,0 +1,209 @@
+"""COCO RLE to OpenCV-style polygon conversion."""
+
+from typing import Dict, Iterable, List, Optional, Tuple
+
+import cv2
+import numpy as np
+
+_EMPTY_POLYGON = np.zeros((0, 2), dtype=np.float32)
+_ColumnIntervals = Dict[int, List[Tuple[int, int]]]
+
+
+def rle_masks_to_polygons(masks: object) -> List[np.ndarray]:
+    """Convert COCO RLE masks into the legacy largest external polygon.
+
+    The old adapter path decoded every RLE into a full-frame dense mask and then
+    called ``cv2.findContours(mask, RETR_EXTERNAL, CHAIN_APPROX_SIMPLE)``. This
+    path keeps the RLE sparse until the final contour step, where it materializes
+    only the foreground bounding crop needed by OpenCV.
+    """
+
+    height, width = masks.image_size  # image_size is unpacked as (height, width)
+    sparse_counts = _get_lazy_uncompressed_counts(masks=masks)  # None unless the object exposes uncompressed count arrays
+    if sparse_counts is not None:
+        counts, lengths = sparse_counts
+        return [
+            polygon_from_uncompressed_counts(
+                counts=counts[i, : int(lengths[i])],  # only the first lengths[i] entries of row i are used
+                height=height,
+                width=width,
+            )
+            for i in range(lengths.shape[0])
+        ]
+    return [  # fallback: decode each entry of masks.masks individually
+        polygon_from_coco_counts(counts=counts, height=height, width=width)
+        for counts in masks.masks
+    ]
+
+
+def polygon_from_coco_counts(
+    counts: object,
+    height: int,
+    width: int,
+) -> np.ndarray:
+    columns = _coco_counts_to_column_intervals(
+        counts=counts,
+        height=height,
+        width=width,
+    )
+    return _polygon_from_column_intervals(columns=columns)
+
+
+def polygon_from_uncompressed_counts(
+    counts: Iterable[int],
+    height: int,
+    width: int,
+) -> np.ndarray:
+    columns = _counts_to_column_intervals(
+        counts=counts,
+        height=height,
+        width=width,
+    )
+    return _polygon_from_column_intervals(columns=columns)
+
+
+def _get_lazy_uncompressed_counts(  # duck-typed probe for pre-computed uncompressed counts
+    masks: object,
+) -> Optional[Tuple[np.ndarray, np.ndarray]]:
+    ensure_rle_cpu = getattr(masks, "_ensure_rle_cpu", None)  # optional hook; a missing attribute yields None
+    if callable(ensure_rle_cpu):
+        ensure_rle_cpu()  # NOTE(review): presumably populates the _rle_*_cpu attributes — confirm against the masks type
+    counts = getattr(masks, "_rle_counts_cpu", None)
+    lengths = getattr(masks, "_rle_lengths_cpu", None)
+    if counts is None or lengths is None:
+        return None  # tells the caller to fall back to masks.masks
+    return counts, lengths
+
+
+def _coco_counts_to_column_intervals(  # decode one RLE (compressed text or plain counts) into per-column foreground intervals
+    counts: object,
+    height: int,
+    width: int,
+) -> _ColumnIntervals:
+    if isinstance(counts, str):
+        encoded = counts.encode("ascii")  # compressed counts supplied as text
+    elif isinstance(counts, bytes):
+        encoded = counts
+    elif isinstance(counts, bytearray):
+        encoded = bytes(counts)
+    else:
+        return _counts_to_column_intervals(  # anything else is treated as an iterable of integer run lengths
+            counts=counts,
+            height=height,
+            width=width,
+        )
+
+    total_size = height * width
+    columns: _ColumnIntervals = {}
+    cursor = 0  # flat pixel offset where the next run starts
+    value = 0  # runs alternate, starting with background (0)
+    previous_two = 0  # run length decoded two runs ago
+    previous_one = 0  # run length decoded one run ago
+    count_index = 0
+    index = 0
+    encoded_len = len(encoded)
+    while index < encoded_len:
+        run_length = 0
+        shift = 0
+        while True:
+            char = encoded[index] - 48  # NOTE(review): input truncated mid-group raises IndexError here, not ValueError
+            index += 1
+            run_length |= (char & 0x1F) << shift  # low 5 bits are payload, least-significant group first
+            shift += 5
+            if char & 0x20:  # continuation bit set: more groups follow
+                continue
+            if char & 0x10:  # top payload bit of the final group acts as the sign bit
+                run_length |= -1 << shift  # sign-extend so negative deltas decode correctly
+            break
+        # Compressed COCO RLE stores deltas from count index 3 onward.
+        if count_index > 2:
+            run_length += previous_two
+        if run_length < 0:
+            raise ValueError("COCO RLE counts must be non-negative")
+        next_cursor = cursor + run_length
+        if next_cursor > total_size:
+            raise ValueError("COCO RLE counts exceed the mask size")
+        if value and run_length:  # record only non-empty foreground runs
+            _append_foreground_run(
+                columns=columns,
+                cursor=cursor,
+                run_length=run_length,
+                height=height,
+            )
+        cursor = next_cursor
+        previous_two, previous_one = previous_one, run_length
+        count_index += 1
+        value ^= 1  # flip between background and foreground
+    return columns
+
+
+def _counts_to_column_intervals(  # turn plain integer run lengths into per-column foreground intervals
+    counts: Iterable[int],
+    height: int,
+    width: int,
+) -> _ColumnIntervals:
+    total_size = height * width  # width is only needed for this bounds check
+    columns: _ColumnIntervals = {}
+    cursor = 0  # flat pixel offset where the next run starts
+    value = 0  # runs alternate, starting with background (0)
+    for raw_count in counts:
+        count = int(raw_count)  # accept NumPy integer scalars as well as Python ints
+        if count < 0:
+            raise ValueError("COCO RLE counts must be non-negative")
+        next_cursor = cursor + count
+        if next_cursor > total_size:
+            raise ValueError("COCO RLE counts exceed the mask size")
+        if value and count:  # record only non-empty foreground runs
+            _append_foreground_run(
+                columns=columns,
+                cursor=cursor,
+                run_length=count,
+                height=height,
+            )
+        cursor = next_cursor
+        value ^= 1  # flip between background and foreground
+    return columns
+
+
+def _append_foreground_run(  # split one flat foreground run into per-column row intervals, mutating `columns`
+    columns: _ColumnIntervals,
+    cursor: int,
+    run_length: int,
+    height: int,
+) -> None:
+    end = cursor + run_length
+    run_cursor = cursor
+    while run_cursor < end:  # a run longer than the rest of its column spills into the following columns
+        x = run_cursor // height  # flat offsets are column-major: column = offset // height
+        y = run_cursor - x * height  # row within that column
+        column_run_length = min(end - run_cursor, height - y)  # clip the piece at the bottom of the column
+        columns.setdefault(x, []).append((y, y + column_run_length))  # half-open [y0, y1) row interval
+        run_cursor += column_run_length
+
+
+def _polygon_from_column_intervals(columns: _ColumnIntervals) -> np.ndarray:  # returns an (N, 2) float32 array of x, y points
+    if not columns:
+        return _EMPTY_POLYGON.copy()  # copy so callers cannot mutate the shared module constant
+
+    x_min = min(columns)
+    x_max = max(columns)
+    y_min = min(y0 for intervals in columns.values() for y0, _ in intervals)
+    y_max = max(y1 for intervals in columns.values() for _, y1 in intervals)  # exclusive upper row bound
+    crop = np.zeros((y_max - y_min, x_max - x_min + 1), dtype=np.uint8)  # dense raster of the foreground bounding box only
+    for x, intervals in columns.items():
+        crop_x = x - x_min
+        for y0, y1 in intervals:
+            crop[y0 - y_min : y1 - y_min, crop_x] = 1
+
+    contours = cv2.findContours(
+        crop,
+        cv2.RETR_EXTERNAL,
+        cv2.CHAIN_APPROX_SIMPLE,
+        offset=(x_min, y_min),  # shift contour points from crop coordinates back to full-image coordinates
+    )[0]  # NOTE(review): [0] is the contour list with the OpenCV 4.x (contours, hierarchy) return shape — confirm the pinned version
+    if not contours:
+        return _EMPTY_POLYGON.copy()
+
+    contour_lengths = np.fromiter((len(c) for c in contours), dtype=np.intp)  # number of points per contour
+    selected_contour = contours[int(contour_lengths.argmax())]  # contour with the most points; argmax keeps the first on ties
+    return np.asarray(selected_contour, dtype=np.float32).reshape(-1, 2)
diff --git a/tests/inference/unit_tests/core/utils/test_rle_to_polygon.py b/tests/inference/unit_tests/core/utils/test_rle_to_polygon.py
new file mode 100644
index 0000000000..3878ddbdeb
--- /dev/null
+++ b/tests/inference/unit_tests/core/utils/test_rle_to_polygon.py
@@ -0,0 +1,194 @@
+import warnings
+from typing import Iterable, List, Tuple
+
+import numpy as np
+import pytest
+from pycocotools import mask as mask_utils
+
+from inference.core.utils.postprocess import mask2poly
+from inference.core.utils.rle_to_polygon import rle_masks_to_polygons
+from inference_models.models.base.types import InstancesRLEMasks
+
+pytestmark = pytest.mark.filterwarnings(
+    "ignore:__array__ implementation doesn't accept a copy keyword.*:DeprecationWarning"
+)
+
+
+def _legacy_rle_masks2poly(masks: InstancesRLEMasks) -> List[np.ndarray]:
+    segments = []
+    h, w = masks.image_size
+    for counts in masks.masks:
+        rle_dict = {"size": [h, w], "counts": counts}
+        decoded_rle = np.ascontiguousarray(mask_utils.decode(rle_dict))
+        if not np.any(decoded_rle):
+            segments.append(np.zeros((0, 2), dtype=np.float32))
+            continue
+        segments.append(mask2poly(decoded_rle))
+    return segments
+
+
+def _to_instances(masks: Iterable[np.ndarray]) -> InstancesRLEMasks:
+    masks = list(masks)
+    assert masks
+    image_size = tuple(masks[0].shape)
+    rles = [_encode_mask(mask) for mask in masks]
+    return InstancesRLEMasks(image_size=image_size, masks=rles)
+
+
+def _encode_mask(mask: np.ndarray) -> bytes:
+    with warnings.catch_warnings():
+        warnings.filterwarnings(
+            "ignore",
+            message="__array__ implementation doesn't accept a copy keyword.*",
+            category=DeprecationWarning,
+        )
+        return mask_utils.encode(np.asfortranarray(mask.astype(np.uint8)))["counts"]
+
+
+def _assert_polygons_exactly_equal(
+    actual: List[np.ndarray],
+    expected: List[np.ndarray],
+) -> None:
+    assert len(actual) == len(expected)
+    for actual_poly, expected_poly in zip(actual, expected):
+        assert actual_poly.dtype == expected_poly.dtype
+        assert actual_poly.shape == expected_poly.shape
+        assert np.array_equal(actual_poly, expected_poly)
+
+
+def _uncompressed_counts(mask: np.ndarray) -> List[int]:
+    flat = mask.astype(np.uint8).ravel(order="F")
+    if flat.size == 0:
+        return []
+    counts = []
+    current = 0
+    run_length = 0
+    for value in flat:
+        value = int(value)
+        if value == current:
+            run_length += 1
+        else:
+            counts.append(run_length)
+            current = value
+            run_length = 1
+    counts.append(run_length)
+    return counts
+
+
+class _FakeLazyRLEMasks:
+    def __init__(self, image_size: Tuple[int, int], masks: List[np.ndarray]) -> None:
+        self.image_size = image_size
+        counts = [_uncompressed_counts(mask) for mask in masks]
+        max_len = max(len(c) for c in counts)
+        self._rle_counts_cpu = np.zeros((len(counts), max_len), dtype=np.int32)
+        self._rle_lengths_cpu = np.asarray([len(c) for c in counts], dtype=np.int32)
+        for i, count in enumerate(counts):
+            self._rle_counts_cpu[i, : len(count)] = count
+
+    def _ensure_rle_cpu(self) -> None:
+        pass
+
+
+def _deterministic_masks() -> List[np.ndarray]:
+    masks = []
+
+    masks.append(np.zeros((12, 14), dtype=np.uint8))
+
+    single_pixels = np.zeros((12, 14), dtype=np.uint8)
+    single_pixels[0, 0] = 1
+    single_pixels[5, 7] = 1
+    single_pixels[11, 13] = 1
+    masks.append(single_pixels)
+
+    full = np.ones((12, 14), dtype=np.uint8)
+    masks.append(full)
+
+    touching_border = np.zeros((12, 14), dtype=np.uint8)
+    touching_border[0:8, 0:5] = 1
+    touching_border[4:12, 9:14] = 1
+    masks.append(touching_border)
+
+    ring = np.zeros((18, 20), dtype=np.uint8)
+    ring[2:16, 2:18] = 1
+    ring[5:13, 6:14] = 0
+    masks.append(ring)
+
+    side_by_side_holes = np.zeros((18, 22), dtype=np.uint8)
+    side_by_side_holes[2:16, 2:20] = 1
+    side_by_side_holes[5:13, 5:8] = 0
+    side_by_side_holes[5:13, 13:16] = 0
+    masks.append(side_by_side_holes)
+
+    diagonal = np.zeros((16, 16), dtype=np.uint8)
+    for i in range(2, 14):
+        diagonal[i, i] = 1
+        diagonal[i, i - 1] = 1
+    masks.append(diagonal)
+
+    equal_length_components = np.zeros((16, 20), dtype=np.uint8)
+    equal_length_components[2:6, 2:6] = 1
+    equal_length_components[10:14, 14:18] = 1
+    masks.append(equal_length_components)
+
+    jagged = np.zeros((24, 26), dtype=np.uint8)
+    jagged[3:20, 4:22] = 1
+    jagged[7:11, 8:20] = 0
+    jagged[14:18, 9:17] = 0
+    jagged[5:9, 21:24] = 1
+    jagged[18:23, 2:9] = 1
+    masks.append(jagged)
+
+    return masks
+
+
+def test_rle_masks_to_polygons_matches_legacy_dense_path_on_adversarial_masks() -> None:
+    for mask in _deterministic_masks():
+        instances = _to_instances([mask])
+
+        actual = rle_masks_to_polygons(masks=instances)
+        expected = _legacy_rle_masks2poly(masks=instances)
+
+        _assert_polygons_exactly_equal(actual=actual, expected=expected)
+
+
+def test_rle_masks_to_polygons_matches_legacy_dense_path_on_random_masks() -> None:
+    rng = np.random.default_rng(20260530)
+    for height, width in [(1, 1), (2, 3), (5, 7), (16, 17), (31, 29), (64, 64)]:
+        masks = []
+        for density in [0.0, 0.01, 0.05, 0.15, 0.35, 0.65, 1.0]:
+            for _ in range(8):
+                masks.append((rng.random((height, width)) < density).astype(np.uint8))
+        instances = _to_instances(masks)
+
+        actual = rle_masks_to_polygons(masks=instances)
+        expected = _legacy_rle_masks2poly(masks=instances)
+
+        _assert_polygons_exactly_equal(actual=actual, expected=expected)
+
+
+def test_rle_masks_to_polygons_matches_legacy_dense_path_for_lazy_uncompressed_counts() -> (
+    None
+):
+    for mask in _deterministic_masks():
+        legacy_instances = _to_instances([mask])
+        lazy_instances = _FakeLazyRLEMasks(
+            image_size=legacy_instances.image_size,
+            masks=[mask],
+        )
+
+        actual = rle_masks_to_polygons(masks=lazy_instances)
+        expected = _legacy_rle_masks2poly(masks=legacy_instances)
+
+        _assert_polygons_exactly_equal(actual=actual, expected=expected)
+
+
+def test_adapter_rle_masks2poly_matches_legacy_dense_path() -> None:
+    from inference.core.models.inference_models_adapters import rle_masks2poly
+
+    for mask in _deterministic_masks():
+        instances = _to_instances([mask])
+
+        actual = rle_masks2poly(masks=instances)
+        expected = _legacy_rle_masks2poly(masks=instances)
+
+        _assert_polygons_exactly_equal(actual=actual, expected=expected)

From d188bcfd3682f4a4f04f0c37a3c2a60033da7bc0 Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Tue, 2 Jun 2026 01:37:40 +0000
Subject: [PATCH 36/76] Improve RF-DETR stream pipeline scheduling

---
 .../core/entities/responses/inference.py      |  89 +++
 .../interfaces/stream/inference_pipeline.py   | 130 ++++-
 .../stream/model_handlers/workflows.py        | 114 +++-
 .../core/models/inference_models_adapters.py  | 516 ++++++++++++++++--
 .../roboflow/instance_segmentation/v3.py      | 198 ++++++-
 .../models/base/instance_segmentation.py      | 171 +++++-
 .../inference_models/models/common/trt.py     |  86 ++-
 .../rfdetr_instance_segmentation_trt.py       |  91 +++
 8 files changed, 1289 insertions(+), 106 deletions(-)

diff --git a/inference/core/entities/responses/inference.py b/inference/core/entities/responses/inference.py
index 912dbd98f3..3fe67f536e 100644
--- a/inference/core/entities/responses/inference.py
+++ b/inference/core/entities/responses/inference.py
@@ -1,4 +1,5 @@
 import base64
+from dataclasses import dataclass, field
 from typing import Any, Dict, List, Literal, Optional, Union
 from uuid import uuid4
 
@@ -273,6 +274,94 @@ class InstanceSegmentationInferenceResponse(
     ]
 
 
+# Dataclass twins used on the workflow-local fast path in
+# `InferenceModelsInstanceSegmentationAdapter.postprocess` when
+# `kwargs["source"] == "workflow-execution"`. The workflow block consumes
+# a plain dict via `_is_response_dc_to_dict` and never needs the pydantic
+# interface. HTTP / cache / visualization paths still receive the pydantic
+# `InstanceSegmentationInferenceResponse` because they use
+# `source != "workflow-execution"`.
+@dataclass(slots=True)
+class PointDC:
+    x: float
+    y: float
+
+
+@dataclass(slots=True)
+class InferenceResponseImageDC:
+    width: int
+    height: int
+
+
+@dataclass(slots=True)
+class InstanceSegmentationPredictionDC:
+    x: float
+    y: float
+    width: float
+    height: float
+    confidence: float
+    class_name: str  # serialized as "class" in the dict form
+    class_id: int
+    points: list  # list[PointDC]
+    detection_id: str = field(default_factory=lambda: str(uuid4()))
+    parent_id: object = None
+    class_confidence: object = None
+
+
+@dataclass(slots=True)
+class InstanceSegmentationInferenceResponseDC:
+    predictions: list  # list[InstanceSegmentationPredictionDC]
+    image: InferenceResponseImageDC
+    # `Model.infer_from_request` assigns .time and .inference_id after
+    # construction (see inference/core/models/base.py:154-157); they're
+    # declared here so the slotted dataclass permits the reassignment.
+    inference_id: object = None
+    frame_id: object = None
+    time: object = None
+    visualization: object = None
+    # Internal stream-pipeline fast path: lets workflow execution carry a
+    # response future through Model.infer_from_request without blocking the
+    # inference thread. `_is_response_dc_to_dict` intentionally ignores it.
+    _async_response_future: object = None
+
+
+def _is_pred_dc_to_dict(p: InstanceSegmentationPredictionDC) -> dict:
+    """Bit-equivalent to `InstanceSegmentationPrediction(...).model_dump(by_alias=True, exclude_none=True)`."""
+    d = {
+        "x": p.x,
+        "y": p.y,
+        "width": p.width,
+        "height": p.height,
+        "confidence": p.confidence,
+        "class": p.class_name,  # alias
+        "class_id": p.class_id,
+        "detection_id": p.detection_id,
+        "points": [{"x": pt.x, "y": pt.y} for pt in p.points],
+    }
+    if p.class_confidence is not None:
+        d["class_confidence"] = p.class_confidence
+    if p.parent_id is not None:
+        d["parent_id"] = p.parent_id
+    return d
+
+
+def _is_response_dc_to_dict(r: InstanceSegmentationInferenceResponseDC) -> dict:
+    """Bit-equivalent to `InstanceSegmentationInferenceResponse(...).model_dump(by_alias=True, exclude_none=True)`."""
+    d = {
+        "image": {"width": r.image.width, "height": r.image.height},
+        "predictions": [_is_pred_dc_to_dict(p) for p in r.predictions],
+    }
+    if r.inference_id is not None:
+        d["inference_id"] = r.inference_id
+    if r.frame_id is not None:
+        d["frame_id"] = r.frame_id
+    if r.time is not None:
+        d["time"] = r.time
+    if r.visualization is not None:
+        d["visualization"] = r.visualization
+    return d
+
+
 class SemanticSegmentationInferenceResponse(
     CvInferenceResponse, WithVisualizationResponse
 ):
diff --git a/inference/core/interfaces/stream/inference_pipeline.py b/inference/core/interfaces/stream/inference_pipeline.py
index 77282bf703..a0306b37c6 100644
--- a/inference/core/interfaces/stream/inference_pipeline.py
+++ b/inference/core/interfaces/stream/inference_pipeline.py
@@ -1,4 +1,4 @@
-from concurrent.futures import ThreadPoolExecutor
+from concurrent.futures import Future, ThreadPoolExecutor
 from datetime import datetime
 from enum import Enum
 from functools import partial
@@ -38,12 +38,13 @@
 )
 from inference.core.interfaces.stream.entities import (
     AnyPrediction,
+    InferenceHandlerResult,
     InferenceHandler,
     ModelConfig,
     SinkHandler,
 )
 from inference.core.interfaces.stream.model_handlers.roboflow_models import (
-    default_process_frame,
+    RoboflowModelHandler,
 )
 from inference.core.interfaces.stream.sinks import active_learning_sink, multi_sink
 from inference.core.interfaces.stream.utils import (
@@ -59,6 +60,7 @@
 from inference.core.managers.decorators.fixed_size_cache import WithFixedSizeCache
 from inference.core.registries.roboflow import RoboflowModelRegistry
 from inference.core.utils.function import experimental
+from inference.core.utils.nsight import nsight_frame_label, nsight_mark
 from inference.core.workflows.core_steps.common.entities import StepExecutionMode
 from inference.core.workflows.execution_engine.profiling.core import (
     BaseWorkflowsProfiler,
@@ -252,8 +254,9 @@ def init(
             tradeoff_factor=tradeoff_factor,
         )
         model = get_model(model_id=model_id, api_key=api_key)
-        on_video_frame = partial(
-            default_process_frame, model=model, inference_config=inference_config
+        on_video_frame = RoboflowModelHandler(
+            model=model,
+            inference_config=inference_config,
         )
         active_learning_middleware = NullActiveLearningMiddleware()
         if active_learning_enabled is None:
@@ -653,9 +656,7 @@ def init_with_workflow(
                 workflow_id=workflow_id,
                 profiler=profiler,
             )
-            workflow_runner = WorkflowRunner()
-            on_video_frame = partial(
-                workflow_runner.run_workflow,
+            on_video_frame = WorkflowRunner(
                 workflows_parameters=workflows_parameters,
                 execution_engine=execution_engine,
                 image_input_name=image_input_name,
@@ -915,21 +916,12 @@ def _execute_inference(self) -> None:
                 self._watchdog.on_model_inference_started(
                     frames=video_frames,
                 )
-                predictions = self._on_video_frame(video_frames)
-                self._watchdog.on_model_prediction_ready(
-                    frames=video_frames,
-                )
-                self._predictions_queue.put((predictions, video_frames))
-                send_inference_pipeline_status_update(
-                    severity=UpdateSeverity.DEBUG,
-                    event_type=INFERENCE_COMPLETED_EVENT,
-                    payload={
-                        "frames_ids": [f.frame_id for f in video_frames],
-                        "frames_timestamps": [f.frame_timestamp for f in video_frames],
-                        "sources_id": [f.source_id for f in video_frames],
-                    },
-                    status_update_handlers=self._status_update_handlers,
+                inference_result = self._on_video_frame(video_frames)
+                self._queue_inference_result(
+                    inference_result=inference_result,
+                    fallback_video_frames=video_frames,
                 )
+            self._drain_inference_handler()
 
         except Exception as error:
             payload = {
@@ -962,13 +954,86 @@ def _dispatch_inference_results(self) -> None:
                 self._predictions_queue.task_done()
                 break
             predictions, video_frames = inference_results
+            frame_id = _video_frames_trace_id(video_frames=video_frames)
+            nsight_mark(nsight_frame_label(frame_id, "dispatch_accept_result"))
+            predictions = _resolve_prediction_futures(predictions)
+            nsight_mark(nsight_frame_label(frame_id, "dispatch_predictions_resolved"))
             if self._on_prediction is not None:
                 self._handle_predictions_dispatching(
                     predictions=predictions,
                     video_frames=video_frames,
                 )
+            nsight_mark(nsight_frame_label(frame_id, "cpu_full_complete"))
             self._predictions_queue.task_done()
 
+    def _queue_inference_result(
+        self,
+        inference_result: Optional[Union[List[AnyPrediction], InferenceHandlerResult]],
+        fallback_video_frames: List[VideoFrame],
+    ) -> None:
+        """Queue a handler result for dispatch and emit the inference-completed status update."""
+        resolved = self._normalise_inference_result(
+            inference_result=inference_result,
+            fallback_video_frames=fallback_video_frames,
+        )
+        if resolved is None:
+            return None
+        predictions, video_frames = resolved
+        self._watchdog.on_model_prediction_ready(frames=video_frames)
+        self._predictions_queue.put((predictions, video_frames))
+        payload = {
+            "frames_ids": [frame.frame_id for frame in video_frames],
+            "frames_timestamps": [frame.frame_timestamp for frame in video_frames],
+            "sources_id": [frame.source_id for frame in video_frames],
+        }
+        send_inference_pipeline_status_update(
+            severity=UpdateSeverity.DEBUG,
+            event_type=INFERENCE_COMPLETED_EVENT,
+            payload=payload,
+            status_update_handlers=self._status_update_handlers,
+        )
+
+    def _normalise_inference_result(
+        self,
+        inference_result: Optional[Union[List[AnyPrediction], InferenceHandlerResult]],
+        fallback_video_frames: List[VideoFrame],
+    ) -> Optional[Tuple[List[AnyPrediction], List[VideoFrame]]]:
+        """Unpack a handler result into `(predictions, video_frames)`.
+
+        Returns None when there is no result or no frames to pair it with.
+        """
+        if inference_result is None:
+            return None
+        predictions, video_frames = inference_result, fallback_video_frames
+        if isinstance(inference_result, InferenceHandlerResult):
+            predictions = inference_result.predictions
+            if inference_result.video_frames is not None:
+                video_frames = inference_result.video_frames
+        if len(video_frames) == 0:
+            return None
+        return predictions, video_frames
+
+    def _drain_inference_handler(self) -> None:
+        """Queue results still held by the frame handler, if it has a callable `flush`.
+
+        Results without frames are dropped: no fallback frames exist at this point.
+        """
+        flush_fn = getattr(self._on_video_frame, "flush", None)
+        flush_result = flush_fn() if callable(flush_fn) else None
+        if flush_result is None:
+            return None
+        # A list made up solely of handler results is queued item by item.
+        is_result_batch = isinstance(flush_result, list) and all(
+            isinstance(item, InferenceHandlerResult) for item in flush_result
+        )
+        pending = flush_result if is_result_batch else [flush_result]
+        for item in pending:
+            self._queue_inference_result(
+                inference_result=item,
+                fallback_video_frames=[],
+            )
+        return None
+
     def _handle_predictions_dispatching(
         self,
         predictions: List[AnyPrediction],
@@ -1068,3 +1133,26 @@ def send_inference_pipeline_status_update(
             handler(status_update)
         except Exception as error:
             logger.warning(f"Could not execute handler update. Cause: {error}")
+
+
+def _resolve_prediction_futures(value: Any) -> Any:
+    """Recursively replace every Future nested in lists, tuples and dicts with its result."""
+    if isinstance(value, Future):
+        # A future's result may itself contain futures, so resolve it again.
+        return _resolve_prediction_futures(value.result())
+    if isinstance(value, dict):
+        return {k: _resolve_prediction_futures(v) for k, v in value.items()}
+    if isinstance(value, (list, tuple)):
+        resolved = map(_resolve_prediction_futures, value)
+        # list subclasses come back as list, tuple subclasses as tuple
+        return list(resolved) if isinstance(value, list) else tuple(resolved)
+    return value
+
+
+def _video_frames_trace_id(video_frames: List[VideoFrame]) -> Optional[str]:
+    """Build a trace id from the frames' ids, comma-joined; None when there are no frames."""
+    if not video_frames:
+        return None
+    # Joining a single id yields that id unchanged, so one frame needs no
+    # special case.
+    return ",".join(str(frame.frame_id) for frame in video_frames)
diff --git a/inference/core/interfaces/stream/model_handlers/workflows.py b/inference/core/interfaces/stream/model_handlers/workflows.py
index 820bf855fc..5e2caac87a 100644
--- a/inference/core/interfaces/stream/model_handlers/workflows.py
+++ b/inference/core/interfaces/stream/model_handlers/workflows.py
@@ -1,24 +1,82 @@
-from typing import List, Optional
+from typing import Any, Dict, List, Optional
 
 from inference.core.interfaces.camera.entities import VideoFrame
+from inference.core.interfaces.stream.entities import InferenceHandlerResult
+from inference.core.interfaces.stream.model_handlers.workflows_context import (
+    workflow_stream_flush_context,
+)
+from inference.core.utils.nsight import (
+    nsight_frame_context,
+    nsight_frame_label,
+    nsight_mark,
+)
 from inference.core.workflows.execution_engine.core import ExecutionEngine
 from inference.core.workflows.execution_engine.entities.base import VideoMetadata
 
 
 class WorkflowRunner:
-
-    def run_workflow(
+    def __init__(
         self,
-        video_frames: List[VideoFrame],
-        workflows_parameters: Optional[dict],
+        workflows_parameters: Optional[Dict[str, Any]],
         execution_engine: ExecutionEngine,
         image_input_name: str,
         video_metadata_input_name: str,
         serialize_results: bool = False,
         _is_preview: bool = False,
-    ) -> List[dict]:
-        if workflows_parameters is None:
-            workflows_parameters = {}
+    ):
+        self._workflows_parameters = workflows_parameters
+        self._execution_engine = execution_engine
+        self._image_input_name = image_input_name
+        self._video_metadata_input_name = video_metadata_input_name
+        self._serialize_results = serialize_results
+        self._is_preview = _is_preview
+        self._pending_video_frames: List[List[VideoFrame]] = []
+
+    def __call__(
+        self, video_frames: List[VideoFrame]
+    ) -> Optional[InferenceHandlerResult]:
+        """Run the workflow on `video_frames`; when buffering, pair the output with the oldest buffered frames."""
+        trace_id = _video_frames_trace_id(video_frames=video_frames)
+        with nsight_frame_context(frame_id=trace_id):
+            nsight_mark(nsight_frame_label(trace_id, "cpu_start"))
+            predictions = self._run_workflow(video_frames=video_frames)
+            nsight_mark(nsight_frame_label(trace_id, "gpu_submitted"))
+        depth = self._stream_buffer_depth()
+        pending = self._pending_video_frames
+        if depth <= 0:
+            pending.clear()
+            emit_video_frames = video_frames
+        else:
+            pending.append(video_frames)
+            if len(pending) <= depth:
+                # Buffer not yet deeper than the pipeline depth: emit nothing.
+                return None
+            emit_video_frames = pending.pop(0)
+        return InferenceHandlerResult(
+            predictions=predictions, video_frames=emit_video_frames
+        )
+
+    def flush(self) -> Optional[List[InferenceHandlerResult]]:
+        """Drain buffered frame batches by re-running the workflow on each in flush context.
+
+        Returns None when buffering is off or nothing is buffered.
+        """
+        pending = self._pending_video_frames
+        if self._stream_buffer_depth() <= 0:
+            pending.clear()
+        flushed: List[InferenceHandlerResult] = []
+        while pending:
+            batch = pending.pop(0)
+            with workflow_stream_flush_context():
+                predictions = self._run_workflow(video_frames=batch)
+            flushed.append(
+                InferenceHandlerResult(predictions=predictions, video_frames=batch)
+            )
+        # An empty drain is reported as None rather than [].
+        return flushed or None
+
+    def _run_workflow(self, video_frames: List[VideoFrame]) -> List[dict]:
+        workflows_parameters: Dict[str, Any] = dict(self._workflows_parameters or {})
         # TODO: pass fps reflecting each stream to workflows_parameters
         fps = video_frames[0].fps
         if video_frames[0].measured_fps:
@@ -41,7 +99,7 @@ def run_workflow(
             )
             for video_frame in video_frames
         ]
-        workflows_parameters[image_input_name] = [
+        workflows_parameters[self._image_input_name] = [
             {
                 "type": "numpy_object",
                 "value": video_frame.image,
@@ -51,10 +109,40 @@ def run_workflow(
                 video_frames, video_metadata_for_images
             )
         ]
-        workflows_parameters[video_metadata_input_name] = video_metadata_for_images
-        return execution_engine.run(
+        workflows_parameters[self._video_metadata_input_name] = (
+            video_metadata_for_images
+        )
+        return self._execution_engine.run(
             runtime_parameters=workflows_parameters,
             fps=fps,
-            serialize_results=serialize_results,
-            _is_preview=_is_preview,
+            serialize_results=self._serialize_results,
+            _is_preview=self._is_preview,
         )
+
+    def _uses_stream_buffering(self) -> bool:  # True when any step reports a pipeline depth > 0
+        return self._stream_buffer_depth() > 0
+
+    def _stream_buffer_depth(self) -> int:
+        """Return the largest pipeline depth among stream-pipelined workflow steps (0 if none)."""
+        engine = getattr(self._execution_engine, "_engine", None)
+        compiled_workflow = getattr(engine, "_compiled_workflow", None)
+        deepest = 0
+        for initialised_step in getattr(compiled_workflow, "steps", {}).values():
+            step = getattr(initialised_step, "step", None)
+            is_pipelined = getattr(step, "is_stream_pipelined", None)
+            if not callable(is_pipelined) or not is_pipelined():
+                continue
+            # Pipelined steps that do not report a depth count as depth 1.
+            get_depth = getattr(step, "stream_pipeline_depth", None)
+            depth = int(get_depth()) if callable(get_depth) else 1
+            deepest = max(deepest, depth)
+        return deepest
+
+
+def _video_frames_trace_id(video_frames: List[VideoFrame]) -> Optional[str]:
+    """Return the frames' ids as one comma-separated string, or None for an empty batch."""
+    if not video_frames:
+        return None
+    # A one-element join is just that element, so a single frame is not
+    # treated specially.
+    return ",".join(str(video_frame.frame_id) for video_frame in video_frames)
diff --git a/inference/core/models/inference_models_adapters.py b/inference/core/models/inference_models_adapters.py
index 310f0a1ff3..9639c87960 100644
--- a/inference/core/models/inference_models_adapters.py
+++ b/inference/core/models/inference_models_adapters.py
@@ -1,8 +1,11 @@
 import base64
 import io
+import os
+from collections import deque
+from concurrent.futures import Future, ThreadPoolExecutor
 from io import BytesIO
 from time import perf_counter
-from typing import Any, List, Optional, Tuple, Union
+from typing import Any, Deque, List, Optional, Tuple, Union
 
 import numpy as np
 import torch
@@ -16,9 +19,12 @@
     ClassificationInferenceResponse,
     InferenceResponse,
     InferenceResponseImage,
+    InferenceResponseImageDC,
     InstanceSegmentationInferenceResponse,
+    InstanceSegmentationInferenceResponseDC,
     InstanceSegmentationPrediction,
     InstanceSegmentationRLEPrediction,
+    InstanceSegmentationPredictionDC,
     Keypoint,
     KeypointsDetectionInferenceResponse,
     KeypointsPrediction,
@@ -26,6 +32,7 @@
     ObjectDetectionInferenceResponse,
     ObjectDetectionPrediction,
     Point,
+    PointDC,
     SemanticSegmentationInferenceResponse,
     SemanticSegmentationPrediction,
 )
@@ -42,7 +49,12 @@
 from inference.core.models.base import Model
 from inference.core.roboflow_api import get_extra_weights_provider_headers
 from inference.core.utils.image_utils import load_image_bgr, load_image_rgb
-from inference.core.utils.nsight import nsight_range
+from inference.core.utils.nsight import (
+    nsight_current_frame_id,
+    nsight_frame_label,
+    nsight_mark,
+    nsight_range,
+)
 from inference.core.utils.postprocess import bitpacked_masks2poly, masks2poly
 from inference.core.utils.rle_to_polygon import rle_masks_to_polygons
 from inference.core.utils.visualisation import draw_detection_predictions
@@ -62,6 +74,10 @@
     PreProcessingOverrides,
     SemanticSegmentationModel,
 )
+from inference_models.models.base.instance_segmentation import InferenceFuture
+from inference_models.models.base.semantic_segmentation import (
+    SemanticSegmentationResult,
+)
 from inference_models.models.base.types import InstancesRLEMasks, PreprocessingMetadata
 from inference_models.models.common.rle_utils import torch_mask_to_coco_rle
 
@@ -89,6 +105,31 @@
     "#FF39C9",
 ]
 
+# Pinned host buffers for async DtoH on the full-postproc Triton fast path.
+# Keyed by (name, dtype); reused across frames provided the cached buffer is
+# at least as large as the requested shape in every dimension.
+PINNED_HOST_BUFFERS: dict = {}
+
+
+def get_pinned_buffer(name: str, shape, dtype: torch.dtype) -> torch.Tensor:
+    """Return a pinned host tensor of `shape`, reusing the cached (name, dtype) buffer when rank and sizes fit."""
+    key = (name, dtype)
+    buf = PINNED_HOST_BUFFERS.get(key)
+    same_rank = buf is not None and buf.dim() == len(shape)  # other ranks: IndexError or mis-sliced view
+    if not (same_rank and all(have >= want for have, want in zip(buf.shape, shape))):
+        buf = PINNED_HOST_BUFFERS[key] = torch.empty(shape, dtype=dtype, pin_memory=True)
+    return buf[tuple(slice(0, want) for want in shape)]
+
+
+class _PipelinePrimingSentinel:  # marker type behind the module-level `_PIPELINE_PRIMING` singleton
+    __slots__ = ()  # stateless: instances carry no attributes
+
+    def __repr__(self) -> str:  # pragma: no cover - debug only
+        return "<_PIPELINE_PRIMING>"
+
+
+_PIPELINE_PRIMING = _PipelinePrimingSentinel()
+
 
 class InferenceModelsObjectDetectionAdapter(Model):
     def __init__(self, model_id: str, api_key: str = None, **kwargs):
@@ -273,6 +314,23 @@ def __init__(self, model_id: str, api_key: str = None, **kwargs):
             **kwargs,
         )
         self.class_names = list(self._model.class_names)
+        # Stream pipelining: depth=1 means original synchronous behavior
+        # (preprocess→forward→postprocess on each frame, in order). depth=2
+        # means two stages in parallel: while the GPU works on the current
+        # frame, the CPU prepares/submits the next frame, then harvests the
+        # previous response. The response delay is therefore depth - 1 frames.
+        self._pipeline_depth = max(1, int(os.getenv("RFDETR_PIPELINE_DEPTH", "1")))
+        self._response_delay = max(1, self._pipeline_depth - 1)
+        # Per-adapter in-flight futures + metadata. Not thread-safe; the
+        # InferencePipeline is single-producer and the adapter is owned by a
+        # single worker.
+        self._pending_futures: Deque[
+            Tuple[InferenceFuture, PreprocessingMetadata, dict]
+        ] = deque()
+        self._response_executor: Optional[ThreadPoolExecutor] = None
+        self._response_futures: Deque[
+            Future[List[InstanceSegmentationInferenceResponse]]
+        ] = deque()
 
     def map_inference_kwargs(self, kwargs: dict) -> dict:
         kwargs["input_color_format"] = "bgr"
@@ -309,13 +367,198 @@ def preprocess(self, image: Any, **kwargs):
             for v in images
         ]
         mapped_kwargs = self.map_inference_kwargs(kwargs)
-        return self._model.pre_process(np_images, **mapped_kwargs)
+        trace_frame_id = nsight_current_frame_id()
+        nsight_mark(nsight_frame_label(trace_frame_id, "gpu_start"))
+        with nsight_range(nsight_frame_label(trace_frame_id, "gpu_preprocess_submit")):
+            preprocessed = self._model.pre_process(np_images, **mapped_kwargs)
+        nsight_mark(nsight_frame_label(trace_frame_id, "gpu_preprocess_submitted"))
+        return preprocessed
 
     def predict(self, img_in, **kwargs):
         mapped_kwargs = self.map_inference_kwargs(kwargs)
-        return self._model.forward(img_in, **mapped_kwargs)
+        if self._pipeline_depth <= 1:
+            # Original path: forward on current frame, postprocess on
+            # current frame, all synchronous.
+            return self._model.forward(img_in, **mapped_kwargs)
+
+        mapped_kwargs["defer_count_to_adapter"] = (
+            kwargs.get("response_mask_format") != "rle"
+        )
+        mapped_kwargs["defer_postprocess_sync"] = True
+        mapped_kwargs["reuse_trt_graph_outputs"] = True
+        # Pipelined path: submit current frame forward and return its future.
+        # `postprocess()` immediately submits this frame's postprocess GPU
+        # work, then returns the oldest response once the configured frame
+        # delay has been reached.
+        trace_frame_id = nsight_current_frame_id()
+        with nsight_range(nsight_frame_label(trace_frame_id, "gpu_forward_submit")):
+            fut = self._model.forward_async(img_in, None, **mapped_kwargs)
+        nsight_mark(nsight_frame_label(trace_frame_id, "gpu_forward_submitted"))
+        fut._trace_frame_id = trace_frame_id  # type: ignore[attr-defined]
+        fut._adapter_kwargs = {  # type: ignore[attr-defined]
+            "mapped_kwargs": mapped_kwargs
+        }
+        return fut
+
+    def flush(self) -> List[InstanceSegmentationInferenceResponse]:
+        """Collect the responses of every frame still in flight.
+
+        Queues a response build for each pending future, then waits for the
+        builds in submission order. With `RFDETR_PIPELINE_DEPTH` >= 2 callers
+        must invoke this at stream end or the trailing frames are never returned.
+        """
+        drained: List[InstanceSegmentationInferenceResponse] = []
+        if self._pipeline_depth <= 1:
+            return drained
+        self._submit_all_pending_responses()
+        while self._response_futures:
+            oldest = self._response_futures.popleft()
+            drained.extend(oldest.result())
+        return drained
+
+    def _get_response_executor(self) -> ThreadPoolExecutor:
+        """Return the single-worker response pool, creating it on first use."""
+        self._response_executor = self._response_executor or ThreadPoolExecutor(max_workers=1)
+        return self._response_executor
+
+    def _submit_future_gpu_work(
+        self,
+        fut: InferenceFuture,
+        meta: PreprocessingMetadata,
+        mapped_kwargs: dict,
+    ) -> None:
+        """Submit the future's postprocess GPU work once, if it exposes a callable `submit_gpu_work`."""
+        if getattr(fut, "_adapter_gpu_work_submitted", False):
+            return None
+        fut._meta = meta  # type: ignore[attr-defined]
+        fut._kwargs = mapped_kwargs  # type: ignore[attr-defined]
+        submit_gpu_work = getattr(fut, "submit_gpu_work", None)
+        if not callable(submit_gpu_work):
+            return None
+        frame_id = getattr(fut, "_trace_frame_id", nsight_current_frame_id())
+        with nsight_range(nsight_frame_label(frame_id, "gpu_postprocess_submit")):
+            submit_gpu_work(meta)
+        nsight_mark(nsight_frame_label(frame_id, "gpu_postprocess_submitted"))
+        fut._adapter_gpu_work_submitted = True  # type: ignore[attr-defined]
+
+    def _submit_response_build(
+        self,
+        fut: InferenceFuture,
+        meta: PreprocessingMetadata,
+        mapped_kwargs: dict,
+    ) -> None:
+        """Hand the future to the response executor and queue the resulting response future."""
+        frame_id = getattr(fut, "_trace_frame_id", nsight_current_frame_id())
+        # Stash everything on the future before the worker thread picks it up.
+        fut._meta = meta  # type: ignore[attr-defined]
+        fut._kwargs = mapped_kwargs  # type: ignore[attr-defined]
+        fut._trace_frame_id = frame_id  # type: ignore[attr-defined]
+        executor = self._get_response_executor()
+        response_future = executor.submit(
+            self._finalize_future, fut, meta, mapped_kwargs
+        )
+        response_future._trace_frame_id = frame_id  # type: ignore[attr-defined]
+        self._response_futures.append(response_future)
+
+    def _submit_ready_responses(self) -> None:  # leaves `_response_delay` futures pending
+        while len(self._pending_futures) > self._response_delay:
+            self._submit_response_build(*self._pending_futures.popleft())  # oldest first
+
+    def _submit_all_pending_responses(self) -> None:  # leaves nothing pending
+        while self._pending_futures:
+            self._submit_response_build(*self._pending_futures.popleft())  # oldest first
 
     def postprocess(
+        self,
+        predictions,
+        preprocess_return_metadata: PreprocessingMetadata,
+        **kwargs,
+    ) -> List[InstanceSegmentationInferenceResponse]:
+        """Post-process now (depth 1) or enqueue this frame and hand back a previously queued response.
+
+        In pipelined mode `predictions` is expected to be the future from
+        `predict()`. Empty placeholder responses are returned while nothing is
+        ready; for workflow callers a ready response future is attached to the
+        first placeholder as `_async_response_future` rather than awaited.
+        """
+        if self._pipeline_depth <= 1:
+            return self._postprocess_sync(
+                predictions, preprocess_return_metadata, **kwargs
+            )
+        fut: InferenceFuture = predictions
+        mapped_kwargs = getattr(fut, "_adapter_kwargs", {}).get("mapped_kwargs", {})
+        self._submit_future_gpu_work(fut, preprocess_return_metadata, mapped_kwargs)
+        self._pending_futures.append((fut, preprocess_return_metadata, mapped_kwargs))
+        self._submit_ready_responses()
+
+        for_workflow = kwargs.get("source") == "workflow-execution"
+        ready = self._response_futures.popleft() if self._response_futures else None
+        # Only non-workflow callers block on the oldest ready response.
+        if ready is not None and not for_workflow:
+            return ready.result()
+        placeholders = self._empty_responses_for_metadata(
+            preprocess_return_metadata=preprocess_return_metadata,
+            workflow_execution=for_workflow,
+        )
+        # NOTE(review): with no placeholders (empty metadata) a ready future is dropped.
+        if ready is not None and placeholders:
+            placeholders[0]._async_response_future = ready
+        return placeholders
+
+    def _empty_responses_for_metadata(
+        self,
+        preprocess_return_metadata: PreprocessingMetadata,
+        workflow_execution: bool,
+    ) -> List[InstanceSegmentationInferenceResponse]:
+        """Build one prediction-free response per metadata entry.
+
+        Workflow executions get the `...DC` response and image types; all
+        other callers get the regular response and image types.
+        """
+        if workflow_execution:
+            response_cls = InstanceSegmentationInferenceResponseDC
+            image_cls = InferenceResponseImageDC
+        else:
+            response_cls = InstanceSegmentationInferenceResponse
+            image_cls = InferenceResponseImage
+        return [
+            response_cls(
+                predictions=[],
+                image=image_cls(
+                    width=metadata.original_size.width,
+                    height=metadata.original_size.height,
+                ),
+            )
+            for metadata in preprocess_return_metadata
+        ]
+
+    def _finalize_future(
+        self,
+        fut: InferenceFuture,
+        preprocess_return_metadata: PreprocessingMetadata,
+        mapped_kwargs: dict,
+    ) -> List[InstanceSegmentationInferenceResponse]:
+        """Resolve `fut` and build the responses for its frame; submitted to the response executor."""
+        # Replace the meta stashed at submit time (`None`) with this frame's
+        # metadata. Per the original note, this private-surface tweak is
+        # allowed because _DirectInferenceFuture's post_process is memoised.
+        fut._meta = preprocess_return_metadata  # type: ignore[attr-defined]
+        fut._kwargs = mapped_kwargs  # type: ignore[attr-defined]
+        trace_frame_id = getattr(fut, "_trace_frame_id", None)
+        nsight_mark(nsight_frame_label(trace_frame_id, "cpu_response_start"))
+        detections_list = fut.result()
+        for det in detections_list:
+            try:
+                det._trace_frame_id = trace_frame_id  # type: ignore[attr-defined]
+            except AttributeError:
+                pass  # best effort: objects that reject new attributes stay untagged
+        responses = self._build_responses_from_detections(
+            detections_list, preprocess_return_metadata, **mapped_kwargs
+        )
+        nsight_mark(nsight_frame_label(trace_frame_id, "cpu_response_complete"))
+        return responses
+
+    def _postprocess_sync(
         self,
         predictions: List[InstanceDetections],
         preprocess_return_metadata: PreprocessingMetadata,
@@ -323,31 +566,195 @@ def postprocess(
     ) -> List[InstanceSegmentationInferenceResponse]:
         return_in_rle = kwargs.get("response_mask_format") == "rle"
         mapped_kwargs = self.map_inference_kwargs(kwargs)
+        mapped_kwargs["defer_count_to_adapter"] = not return_in_rle
         detections_list = self._model.post_process(
             predictions, preprocess_return_metadata, **mapped_kwargs
         )
+        return self._build_responses_from_detections(
+            detections_list, preprocess_return_metadata, **kwargs
+        )
+
+    def _build_responses_from_detections(
+        self,
+        detections_list: List[InstanceDetections],
+        preprocess_return_metadata: PreprocessingMetadata,
+        **kwargs,
+    ) -> List[InstanceSegmentationInferenceResponse]:
+        return_in_rle = kwargs.get("response_mask_format") == "rle"
+        # Workflow callers consume a plain dict via `_is_response_dc_to_dict`;
+        # dataclasses avoid pydantic validation + `model_dump` overhead per
+        # frame. Keep the pydantic path for RLE responses and for non-workflow
+        # callers that rely on the response model type.
+        use_dc = kwargs.get("source") == "workflow-execution" and not return_in_rle
 
         responses: List[InstanceSegmentationInferenceResponse] = []
         for preproc_metadata, det in zip(preprocess_return_metadata, detections_list):
+            trace_frame_id = getattr(det, "_trace_frame_id", nsight_current_frame_id())
+            finalize_pending = getattr(det, "_finalize_pending_postproc", None)
+            if callable(finalize_pending):
+                with nsight_range(
+                    nsight_frame_label(trace_frame_id, "gpu_finish_wait")
+                ):
+                    det = finalize_pending()
+                try:
+                    det._trace_frame_id = trace_frame_id  # type: ignore[attr-defined]
+                except Exception:
+                    pass
+                nsight_mark(
+                    nsight_frame_label(trace_frame_id, "cpu_predictions_accepted")
+                )
             H = preproc_metadata.original_size.height
             W = preproc_metadata.original_size.width
 
-            xyxy = det.xyxy.detach().cpu().numpy()
-            confs = det.confidence.detach().cpu().numpy()
-            if isinstance(det.mask, torch.Tensor):
-                masks = det.mask.detach().cpu().numpy()
-                if return_in_rle:
-                    polys_or_rles = [
-                        torch_mask_to_coco_rle(mask=mask) for mask in masks
-                    ]
+            combined_gpu = getattr(det, "_combined_gpu", None)
+            mask_gpu = getattr(det, "_mask_gpu", None)
+            mask_packed_gpu = getattr(det, "_mask_packed_gpu", None)
+            mask_cpu = getattr(det, "_mask_cpu", None)
+            defer_count_to_adapter = getattr(det, "_defer_count_to_adapter", False)
+            done_event = getattr(det, "_postproc_done_event", None)
+            dense_mask_cuda = isinstance(mask_gpu, torch.Tensor) and mask_gpu.is_cuda
+            packed_mask_cuda = (
+                isinstance(mask_packed_gpu, torch.Tensor) and mask_packed_gpu.is_cuda
+            )
+            if (
+                not return_in_rle
+                and done_event is not None
+                and (dense_mask_cuda or packed_mask_cuda)
+            ):
+                device = mask_gpu.device if dense_mask_cuda else mask_packed_gpu.device
+                stream = torch.cuda.current_stream(device)
+                done_event.wait(stream)
+                nsight_mark(
+                    nsight_frame_label(trace_frame_id, "gpu_finish_wait_enqueued")
+                )
+
+                if (
+                    defer_count_to_adapter
+                    and isinstance(combined_gpu, torch.Tensor)
+                    and combined_gpu.is_cuda
+                ):
+                    combined_host = get_pinned_buffer(
+                        "combined_full",
+                        tuple(combined_gpu.shape),
+                        combined_gpu.dtype,
+                    )
+                    combined_host.copy_(combined_gpu, non_blocking=True)
+                    stream.synchronize()
+                    combined_np = combined_host.numpy()
+                    class_column = combined_np[:, 5]
+                    inactive_indices = np.flatnonzero(class_column < 0)
+                    n_survivors = (
+                        int(inactive_indices[0])
+                        if inactive_indices.size > 0
+                        else int(class_column.shape[0])
+                    )
+                    if n_survivors == 0:
+                        xyxy = np.empty((0, 4), dtype=np.int32)
+                        confs = np.empty((0,), dtype=np.float32)
+                        class_ids = np.empty((0,), dtype=np.int32)
+                        polys_or_rles = []
+                    else:
+                        combined_slice = combined_np[:n_survivors]
+                        xyxy = combined_slice[:, :4]
+                        confs = combined_slice[:, 4].view(np.float32)
+                        class_ids = combined_slice[:, 5]
+                        if packed_mask_cuda:
+                            packed_slice = mask_packed_gpu[:n_survivors]
+                            packed_host = get_pinned_buffer(
+                                "mask_packed",
+                                tuple(packed_slice.shape),
+                                packed_slice.dtype,
+                            )
+                            packed_host.copy_(packed_slice, non_blocking=True)
+                            stream.synchronize()
+                            polys_or_rles = bitpacked_masks2poly(
+                                packed_host.numpy(), width=W
+                            )
+                        else:
+                            mask_slice = mask_gpu[:n_survivors]
+                            mask_host = get_pinned_buffer(
+                                "mask", tuple(mask_slice.shape), mask_slice.dtype
+                            )
+                            mask_host.copy_(mask_slice, non_blocking=True)
+                            stream.synchronize()
+                            polys_or_rles = masks2poly(mask_host.numpy())
                 else:
-                    polys_or_rles = masks2poly(masks)
+                    n_survivors = int(det.xyxy.shape[0])
+                    if n_survivors == 0:
+                        xyxy = np.empty((0, 4), dtype=np.int32)
+                        confs = np.empty((0,), dtype=np.float32)
+                        class_ids = np.empty((0,), dtype=np.int32)
+                        polys_or_rles = []
+                    else:
+                        mask_slice = mask_gpu[:n_survivors]
+                        mask_host = get_pinned_buffer(
+                            "mask", tuple(mask_slice.shape), mask_slice.dtype
+                        )
+                        if (
+                            isinstance(combined_gpu, torch.Tensor)
+                            and combined_gpu.is_cuda
+                            and tuple(combined_gpu.shape)
+                            == (n_survivors, det.xyxy.shape[1] + 2)
+                        ):
+                            combined_slice = combined_gpu[:n_survivors]
+                            combined_host = get_pinned_buffer(
+                                "combined",
+                                tuple(combined_slice.shape),
+                                combined_slice.dtype,
+                            )
+                            combined_host.copy_(combined_slice, non_blocking=True)
+                            mask_host.copy_(mask_slice, non_blocking=True)
+                            stream.synchronize()
+                            combined_np = combined_host.numpy()
+                            xyxy = combined_np[:, :4]
+                            confs = combined_np[:, 4].view(np.float32)
+                            class_ids = combined_np[:, 5]
+                            polys_or_rles = masks2poly(mask_host.numpy())
+                        else:
+                            xyxy_host = get_pinned_buffer(
+                                "xyxy", tuple(det.xyxy.shape), det.xyxy.dtype
+                            )
+                            conf_host = get_pinned_buffer(
+                                "conf",
+                                tuple(det.confidence.shape),
+                                det.confidence.dtype,
+                            )
+                            class_host = get_pinned_buffer(
+                                "class_id",
+                                tuple(det.class_id.shape),
+                                det.class_id.dtype,
+                            )
+                            xyxy_host.copy_(det.xyxy, non_blocking=True)
+                            conf_host.copy_(det.confidence, non_blocking=True)
+                            class_host.copy_(det.class_id, non_blocking=True)
+                            mask_host.copy_(mask_slice, non_blocking=True)
+                            stream.synchronize()
+                            xyxy = xyxy_host.numpy()
+                            confs = conf_host.numpy()
+                            class_ids = class_host.numpy()
+                            polys_or_rles = masks2poly(mask_host.numpy())
+            elif not return_in_rle and isinstance(mask_cpu, np.ndarray):
+                xyxy = det.xyxy.detach().cpu().numpy()
+                confs = det.confidence.detach().cpu().numpy()
+                class_ids = det.class_id.detach().cpu().numpy()
+                polys_or_rles = masks2poly(mask_cpu)
             else:
-                if return_in_rle:
-                    polys_or_rles = det.mask.to_coco_rle_masks()
+                xyxy = det.xyxy.detach().cpu().numpy()
+                confs = det.confidence.detach().cpu().numpy()
+                if isinstance(det.mask, torch.Tensor):
+                    masks = det.mask.detach().cpu().numpy()
+                    if return_in_rle:
+                        polys_or_rles = [
+                            torch_mask_to_coco_rle(mask=mask) for mask in masks
+                        ]
+                    else:
+                        polys_or_rles = masks2poly(masks)
                 else:
-                    polys_or_rles = rle_masks2poly(det.mask)
-            class_ids = det.class_id.detach().cpu().numpy()
+                    if return_in_rle:
+                        polys_or_rles = det.mask.to_coco_rle_masks()
+                    else:
+                        polys_or_rles = rle_masks2poly(det.mask)
+                class_ids = det.class_id.detach().cpu().numpy()
 
             predictions: List[
                 Union[InstanceSegmentationPrediction, InstanceSegmentationRLEPrediction]
@@ -371,46 +778,71 @@ def postprocess(
                     and class_name not in kwargs["class_filter"]
                 ):
                     continue
-                if not return_in_rle:
+                if use_dc:
                     predictions.append(
-                        InstanceSegmentationPrediction(
+                        InstanceSegmentationPredictionDC(
                             x=cx,
                             y=cy,
                             width=w,
                             height=h,
                             confidence=float(conf),
+                            class_name=class_name,
+                            class_id=class_id_int,
                             points=[
-                                Point(x=point[0], y=point[1])
+                                PointDC(x=float(point[0]), y=float(point[1]))
                                 for point in mask_as_poly_or_rle
                             ],
-                            **{"class": class_name},
-                            class_id=class_id_int,
                         )
                     )
                 else:
-                    if isinstance(mask_as_poly_or_rle["counts"], bytes):
-                        mask_as_poly_or_rle["counts"] = mask_as_poly_or_rle[
-                            "counts"
-                        ].decode("ascii")
-                    predictions.append(
-                        InstanceSegmentationRLEPrediction(
-                            x=cx,
-                            y=cy,
-                            width=w,
-                            height=h,
-                            confidence=float(conf),
-                            rle=mask_as_poly_or_rle,
-                            **{"class": class_name},
-                            class_id=class_id_int,
+                    if not return_in_rle:
+                        predictions.append(
+                            InstanceSegmentationPrediction(
+                                x=cx,
+                                y=cy,
+                                width=w,
+                                height=h,
+                                confidence=float(conf),
+                                points=[
+                                    Point(x=point[0], y=point[1])
+                                    for point in mask_as_poly_or_rle
+                                ],
+                                **{"class": class_name},
+                                class_id=class_id_int,
+                            )
+                        )
+                    else:
+                        if isinstance(mask_as_poly_or_rle["counts"], bytes):
+                            mask_as_poly_or_rle["counts"] = mask_as_poly_or_rle[
+                                "counts"
+                            ].decode("ascii")
+                        predictions.append(
+                            InstanceSegmentationRLEPrediction(
+                                x=cx,
+                                y=cy,
+                                width=w,
+                                height=h,
+                                confidence=float(conf),
+                                rle=mask_as_poly_or_rle,
+                                **{"class": class_name},
+                                class_id=class_id_int,
+                            )
                         )
-                    )
 
-            responses.append(
-                InstanceSegmentationInferenceResponse(
-                    predictions=predictions,
-                    image=InferenceResponseImage(width=W, height=H),
+            if use_dc:
+                responses.append(
+                    InstanceSegmentationInferenceResponseDC(
+                        predictions=predictions,
+                        image=InferenceResponseImageDC(width=W, height=H),
+                    )
+                )
+            else:
+                responses.append(
+                    InstanceSegmentationInferenceResponse(
+                        predictions=predictions,
+                        image=InferenceResponseImage(width=W, height=H),
+                    )
                 )
-            )
         return responses
 
     def clear_cache(self, delete_from_disk: bool = True) -> None:
diff --git a/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v3.py b/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v3.py
index 6361b69654..f662d81629 100644
--- a/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v3.py
+++ b/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v3.py
@@ -1,3 +1,4 @@
+from concurrent.futures import Future, ThreadPoolExecutor
 from typing import List, Literal, Optional, Type, Union
 
 from pydantic import ConfigDict, Field, PositiveInt, model_validator
@@ -5,6 +6,10 @@
 from inference.core.entities.requests.inference import (
     InstanceSegmentationInferenceRequest,
 )
+from inference.core.entities.responses.inference import (
+    InstanceSegmentationInferenceResponseDC,
+    _is_response_dc_to_dict,
+)
 from inference.core.env import (
     HOSTED_INSTANCE_SEGMENTATION_URL,
     LOCAL_INFERENCE_API_URL,
@@ -12,7 +17,11 @@
     WORKFLOWS_REMOTE_EXECUTION_MAX_STEP_BATCH_SIZE,
     WORKFLOWS_REMOTE_EXECUTION_MAX_STEP_CONCURRENT_REQUESTS,
 )
+from inference.core.interfaces.stream.model_handlers.workflows_context import (
+    is_workflow_stream_flush_active,
+)
 from inference.core.managers.base import ModelManager
+from inference.core.utils.nsight import nsight_frame_label, nsight_mark, nsight_range
 from inference.core.workflows.core_steps.common.entities import StepExecutionMode
 from inference.core.workflows.core_steps.common.utils import (
     attach_parents_coordinates_to_batch_of_sv_detections,
@@ -232,6 +241,8 @@ def __init__(
         self._model_manager = model_manager
         self._api_key = api_key
         self._step_execution_mode = step_execution_mode
+        self._last_model_id: Optional[str] = None
+        self._stream_response_executor: Optional[ThreadPoolExecutor] = None
 
     @classmethod
     def get_init_parameters(cls) -> List[str]:
@@ -314,34 +325,57 @@ def run_locally(
         enforce_dense_masks_in_inference_models: bool,
     ) -> BlockResult:
         inference_images = [i.to_inference_format(numpy_preferred=True) for i in images]
-        request = InstanceSegmentationInferenceRequest(
-            api_key=self._api_key,
-            model_id=model_id,
-            image=inference_images,
-            disable_active_learning=disable_active_learning,
-            active_learning_target_dataset=active_learning_target_dataset,
-            class_agnostic_nms=class_agnostic_nms,
-            class_filter=class_filter,
-            confidence=confidence,
-            iou_threshold=iou_threshold,
-            max_detections=max_detections,
-            max_candidates=max_candidates,
-            mask_decode_mode=mask_decode_mode,
-            tradeoff_factor=tradeoff_factor,
-            source="workflow-execution",
-            enforce_dense_masks_in_inference_models=enforce_dense_masks_in_inference_models,
-        )
+        self._last_model_id = model_id
         self._model_manager.add_model(
             model_id=model_id,
             api_key=self._api_key,
         )
-        predictions = self._model_manager.infer_from_request_sync(
-            model_id=model_id, request=request
-        )
+        if is_workflow_stream_flush_active():
+            predictions = self._model_manager.flush(model_id=model_id)
+        else:
+            request = InstanceSegmentationInferenceRequest(
+                api_key=self._api_key,
+                model_id=model_id,
+                image=inference_images,
+                disable_active_learning=disable_active_learning,
+                active_learning_target_dataset=active_learning_target_dataset,
+                class_agnostic_nms=class_agnostic_nms,
+                class_filter=class_filter,
+                confidence=confidence,
+                iou_threshold=iou_threshold,
+                max_detections=max_detections,
+                max_candidates=max_candidates,
+                mask_decode_mode=mask_decode_mode,
+                tradeoff_factor=tradeoff_factor,
+                source="workflow-execution",
+                enforce_dense_masks_in_inference_models=enforce_dense_masks_in_inference_models,
+            )
+            predictions = self._model_manager.infer_from_request_sync(
+                model_id=model_id, request=request
+            )
         if not isinstance(predictions, list):
             predictions = [predictions]
+        async_response_future = self._extract_async_response_future(
+            predictions=predictions
+        )
+        if async_response_future is not None:
+            return self._submit_async_post_process_result(
+                predictions_future=async_response_future,
+                images=images,
+                class_filter=class_filter,
+                model_id=model_id,
+            )
+        # The adapter returns dataclass responses when source="workflow-execution"
+        # (cheaper construct + dict-walk than pydantic). Any other response type
+        # (e.g. if a non-rfdetr backend is bound to the same block) falls back
+        # to `model_dump`.
         predictions = [
-            e.model_dump(by_alias=True, exclude_none=True) for e in predictions
+            (
+                _is_response_dc_to_dict(e)
+                if isinstance(e, InstanceSegmentationInferenceResponseDC)
+                else e.model_dump(by_alias=True, exclude_none=True)
+            )
+            for e in predictions
         ]
         return self._post_process_result(
             images=images,
@@ -350,6 +384,128 @@ def run_locally(
             model_id=model_id,
         )
 
+    def _extract_async_response_future(
+        self,
+        predictions: List[object],
+    ) -> Optional[Future]:
+        for prediction in predictions:
+            async_response_future = getattr(prediction, "_async_response_future", None)
+            if isinstance(async_response_future, Future):
+                return async_response_future
+        return None
+
+    def _get_stream_response_executor(self) -> ThreadPoolExecutor:
+        if self._stream_response_executor is None:
+            self._stream_response_executor = ThreadPoolExecutor(max_workers=1)
+        return self._stream_response_executor
+
+    def _submit_async_post_process_result(
+        self,
+        predictions_future: Future,
+        images: Batch[WorkflowImageData],
+        class_filter: Optional[List[str]],
+        model_id: str,
+    ) -> BlockResult:
+        finalized_result_future = self._get_stream_response_executor().submit(
+            self._finalize_async_prediction_value,
+            predictions_future,
+            images,
+            class_filter,
+            model_id,
+        )
+        trace_frame_id = getattr(predictions_future, "_trace_frame_id", None)
+        finalized_result_future._trace_frame_id = (  # type: ignore[attr-defined]
+            trace_frame_id
+        )
+        return [
+            {
+                "inference_id": None,
+                "predictions": self._submit_async_prediction_selector(
+                    result_future=finalized_result_future,
+                    image_index=image_index,
+                    trace_frame_id=trace_frame_id,
+                ),
+                "model_id": model_id,
+            }
+            for image_index in range(len(images))
+        ]
+
+    def _submit_async_prediction_selector(
+        self,
+        result_future: Future,
+        image_index: int,
+        trace_frame_id: Optional[str],
+    ) -> Future:
+        prediction_future = self._get_stream_response_executor().submit(
+            self._select_async_prediction_value,
+            result_future,
+            image_index,
+        )
+        prediction_future._trace_frame_id = trace_frame_id  # type: ignore[attr-defined]
+        return prediction_future
+
+    def _finalize_async_prediction_value(
+        self,
+        predictions_future: Future,
+        images: Batch[WorkflowImageData],
+        class_filter: Optional[List[str]],
+        model_id: str,
+    ) -> BlockResult:
+        trace_frame_id = getattr(predictions_future, "_trace_frame_id", None)
+        nsight_mark(nsight_frame_label(trace_frame_id, "workflow_finalize_start"))
+        predictions = predictions_future.result()
+        if not isinstance(predictions, list):
+            predictions = [predictions]
+        predictions = [
+            (
+                _is_response_dc_to_dict(e)
+                if isinstance(e, InstanceSegmentationInferenceResponseDC)
+                else e.model_dump(by_alias=True, exclude_none=True)
+            )
+            for e in predictions
+        ]
+        with nsight_range(
+            nsight_frame_label(trace_frame_id, "workflow_prediction_convert")
+        ):
+            result = self._post_process_result(
+                images=images,
+                predictions=predictions,
+                class_filter=class_filter,
+                model_id=model_id,
+            )
+        nsight_mark(nsight_frame_label(trace_frame_id, "workflow_finalize_complete"))
+        return result
+
+    def _select_async_prediction_value(
+        self,
+        result_future: Future,
+        image_index: int,
+    ):
+        result = result_future.result()
+        if image_index >= len(result):
+            return []
+        return result[image_index]["predictions"]
+
+    def is_stream_pipelined(self) -> bool:
+        if self._step_execution_mode is not StepExecutionMode.LOCAL:
+            return False
+        if (
+            self._last_model_id is None
+            or self._last_model_id not in self._model_manager
+        ):
+            return False
+        model = self._model_manager[self._last_model_id]
+        return (
+            callable(getattr(model, "flush", None))
+            and getattr(model, "_pipeline_depth", 1) > 1
+        )
+
+    def stream_pipeline_depth(self) -> int:
+        if not self.is_stream_pipelined():
+            return 0
+        model = self._model_manager[self._last_model_id]
+        return max(0, int(getattr(model, "_pipeline_depth", 1)) - 1)
+
     def run_remotely(
         self,
         images: Batch[WorkflowImageData],
diff --git a/inference_models/inference_models/models/base/instance_segmentation.py b/inference_models/inference_models/models/base/instance_segmentation.py
index 92e4aed41a..d7e62070ea 100644
--- a/inference_models/inference_models/models/base/instance_segmentation.py
+++ b/inference_models/inference_models/models/base/instance_segmentation.py
@@ -1,6 +1,17 @@
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
-from typing import Generic, List, Literal, Optional, Set, Tuple, Union
+from typing import (
+    Any,
+    Generic,
+    List,
+    Literal,
+    Optional,
+    Protocol,
+    Set,
+    Tuple,
+    Union,
+    runtime_checkable,
+)
 
 import numpy as np
 import supervision as sv
@@ -17,6 +28,105 @@
 InstanceSegmentationMaskFormat = Literal["dense", "rle"]
 
 
+_MISSING = object()
+
+
+@runtime_checkable
+class InferenceFuture(Protocol):
+    """Future-like handle over an in-flight inference request.
+
+    The returned object lets a caller start a subsequent ``infer_async`` call
+    while the GPU is still executing the previous one. Calling ``result()``
+    runs post-processing (or reuses an eagerly submitted one) and returns the
+    detections without a mandatory host sync. ``done()`` is a non-blocking probe.
+    """
+
+    def result(self) -> List["InstanceDetections"]: ...
+
+    def done(self) -> bool: ...
+
+
+class _DirectInferenceFuture:
+    """Concrete ``InferenceFuture`` backed by a single ``torch.cuda.Event``.
+
+    Holds the raw forward output plus the preprocessing metadata needed by
+    ``post_process``. The event is recorded on the stream that produced the
+    raw output; ``done()`` polls it, while ``result()`` never waits on it.
+    Post-process output is memoised so ``result()`` may be called repeatedly.
+    """
+
+    # No __slots__: adapters stash per-request context on the future
+    # (e.g. pipeline-depth-2 stashes `_adapter_kwargs` so `postprocess`
+    # can rebuild the decode call for the PREVIOUS frame even when the
+    # submit site passed `meta=None`). The Future is short-lived so the
+    # per-instance dict overhead is negligible.
+
+    def __init__(
+        self,
+        model: "InstanceSegmentationModel",
+        raw: Any,
+        meta: Any,
+        evt: Optional[torch.cuda.Event],
+        kwargs: dict,
+    ) -> None:
+        self._model = model
+        self._raw = raw
+        self._meta = meta
+        self._evt = evt
+        self._kwargs = kwargs
+        self._cached: Any = _MISSING
+
+    @property
+    def preprocess_metadata(self) -> Any:
+        """The metadata captured at ``pre_process`` time for this request."""
+        return self._meta
+
+    def done(self) -> bool:
+        if self._cached is not _MISSING:
+            return True
+        if self._evt is None:
+            return True
+        return self._evt.query()
+
+    def submit_gpu_work(self, meta: Any = None) -> None:
+        """Enqueue the ``post_process`` GPU work eagerly.
+
+        Under depth>=2 pipelining ``result()`` is intentionally delayed so
+        the source loop can prepare later frames. Without eager submission,
+        the postproc kernels are also delayed until that future is finalized,
+        leaving a bubble between the TensorRT produce event and postproc.
+
+        Calling ``submit_gpu_work`` from the adapter's ``postprocess`` step
+        enqueues the postproc stream wait immediately after the corresponding
+        TensorRT graph has been submitted. The host still does not block, and
+        ``result()`` later reuses the enqueued postproc result.
+
+        Idempotent: once a result is cached, further calls are no-ops and
+        ``result()`` reuses the enqueued postproc result.
+        """
+        if self._cached is not _MISSING:
+            return
+        if meta is None:
+            meta = self._meta
+        else:
+            self._meta = meta
+        # `post_process` is expected to be non-blocking: it enqueues its
+        # CUDA kernels on a private stream and returns a handle/structure
+        # that the caller reads later. The host does NOT block here.
+        self._cached = self._model.post_process(self._raw, meta, **self._kwargs)
+
+    def result(self) -> List["InstanceDetections"]:
+        # No host sync here: post_process() enqueues its GPU work on a
+        # dedicated stream and uses stream.wait_event() internally to order
+        # itself after the forward stream. The final host sync happens where
+        # CPU-visible results are actually needed (DtoH copies in the adapter).
+        if self._cached is _MISSING:
+            self._cached = self._model.post_process(
+                self._raw, self._meta, **self._kwargs
+            )
+        return self._cached
+
+
 @dataclass
 class InstanceDetections:
     xyxy: torch.Tensor  # (n_boxes, 4)
@@ -113,10 +223,69 @@ def infer(
         images: Union[torch.Tensor, List[torch.Tensor], np.ndarray, List[np.ndarray]],
         **kwargs,
     ) -> List[InstanceDetections]:
+        # Synchronous direct path: pre_process → forward → post_process in
+        # sequence, with no per-call output cloning. The async variant
+        # (``infer_async``) exists for pipelined callers that need to
+        # submit frame N+1 before frame N's output buffers have been read
+        # — cloning makes those callers safe. Here, ``post_process``
+        # consumes the raw forward output immediately, so no clone is
+        # needed and we avoid the ~80µs of DtoD copies on the inference
+        # stream. This keeps the ``infer()`` entry point at maximum
+        # throughput for single-thread, single-model users.
         pre_processed_images, pre_processing_meta = self.pre_process(images, **kwargs)
         model_results = self.forward(pre_processed_images, **kwargs)
         return self.post_process(model_results, pre_processing_meta, **kwargs)
 
+    def infer_async(
+        self,
+        images: Union[torch.Tensor, List[torch.Tensor], np.ndarray, List[np.ndarray]],
+        **kwargs,
+    ) -> InferenceFuture:
+        """Submit an inference request and return a future.
+
+        The default implementation runs ``pre_process`` and then delegates to
+        ``forward_async``, which launches ``forward``, captures a CUDA event
+        for that launch, and defers ``post_process`` to the returned future.
+        Subclasses that run ``forward`` on a dedicated stream should override
+        ``forward_async`` so the event comes from that stream (see the TRT model).
+        """
+        pre_processed_images, pre_processing_meta = self.pre_process(images, **kwargs)
+        return self.forward_async(pre_processed_images, pre_processing_meta, **kwargs)
+
+    def forward_async(
+        self,
+        pre_processed_images: PreprocessedInputs,
+        pre_processing_meta: PreprocessingMetadata,
+        **kwargs,
+    ) -> InferenceFuture:
+        """Run ``forward`` only and return a future pinned to that launch.
+
+        Separating this from ``infer_async`` lets the adapter interleave
+        preprocessing for frame N+1 with the forward pass for frame N on a
+        dedicated stream while holding a future whose ``result()`` will
+        decode frame N once its outputs are ready.
+        """
+        model_results = self.forward(pre_processed_images, **kwargs)
+        # Prefer a produce-event already recorded on the forward stream (e.g.
+        # the TRT graph stream) so `done()` reflects true GPU completion
+        # without straddling a stream boundary. Fall back to recording on
+        # the current stream for models that don't expose one.
+        evt: Optional[torch.cuda.Event] = None
+        first = (
+            model_results[0]
+            if isinstance(model_results, (tuple, list))
+            else model_results
+        )
+        existing = getattr(first, "_trt_produce_event", None)
+        if existing is not None:
+            evt = existing
+        elif torch.cuda.is_available():
+            evt = torch.cuda.Event()
+            evt.record()
+        return _DirectInferenceFuture(
+            self, model_results, pre_processing_meta, evt, kwargs
+        )
+
     @abstractmethod
     def pre_process(
         self, images: Union[torch.Tensor, List[torch.Tensor]], **kwargs
diff --git a/inference_models/inference_models/models/common/trt.py b/inference_models/inference_models/models/common/trt.py
index 2d62408930..0c2f730b1c 100644
--- a/inference_models/inference_models/models/common/trt.py
+++ b/inference_models/inference_models/models/common/trt.py
@@ -78,6 +78,7 @@ class TRTCudaGraphState:
     input_buffer: torch.Tensor
     output_buffers: List[torch.Tensor]
     execution_context: trt.IExecutionContext
+    consumer_done_event: Optional[torch.cuda.Event] = None
 
 
 class TRTCudaGraphCache:
@@ -435,6 +436,7 @@ def infer_from_trt_engine(
     outputs: List[str],
     stream: Optional[torch.cuda.Stream] = None,
     trt_cuda_graph_cache: Optional[TRTCudaGraphCache] = None,
+    synchronize: bool = True,
 ) -> List[torch.Tensor]:
     """Run inference using a TensorRT engine, optionally with CUDA graph acceleration.
 
@@ -570,8 +572,10 @@ def infer_from_trt_engine(
             input_name=input_name,
             outputs=outputs,
             trt_cuda_graph_cache=trt_cuda_graph_cache,
+            synchronize=synchronize,
         )
-    stream.synchronize()
+    if synchronize:
+        stream.synchronize()
     return results
 
 
@@ -584,6 +588,7 @@ def _infer_from_trt_engine(
     input_name: str,
     outputs: List[str],
     trt_cuda_graph_cache: Optional[TRTCudaGraphCache] = None,
+    synchronize: bool = True,
 ) -> List[torch.Tensor]:
     if trt_config.static_batch_size is not None:
         min_batch_size = trt_config.static_batch_size
@@ -601,6 +606,7 @@ def _infer_from_trt_engine(
         min_batch_size=min_batch_size,
         max_batch_size=max_batch_size,
         trt_cuda_graph_cache=trt_cuda_graph_cache,
+        synchronize=synchronize,
     )
 
 
@@ -614,6 +620,7 @@ def _infer_from_trt_engine_with_batch_size_boundaries(
     min_batch_size: int,
     max_batch_size: int,
     trt_cuda_graph_cache: Optional[TRTCudaGraphCache] = None,
+    synchronize: bool = True,
 ) -> List[torch.Tensor]:
     if pre_processed_images.shape[0] <= max_batch_size:
         reminder = min_batch_size - pre_processed_images.shape[0]
@@ -637,6 +644,7 @@ def _infer_from_trt_engine_with_batch_size_boundaries(
             input_name=input_name,
             outputs=outputs,
             trt_cuda_graph_cache=trt_cuda_graph_cache,
+            synchronize=synchronize,
         )
         if reminder > 0:
             results = [r[:-reminder] for r in results]
@@ -667,6 +675,7 @@ def _infer_from_trt_engine_with_batch_size_boundaries(
             input_name=input_name,
             outputs=outputs,
             trt_cuda_graph_cache=trt_cuda_graph_cache,
+            synchronize=synchronize,
         )
         if reminder > 0:
             results = [r[:-reminder] for r in results]
@@ -683,6 +692,7 @@ def _execute_trt_engine(
     input_name: str,
     outputs: List[str],
     trt_cuda_graph_cache: Optional[TRTCudaGraphCache] = None,
+    synchronize: bool = True,
 ) -> List[torch.Tensor]:
     if trt_cuda_graph_cache is not None:
         input_shape = tuple(pre_processed_images.shape)
@@ -692,12 +702,17 @@ def _execute_trt_engine(
         if cache_key not in trt_cuda_graph_cache:
             LOGGER.debug("Capturing CUDA graph for shape %s", input_shape)
 
+            use_external = getattr(
+                pre_processed_images, "_trt_reuse_as_input_buffer", False
+            )
             results, trt_cuda_graph = _capture_cuda_graph(
                 pre_processed_images=pre_processed_images,
                 engine=engine,
                 device=device,
                 input_name=input_name,
                 outputs=outputs,
+                use_pre_processed_images_as_input_buffer=bool(use_external),
+                clone_outputs=synchronize,
             )
             trt_cuda_graph_cache[cache_key] = trt_cuda_graph
             return results
@@ -705,11 +720,34 @@ def _execute_trt_engine(
         else:
             trt_cuda_graph_state = trt_cuda_graph_cache[cache_key]
             stream = trt_cuda_graph_state.cuda_stream
+            consumer_done = trt_cuda_graph_state.consumer_done_event
+            if consumer_done is not None:
+                stream.wait_event(consumer_done)
+            input_ready = getattr(pre_processed_images, "_trt_ready_event", None)
             with torch.cuda.stream(stream):
-                trt_cuda_graph_state.input_buffer.copy_(pre_processed_images)
+                if input_ready is not None:
+                    stream.wait_event(input_ready)
+                if (
+                    trt_cuda_graph_state.input_buffer.data_ptr()
+                    != pre_processed_images.data_ptr()
+                ):
+                    trt_cuda_graph_state.input_buffer.copy_(pre_processed_images)
                 trt_cuda_graph_state.cuda_graph.replay()
-                results = [buf.clone() for buf in trt_cuda_graph_state.output_buffers]
-            stream.synchronize()
+                if synchronize:
+                    results = [
+                        buf.clone() for buf in trt_cuda_graph_state.output_buffers
+                    ]
+                else:
+                    results = list(trt_cuda_graph_state.output_buffers)
+                produce_event = torch.cuda.Event()
+                produce_event.record(stream)
+            if synchronize:
+                stream.synchronize()
+            _attach_trt_graph_metadata(
+                results=results,
+                trt_cuda_graph_state=trt_cuda_graph_state,
+                produce_event=produce_event,
+            )
             return results
 
     else:
@@ -752,14 +790,24 @@ def _capture_cuda_graph(
     device: torch.device,
     input_name: str,
     outputs: List[str],
+    use_pre_processed_images_as_input_buffer: bool = False,
+    clone_outputs: bool = True,
 ) -> Tuple[List[torch.Tensor], TRTCudaGraphState]:
     # Each CUDA graph needs its own execution context. Sharing a single context
     # across graphs for different input shapes causes TRT to reallocate internal
     # workspace buffers, invalidating GPU addresses baked into earlier graphs.
     graph_context = engine.create_execution_context()
 
-    input_buffer = torch.empty_like(pre_processed_images, device=device)
-    input_buffer.copy_(pre_processed_images)
+    stream = torch.cuda.Stream(device=device)
+    input_ready = getattr(pre_processed_images, "_trt_ready_event", None)
+    if use_pre_processed_images_as_input_buffer:
+        input_buffer = pre_processed_images
+    else:
+        input_buffer = torch.empty_like(pre_processed_images, device=device)
+        with torch.cuda.stream(stream):
+            if input_ready is not None:
+                stream.wait_event(input_ready)
+            input_buffer.copy_(pre_processed_images)
 
     status = graph_context.set_input_shape(
         input_name, tuple(pre_processed_images.shape)
@@ -788,7 +836,9 @@ def _capture_cuda_graph(
         graph_context.set_tensor_address(output, output_buffer.data_ptr())
         output_buffers.append(output_buffer)
 
-    stream = torch.cuda.Stream(device=device)
+    if use_pre_processed_images_as_input_buffer and input_ready is not None:
+        with torch.cuda.stream(stream):
+            stream.wait_event(input_ready)
     with torch.cuda.stream(stream):
         status = graph_context.execute_async_v3(stream_handle=stream.cuda_stream)
         if not status:
@@ -813,7 +863,12 @@ def _capture_cuda_graph(
     # in order to avoid drift of results - it's better to replay to get the results
     with torch.cuda.stream(stream):
         cuda_graph.replay()
-        results = [buf.clone() for buf in output_buffers]
+        if clone_outputs:
+            results = [buf.clone() for buf in output_buffers]
+        else:
+            results = list(output_buffers)
+        produce_event = torch.cuda.Event()
+        produce_event.record(stream)
     stream.synchronize()
 
     trt_cuda_graph_state = TRTCudaGraphState(
@@ -823,10 +878,25 @@ def _capture_cuda_graph(
         output_buffers=output_buffers,
         execution_context=graph_context,
     )
+    _attach_trt_graph_metadata(
+        results=results,
+        trt_cuda_graph_state=trt_cuda_graph_state,
+        produce_event=produce_event,
+    )
 
     return results, trt_cuda_graph_state
 
 
+def _attach_trt_graph_metadata(
+    results: List[torch.Tensor],
+    trt_cuda_graph_state: TRTCudaGraphState,
+    produce_event: torch.cuda.Event,
+) -> None:
+    for result in results:
+        result._trt_graph_state = trt_cuda_graph_state  # type: ignore[attr-defined]
+        result._trt_produce_event = produce_event  # type: ignore[attr-defined]
+
+
 def _trt_dtype_to_torch(trt_dtype):
     return {
         trt.DataType.FLOAT: torch.float32,
diff --git a/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py b/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py
index 4ec56543e2..5bfc5fad5a 100644
--- a/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py
+++ b/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py
@@ -10,6 +10,11 @@
     InstanceSegmentationModel,
     PreProcessingOverrides,
 )
+
+# Hoisted to module scope to avoid a per-call `from ... import` inside the hot
+# forward_async path. Re-importing inside the function added ~13µs/frame in the
+# instrumented run on Jetson Orin; a module-level import costs nothing per call.
+from inference_models.models.base.instance_segmentation import _DirectInferenceFuture
 from inference_models.configuration import (
     DEFAULT_DEVICE,
     INFERENCE_MODELS_RFDETR_DEFAULT_CONFIDENCE,
@@ -302,6 +307,92 @@ def forward(
                 )
                 return detections, labels, masks
 
+    def forward_async(
+        self,
+        pre_processed_images: torch.Tensor,
+        pre_processing_meta,
+        **kwargs,
+    ):
+        """Submit CUDA-graph inference without waiting for completion."""
+        if self._trt_cuda_graph_cache is None:
+            return super().forward_async(
+                pre_processed_images, pre_processing_meta, **kwargs
+            )
+
+        preproc_event = getattr(self, "_fast_preproc_event", None)
+        if preproc_event is not None:
+            self._inference_stream.wait_event(preproc_event)
+            self._fast_preproc_event = None
+        with self._lock:
+            with use_cuda_context(context=self._cuda_context):
+                raw = infer_from_trt_engine(
+                    pre_processed_images=pre_processed_images,
+                    trt_config=self._trt_config,
+                    engine=self._engine,
+                    context=self._execution_context,
+                    device=self._device,
+                    input_name=self._input_name,
+                    outputs=self._output_names,
+                    stream=self._inference_stream,
+                    trt_cuda_graph_cache=self._trt_cuda_graph_cache,
+                    synchronize=False,
+                )
+        graph_state = getattr(raw[0], "_trt_graph_state", None)
+        if graph_state is None:
+            self._inference_stream.synchronize()
+            return _DirectInferenceFuture(self, raw, pre_processing_meta, None, kwargs)
+        produce_event = getattr(raw[0], "_trt_produce_event", None)
+        if kwargs.get("reuse_trt_graph_outputs", False):
+            future_kwargs = dict(kwargs)
+            future_kwargs["defer_postprocess_sync"] = True
+            return _DirectInferenceFuture(
+                self, raw, pre_processing_meta, produce_event, future_kwargs
+            )
+
+        stream = graph_state.cuda_stream
+
+        tls = self._thread_local_storage
+        clone_sets = getattr(tls, "clone_sets", None)
+        if clone_sets is None:
+            raw0, raw1, raw2 = raw
+            clone_sets = [
+                (
+                    torch.empty_like(raw0),
+                    torch.empty_like(raw1),
+                    torch.empty_like(raw2),
+                )
+                for _ in range(3)
+            ]
+            tls.clone_sets = clone_sets
+            tls.clone_idx = 0
+        idx = tls.clone_idx
+        clones = clone_sets[idx]
+        tls.clone_idx = (idx + 1) % len(clone_sets)
+
+        prev_stream = torch.cuda.current_stream(self._device)
+        torch.cuda.set_stream(stream)
+        try:
+            raw0, raw1, raw2 = raw
+            clones[0].copy_(raw0, non_blocking=True)
+            clones[1].copy_(raw1, non_blocking=True)
+            clones[2].copy_(raw2, non_blocking=True)
+            produce_event = torch.cuda.Event()
+            produce_event.record(stream)
+            consumer_done = graph_state.consumer_done_event
+            if consumer_done is None:
+                consumer_done = torch.cuda.Event()
+                graph_state.consumer_done_event = consumer_done
+            consumer_done.record(stream)
+        finally:
+            torch.cuda.set_stream(prev_stream)
+
+        clones[0]._trt_produce_event = produce_event  # type: ignore[attr-defined]
+        future_kwargs = dict(kwargs)
+        future_kwargs["defer_postprocess_sync"] = True
+        return _DirectInferenceFuture(
+            self, clones, pre_processing_meta, produce_event, future_kwargs
+        )
+
     def post_process(
         self,
         model_results: Tuple[torch.Tensor, torch.Tensor, torch.Tensor],

From 120d3025443fc3fa0dde4851582c26af0a516178 Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Tue, 2 Jun 2026 01:37:51 +0000
Subject: [PATCH 37/76] Restore RF-DETR pipeline submit ordering

---
 .../core/models/inference_models_adapters.py  |  32 ++-
 .../models/base/instance_segmentation.py      |  15 +-
 .../models/test_inference_models_adapters.py  | 226 ++++++++++++++++++
 3 files changed, 258 insertions(+), 15 deletions(-)

diff --git a/inference/core/models/inference_models_adapters.py b/inference/core/models/inference_models_adapters.py
index 9639c87960..8decf10eef 100644
--- a/inference/core/models/inference_models_adapters.py
+++ b/inference/core/models/inference_models_adapters.py
@@ -324,6 +324,9 @@ def __init__(self, model_id: str, api_key: str = None, **kwargs):
         # Per-adapter in-flight futures + metadata. Not thread-safe; the
         # InferencePipeline is single-producer and the adapter is owned by a
         # single worker.
+        self._pending_gpu_submissions: Deque[
+            Tuple[InferenceFuture, PreprocessingMetadata, dict]
+        ] = deque()
         self._pending_futures: Deque[
             Tuple[InferenceFuture, PreprocessingMetadata, dict]
         ] = deque()
@@ -386,10 +389,11 @@ def predict(self, img_in, **kwargs):
         )
         mapped_kwargs["defer_postprocess_sync"] = True
         mapped_kwargs["reuse_trt_graph_outputs"] = True
-        # Pipelined path: submit current frame forward and return its future.
-        # `postprocess()` immediately submits this frame's postprocess GPU
-        # work, then returns the oldest response once the configured frame
-        # delay has been reached.
+        # Pipelined path: before launching frame N's forward, enqueue the
+        # oldest frame whose postprocess metadata is already known. That keeps
+        # postprocess off the current frame's postprocess() host path while
+        # still preserving the correctness dependency for reused TRT outputs.
+        self._submit_next_pending_gpu_work()
         trace_frame_id = nsight_current_frame_id()
         with nsight_range(nsight_frame_label(trace_frame_id, "gpu_forward_submit")):
             fut = self._model.forward_async(img_in, None, **mapped_kwargs)
@@ -410,6 +414,7 @@ def flush(self) -> List[InstanceSegmentationInferenceResponse]:
         """
         if self._pipeline_depth <= 1:
             return []
+        self._submit_all_pending_gpu_work()
         self._submit_all_pending_responses()
         responses: List[InstanceSegmentationInferenceResponse] = []
         while self._response_futures:
@@ -441,6 +446,15 @@ def _submit_future_gpu_work(
             nsight_mark(nsight_frame_label(trace_frame_id, "gpu_postprocess_submitted"))
             fut._adapter_gpu_work_submitted = True  # type: ignore[attr-defined]
 
+    def _submit_next_pending_gpu_work(self) -> None:
+        if not self._pending_gpu_submissions:
+            return None
+        self._submit_future_gpu_work(*self._pending_gpu_submissions.popleft())
+
+    def _submit_all_pending_gpu_work(self) -> None:
+        while self._pending_gpu_submissions:
+            self._submit_future_gpu_work(*self._pending_gpu_submissions.popleft())
+
     def _submit_response_build(
         self,
         fut: InferenceFuture,
@@ -480,10 +494,12 @@ def postprocess(
             )
         fut: InferenceFuture = predictions
         mapped_kwargs = getattr(fut, "_adapter_kwargs", {}).get("mapped_kwargs", {})
-        self._submit_future_gpu_work(
-            fut,
-            preprocess_return_metadata,
-            mapped_kwargs,
+        self._pending_gpu_submissions.append(
+            (
+                fut,
+                preprocess_return_metadata,
+                mapped_kwargs,
+            )
         )
         self._pending_futures.append((fut, preprocess_return_metadata, mapped_kwargs))
         self._submit_ready_responses()
diff --git a/inference_models/inference_models/models/base/instance_segmentation.py b/inference_models/inference_models/models/base/instance_segmentation.py
index d7e62070ea..c5e46b19e0 100644
--- a/inference_models/inference_models/models/base/instance_segmentation.py
+++ b/inference_models/inference_models/models/base/instance_segmentation.py
@@ -91,15 +91,16 @@ def done(self) -> bool:
     def submit_gpu_work(self, meta: Any = None) -> None:
         """Enqueue the ``post_process`` GPU work eagerly.
 
-        Under depth>=2 pipelining ``result()`` is intentionally delayed so
-        the source loop can prepare later frames. Without eager submission,
-        the postproc kernels are also delayed until that future is finalized,
+        Under depth>=2 pipelining ``result()`` is intentionally delayed so the
+        source loop can prepare later frames. Without eager submission, the
+        postproc kernels are also delayed until that future is finalized,
         leaving a bubble between the TensorRT produce event and postproc.
 
-        Calling ``submit_gpu_work`` from the adapter's ``postprocess`` step
-        enqueues the postproc stream wait immediately after the corresponding
-        TensorRT graph has been submitted. The host still does not block, and
-        ``result()`` later reuses the enqueued postproc result.
+        Calling ``submit_gpu_work`` from the adapter before it launches the
+        next frame's forward keeps the reused TRT outputs correct while moving
+        this host-side submission work out of the current frame's postprocess
+        call. The host still does not block here, and ``result()`` later
+        reuses the enqueued postproc result.
 
         Idempotent: calling it once is enough; subsequent calls to
         ``result()`` reuse the enqueued postproc result.
diff --git a/tests/inference/unit_tests/core/models/test_inference_models_adapters.py b/tests/inference/unit_tests/core/models/test_inference_models_adapters.py
index e5f9b209cc..5a1103e883 100644
--- a/tests/inference/unit_tests/core/models/test_inference_models_adapters.py
+++ b/tests/inference/unit_tests/core/models/test_inference_models_adapters.py
@@ -1,10 +1,17 @@
 """Unit tests for inference.core.models.inference_models_adapters."""
 
+from collections import deque
+from concurrent.futures import Future
+from contextlib import nullcontext
+from types import SimpleNamespace
+
 import pytest
 import torch
 
 from inference.core.exceptions import PostProcessingError
+from inference.core.models import inference_models_adapters as adapters_module
 from inference.core.models.inference_models_adapters import (
+    InferenceModelsInstanceSegmentationAdapter,
     prepare_classification_response,
     prepare_multi_label_classification_response,
 )
@@ -14,6 +21,90 @@
 )
 
 
+class _ImmediateExecutor:
+    def submit(self, fn, *args, **kwargs) -> Future:
+        future = Future()
+        try:
+            future.set_result(fn(*args, **kwargs))
+        except BaseException as error:  # pragma: no cover - defensive
+            future.set_exception(error)
+        return future
+
+
+class _FakePipelineFuture:
+    def __init__(self, name: str, ops: list[str]):
+        self.name = name
+        self.ops = ops
+        self.submitted_meta = []
+        self._adapter_gpu_work_submitted = False
+
+    def submit_gpu_work(self, meta=None) -> None:
+        self.ops.append(f"submit:{self.name}")
+        self.submitted_meta.append(meta)
+
+    def result(self):
+        assert self.submitted_meta, f"result() called before submit for {self.name}"
+        self.ops.append(f"result:{self.name}")
+        return [SimpleNamespace(name=self.name)]
+
+    def done(self) -> bool:
+        return bool(self.submitted_meta)
+
+
+class _FakePipelineModel:
+    supported_mask_formats = {"rle"}
+
+    def __init__(self, futures: list[_FakePipelineFuture], ops: list[str]):
+        self._futures = deque(futures)
+        self._ops = ops
+
+    def forward_async(self, _img_in, _meta, **_kwargs):
+        future = self._futures.popleft()
+        self._ops.append(f"forward:{future.name}")
+        return future
+
+
+def _make_meta(tag: str):
+    return [
+        SimpleNamespace(
+            tag=tag,
+            original_size=SimpleNamespace(width=10, height=20),
+        )
+    ]
+
+
+def _make_pipeline_adapter(
+    monkeypatch: pytest.MonkeyPatch,
+    futures: list[_FakePipelineFuture],
+    ops: list[str],
+    pipeline_depth: int = 2,
+) -> InferenceModelsInstanceSegmentationAdapter:
+    adapter = object.__new__(InferenceModelsInstanceSegmentationAdapter)
+    adapter._pipeline_depth = pipeline_depth
+    adapter._response_delay = max(1, pipeline_depth - 1)
+    adapter._pending_gpu_submissions = deque()
+    adapter._pending_futures = deque()
+    adapter._response_futures = deque()
+    adapter._response_executor = None
+    adapter._model = _FakePipelineModel(futures=futures, ops=ops)
+    adapter.class_names = []
+    adapter.map_inference_kwargs = lambda kwargs: dict(kwargs)
+    adapter._get_response_executor = lambda: _ImmediateExecutor()
+    adapter._build_responses_from_detections = (
+        lambda _detections, preprocess_return_metadata, **_kwargs: [
+            preprocess_return_metadata[0].tag
+        ]
+    )
+
+    monkeypatch.setattr(adapters_module, "nsight_current_frame_id", lambda: 0)
+    monkeypatch.setattr(adapters_module, "nsight_frame_label", lambda *_args: "trace")
+    monkeypatch.setattr(adapters_module, "nsight_mark", lambda *_args, **_kwargs: None)
+    monkeypatch.setattr(
+        adapters_module, "nsight_range", lambda *_args, **_kwargs: nullcontext()
+    )
+    return adapter
+
+
 def test_prepare_multi_label_response_uses_class_ids_for_predicted_classes() -> None:
     """The model's `post_process` is the source of truth for which classes
     are "predicted" (it owns the priority chain user → per-class → global
@@ -86,3 +177,138 @@ def test_prepare_classification_response_fails_on_class_count_mismatch() -> None
             class_names=["cat", "dog"],
             confidence_threshold=0.0,
         )
+
+
+def test_pipeline_submits_previous_future_before_next_forward(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    ops: list[str] = []
+    future_1 = _FakePipelineFuture(name="f1", ops=ops)
+    future_2 = _FakePipelineFuture(name="f2", ops=ops)
+    adapter = _make_pipeline_adapter(
+        monkeypatch=monkeypatch,
+        futures=[future_1, future_2],
+        ops=ops,
+        pipeline_depth=2,
+    )
+
+    meta_1 = _make_meta("meta-1")
+    prediction_1 = adapter.predict("frame-1", response_mask_format="dense")
+    priming = adapter.postprocess(
+        prediction_1,
+        meta_1,
+        response_mask_format="dense",
+    )
+
+    assert len(priming) == 1
+    assert future_1.submitted_meta == []
+
+    adapter.predict("frame-2", response_mask_format="dense")
+
+    assert future_1.submitted_meta == [meta_1]
+    assert ops == ["forward:f1", "submit:f1", "forward:f2"]
+
+
+def test_pipeline_returns_previous_frame_response_using_previous_metadata(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    ops: list[str] = []
+    future_1 = _FakePipelineFuture(name="f1", ops=ops)
+    future_2 = _FakePipelineFuture(name="f2", ops=ops)
+    adapter = _make_pipeline_adapter(
+        monkeypatch=monkeypatch,
+        futures=[future_1, future_2],
+        ops=ops,
+        pipeline_depth=2,
+    )
+
+    meta_1 = _make_meta("meta-1")
+    meta_2 = _make_meta("meta-2")
+    prediction_1 = adapter.predict("frame-1", response_mask_format="dense")
+    adapter.postprocess(prediction_1, meta_1, response_mask_format="dense")
+
+    prediction_2 = adapter.predict("frame-2", response_mask_format="dense")
+    responses = adapter.postprocess(
+        prediction_2,
+        meta_2,
+        response_mask_format="dense",
+    )
+
+    assert responses == ["meta-1"]
+    assert future_1.submitted_meta == [meta_1]
+    assert future_2.submitted_meta == []
+    assert ops == [
+        "forward:f1",
+        "submit:f1",
+        "forward:f2",
+        "result:f1",
+    ]
+
+
+def test_pipeline_flush_submits_remaining_gpu_work_before_finalizing(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    ops: list[str] = []
+    future_1 = _FakePipelineFuture(name="f1", ops=ops)
+    adapter = _make_pipeline_adapter(
+        monkeypatch=monkeypatch,
+        futures=[future_1],
+        ops=ops,
+        pipeline_depth=2,
+    )
+
+    meta_1 = _make_meta("meta-1")
+    prediction_1 = adapter.predict("frame-1", response_mask_format="dense")
+    adapter.postprocess(prediction_1, meta_1, response_mask_format="dense")
+
+    responses = adapter.flush()
+
+    assert responses == ["meta-1"]
+    assert future_1.submitted_meta == [meta_1]
+    assert ops == ["forward:f1", "submit:f1", "result:f1"]
+
+
+def test_pipeline_depth_three_submits_oldest_pending_before_forward(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    ops: list[str] = []
+    future_1 = _FakePipelineFuture(name="f1", ops=ops)
+    future_2 = _FakePipelineFuture(name="f2", ops=ops)
+    future_3 = _FakePipelineFuture(name="f3", ops=ops)
+    adapter = _make_pipeline_adapter(
+        monkeypatch=monkeypatch,
+        futures=[future_1, future_2, future_3],
+        ops=ops,
+        pipeline_depth=3,
+    )
+
+    meta_1 = _make_meta("meta-1")
+    meta_2 = _make_meta("meta-2")
+    meta_3 = _make_meta("meta-3")
+
+    prediction_1 = adapter.predict("frame-1", response_mask_format="dense")
+    adapter.postprocess(prediction_1, meta_1, response_mask_format="dense")
+
+    prediction_2 = adapter.predict("frame-2", response_mask_format="dense")
+    priming = adapter.postprocess(prediction_2, meta_2, response_mask_format="dense")
+
+    prediction_3 = adapter.predict("frame-3", response_mask_format="dense")
+    responses = adapter.postprocess(
+        prediction_3,
+        meta_3,
+        response_mask_format="dense",
+    )
+
+    assert len(priming) == 1
+    assert responses == ["meta-1"]
+    assert future_1.submitted_meta == [meta_1]
+    assert future_2.submitted_meta == [meta_2]
+    assert future_3.submitted_meta == []
+    assert ops == [
+        "forward:f1",
+        "submit:f1",
+        "forward:f2",
+        "submit:f2",
+        "forward:f3",
+        "result:f1",
+    ]

From 2d5a9a5ceae7b6653ed64a8956f1f2b924f92ccb Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Tue, 2 Jun 2026 01:39:16 +0000
Subject: [PATCH 38/76] Add bitpacked RF-DETR polygon helper

---
 inference/core/utils/postprocess.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/inference/core/utils/postprocess.py b/inference/core/utils/postprocess.py
index 0ceafb8e75..c32ede44cd 100644
--- a/inference/core/utils/postprocess.py
+++ b/inference/core/utils/postprocess.py
@@ -61,6 +61,38 @@ def masks2poly(masks: np.ndarray) -> List[np.ndarray]:
     return segments
 
 
+def bitpacked_masks2poly(bitpacked_masks: np.ndarray, width: int) -> List[np.ndarray]:
+    """Convert bit-packed masks with 8 pixels per byte into polygons.
+
+    Args:
+        bitpacked_masks: Packed masks, iterated along the first axis. Each
+            one is unpacked along its last axis with little-endian bit
+            order.
+        width: Number of pixels to keep along the last axis after
+            unpacking; bits past this index are dropped.
+
+    Returns:
+        One entry per input mask: the ``mask2poly`` result for masks with at
+        least one set pixel, otherwise an empty float32 array of shape (0, 2).
+    """
+    segments = []
+    for packed_mask in bitpacked_masks:
+        # Use the mask as-is when it is already C-contiguous; copy otherwise.
+        packed = (
+            packed_mask
+            if packed_mask.flags.c_contiguous
+            else np.ascontiguousarray(packed_mask)
+        )
+        unpacked = np.unpackbits(packed, axis=-1, bitorder="little")[..., :width]
+        if not np.any(unpacked):
+            # No pixel set: record an empty polygon without calling
+            # mask2poly.
+            segments.append(np.zeros((0, 2), dtype=np.float32))
+            continue
+        segments.append(mask2poly(unpacked))
+    return segments
+
+
 def masks2multipoly(masks: np.ndarray) -> List[np.ndarray]:
     """Converts binary masks to polygonal segments.
 

From a86a2240e66dd46c2a0dce4f6b89ed1c06336ea0 Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Tue, 2 Jun 2026 19:56:02 +0000
Subject: [PATCH 39/76] Prepare RF-DETR pipeline integration for review

---
 .../rfdetr_rle_to_poly_microbenchmark.py      |  65 +++--
 inference/core/interfaces/stream/entities.py  |  10 +-
 .../stream/model_handlers/roboflow_models.py  |  17 ++
 .../model_handlers/workflows_context.py       |  18 ++
 .../core/models/inference_models_adapters.py  |  84 ++++--
 inference/core/utils/rle_to_polygon.py        |  72 +++++-
 .../core/workflows/core_steps/common/utils.py | 240 +++++++++++++++---
 .../roboflow/instance_segmentation/v3.py      |  77 +++---
 .../models/test_inference_models_adapters.py  |   6 +-
 9 files changed, 481 insertions(+), 108 deletions(-)
 create mode 100644 inference/core/interfaces/stream/model_handlers/workflows_context.py

diff --git a/development/stream_interface/rfdetr_rle_to_poly_microbenchmark.py b/development/stream_interface/rfdetr_rle_to_poly_microbenchmark.py
index 2554cce763..5e23c4fab4 100644
--- a/development/stream_interface/rfdetr_rle_to_poly_microbenchmark.py
+++ b/development/stream_interface/rfdetr_rle_to_poly_microbenchmark.py
@@ -1,9 +1,9 @@
 """Capture/replay benchmark for RF-DETR RLE-to-polygon conversion.
 
 This targets the CPU path in
-``inference.core.models.inference_models_adapters.rle_masks2poly``:
+``inference.core.utils.rle_to_polygon.rle_masks_to_polygons``:
 
-    COCO RLE counts -> dense mask -> cv2.findContours -> polygon arrays
+    COCO RLE counts -> sparse crop -> cv2.findContours -> polygon arrays
 
 Default usage captures 100 invocations from the 1080p workflow and immediately
 replays them with exact output checks:
@@ -40,7 +40,7 @@
 _WORKFLOW_PATH = (
     _REPO_ROOT / "development" / "stream_interface" / "rfdetr_nano_seg_trt_workflow.py"
 )
-_TARGET_FUNCTION = "rle_masks2poly"
+_TARGET_FUNCTION = "rle_masks_to_polygons"
 _SCHEMA_VERSION = 1
 
 
@@ -63,11 +63,23 @@ def _load_workflow_module() -> Any:
 
 
 def _snapshot_masks(masks: Any) -> dict:
+    """Build a snapshot dict from a masks object.
+
+    Always stores ``image_size``, ``masks`` and ``mask_count``. When the
+    object has both ``_rle_counts_cpu`` and ``_rle_lengths_cpu`` set, array
+    copies of them are added as ``rle_counts_cpu`` and ``rle_lengths_cpu``.
+    """
-    return {
+    snapshot = {
         "image_size": tuple(masks.image_size),
         "masks": list(masks.masks),
         "mask_count": len(masks.masks),
     }
+    counts = getattr(masks, "_rle_counts_cpu", None)
+    lengths = getattr(masks, "_rle_lengths_cpu", None)
+    if counts is not None and lengths is not None:
+        snapshot["rle_counts_cpu"] = np.array(counts, copy=True)
+        snapshot["rle_lengths_cpu"] = np.array(lengths, copy=True)
+    return snapshot
 
 
 def _snapshot_output(output: List[np.ndarray]) -> List[np.ndarray]:
@@ -118,8 +124,9 @@ def maybe_save(self, masks: Any, output: List[np.ndarray]) -> None:
 def _install_capture_hook(state: _CaptureState) -> None:
     _ensure_local_import_paths()
     from inference.core.models import inference_models_adapters as adapters
+    from inference.core.utils import rle_to_polygon
 
-    original = getattr(adapters, _TARGET_FUNCTION)
+    original = getattr(rle_to_polygon, _TARGET_FUNCTION)
 
     @functools.wraps(original)
     def wrapper(masks: Any) -> List[np.ndarray]:
@@ -127,6 +134,8 @@ def wrapper(masks: Any) -> List[np.ndarray]:
         state.maybe_save(masks=masks, output=result)
         return result
 
+    setattr(rle_to_polygon, _TARGET_FUNCTION, wrapper)
+    # The adapter imports the function directly at module load time.
     setattr(adapters, _TARGET_FUNCTION, wrapper)
 
 
@@ -211,7 +220,7 @@ def sink(predictions: Any, video_frames: Any) -> None:
         cases_dir=cases_dir,
         payload={
             "schema_version": _SCHEMA_VERSION,
-            "function": "inference.core.models.inference_models_adapters.rle_masks2poly",
+            "function": "inference.core.utils.rle_to_polygon.rle_masks_to_polygons",
             "case_count": state.count,
             "total_masks": state.total_masks,
             "video_reference": args.video_reference,
@@ -242,15 +251,25 @@ def _load_case(path: Path) -> dict:
     return payload
 
 
-def _materialize_masks(case: dict) -> Any:
+def _materialize_masks(case: dict, use_lazy_counts: bool) -> Any:
+    """Rebuild an ``InstancesRLEMasks`` object from a loaded case.
+
+    When ``use_lazy_counts`` is true and the payload holds both
+    ``rle_counts_cpu`` and ``rle_lengths_cpu``, array copies of them are
+    attached to the result as ``_rle_counts_cpu`` / ``_rle_lengths_cpu``.
+    """
     _ensure_local_import_paths()
     from inference_models.models.base.types import InstancesRLEMasks
 
     payload = case["inputs"]["masks"]
-    return InstancesRLEMasks(
+    masks = InstancesRLEMasks(
         image_size=tuple(payload["image_size"]),
         masks=list(payload["masks"]),
     )
+    if use_lazy_counts and "rle_counts_cpu" in payload and "rle_lengths_cpu" in payload:
+        masks._rle_counts_cpu = np.array(payload["rle_counts_cpu"], copy=True)
+        masks._rle_lengths_cpu = np.array(payload["rle_lengths_cpu"], copy=True)
+    return masks
 
 
 def _assert_outputs_equal(
@@ -298,15 +311,17 @@ def _nvtx_range(enabled: bool, message: str):
         yield
 
 
-def _run_one_replay_case(*, case_path: Path, nvtx: bool) -> float:
-    from inference.core.models.inference_models_adapters import rle_masks2poly
+def _run_one_replay_case(
+    *, case_path: Path, nvtx: bool, use_lazy_counts: bool
+) -> float:
+    from inference.core.utils.rle_to_polygon import rle_masks_to_polygons
 
     case = _load_case(case_path)
-    masks = _materialize_masks(case=case)
+    masks = _materialize_masks(case=case, use_lazy_counts=use_lazy_counts)
     label = f"rfdetr.rle_to_poly.case={case['case_index']}" f".masks={len(masks.masks)}"
     start = perf_counter()
     with _nvtx_range(nvtx, label):
-        actual = rle_masks2poly(masks)
+        actual = rle_masks_to_polygons(masks)
     elapsed = perf_counter() - start
     _assert_outputs_equal(
         actual=actual,
@@ -365,17 +380,28 @@ def _run_replay(args: argparse.Namespace) -> dict:
 
     print(
         f"[replay] cases={len(case_paths)} repeats={args.repeats} "
-        f"warmup_repeats={args.warmup_repeats} nvtx={args.nvtx}",
+        f"warmup_repeats={args.warmup_repeats} nvtx={args.nvtx} "
+        f"use_lazy_counts={args.use_lazy_counts}",
         flush=True,
     )
     for _ in range(args.warmup_repeats):
         for case_path in case_paths:
-            _run_one_replay_case(case_path=case_path, nvtx=args.nvtx)
+            _run_one_replay_case(
+                case_path=case_path,
+                nvtx=args.nvtx,
+                use_lazy_counts=args.use_lazy_counts,
+            )
 
     timings = []
     for repeat_index in range(args.repeats):
         for case_path in case_paths:
-            timings.append(_run_one_replay_case(case_path=case_path, nvtx=args.nvtx))
+            timings.append(
+                _run_one_replay_case(
+                    case_path=case_path,
+                    nvtx=args.nvtx,
+                    use_lazy_counts=args.use_lazy_counts,
+                )
+            )
         print(
             f"[replay] completed repeat {repeat_index + 1}/{args.repeats}",
             flush=True,
@@ -418,6 +444,15 @@ def _parse_args() -> argparse.Namespace:
         action="store_true",
         help="Add NVTX ranges around each replayed rle_masks2poly call.",
     )
+    parser.add_argument(
+        "--use-lazy-counts",
+        action=argparse.BooleanOptionalAction,
+        default=True,
+        help=(
+            "When captured cases include uncompressed RLE counts, restore them "
+            "onto the replay masks."
+        ),
+    )
     args = parser.parse_args()
     if args.capture_count <= 0:
         raise ValueError("--capture-count must be positive")
diff --git a/inference/core/interfaces/stream/entities.py b/inference/core/interfaces/stream/entities.py
index 5ebf82dae9..0daf1db844 100644
--- a/inference/core/interfaces/stream/entities.py
+++ b/inference/core/interfaces/stream/entities.py
@@ -121,7 +121,24 @@ class PipelineStateReport:
     sources_metadata: List[SourceMetadata]
 
 
-InferenceHandler = Callable[[List[VideoFrame]], List[AnyPrediction]]
+@dataclass(frozen=True)
+class InferenceHandlerResult:
+    """Frozen container pairing predictions with optional video frames.
+
+    Attributes:
+        predictions: Predictions produced by the handler.
+        video_frames: Optional list of frames; ``None`` by default.
+    """
+
+    # NOTE(review): presumably ``video_frames`` are the frames the predictions
+    # correspond to - confirm against the pipeline code that consumes this.
+    predictions: List[AnyPrediction]
+    video_frames: Optional[List[VideoFrame]] = None
+
+
+InferenceHandler = Callable[
+    [List[VideoFrame]], Optional[Union[List[AnyPrediction], InferenceHandlerResult]]
+]
 SinkHandler = Optional[
     Union[
         Callable[[AnyPrediction, VideoFrame], None],
diff --git a/inference/core/interfaces/stream/model_handlers/roboflow_models.py b/inference/core/interfaces/stream/model_handlers/roboflow_models.py
index cb2d995a9e..c60badea4e 100644
--- a/inference/core/interfaces/stream/model_handlers/roboflow_models.py
+++ b/inference/core/interfaces/stream/model_handlers/roboflow_models.py
@@ -33,3 +33,28 @@ def default_process_frame(
         )
         for p in predictions
     ]
+
+
+class RoboflowModelHandler:
+    """Callable that binds a model and its config to ``default_process_frame``.
+
+    Instances take the list of video frames as their only argument and
+    forward it, together with the stored model and inference config, to
+    ``default_process_frame``.
+    """
+
+    def __init__(
+        self,
+        model: OnnxRoboflowInferenceModel,
+        inference_config: ModelConfig,
+    ):
+        self._model = model
+        self._inference_config = inference_config
+
+    def __call__(self, video_frame: List[VideoFrame]) -> List[dict]:
+        """Run ``default_process_frame`` on ``video_frame`` with the bound state."""
+        return default_process_frame(
+            video_frame=video_frame,
+            model=self._model,
+            inference_config=self._inference_config,
+        )
diff --git a/inference/core/interfaces/stream/model_handlers/workflows_context.py b/inference/core/interfaces/stream/model_handlers/workflows_context.py
new file mode 100644
index 0000000000..6c2c99e849
--- /dev/null
+++ b/inference/core/interfaces/stream/model_handlers/workflows_context.py
@@ -0,0 +1,31 @@
+"""Per-thread ``flush_active`` flag and a context manager that sets it."""
+
+from contextlib import contextmanager
+import threading
+
+# Thread-local storage: each thread sees its own ``flush_active`` attribute.
+_WORKFLOW_STREAM_CONTEXT = threading.local()
+
+
+def is_workflow_stream_flush_active() -> bool:
+    """Return whether ``flush_active`` is set for the calling thread.
+
+    Defaults to ``False`` when the flag was never set on this thread.
+    """
+    return bool(getattr(_WORKFLOW_STREAM_CONTEXT, "flush_active", False))
+
+
+@contextmanager
+def workflow_stream_flush_context():
+    """Set ``flush_active`` to ``True`` for the duration of the ``with`` block.
+
+    The previous value is restored on exit, including when the body raises,
+    so a nested use leaves the outer context's flag intact.
+    """
+    # Remember the current value so nesting restores it instead of clearing.
+    previous = is_workflow_stream_flush_active()
+    _WORKFLOW_STREAM_CONTEXT.flush_active = True
+    try:
+        yield
+    finally:
+        _WORKFLOW_STREAM_CONTEXT.flush_active = previous
diff --git a/inference/core/models/inference_models_adapters.py b/inference/core/models/inference_models_adapters.py
index 8decf10eef..999878b7df 100644
--- a/inference/core/models/inference_models_adapters.py
+++ b/inference/core/models/inference_models_adapters.py
@@ -330,6 +330,7 @@ def __init__(self, model_id: str, api_key: str = None, **kwargs):
         self._pending_futures: Deque[
             Tuple[InferenceFuture, PreprocessingMetadata, dict]
         ] = deque()
+        self._gpu_submit_generation = 0
         self._response_executor: Optional[ThreadPoolExecutor] = None
         self._response_futures: Deque[
             Future[List[InstanceSegmentationInferenceResponse]]
@@ -360,19 +361,20 @@ def map_inference_kwargs(self, kwargs: dict) -> dict:
     def preprocess(self, image: Any, **kwargs):
         is_batch = isinstance(image, list)
         images = image if is_batch else [image]
-        np_images: List[np.ndarray] = [
-            load_image_bgr(
-                v,
-                disable_preproc_auto_orient=kwargs.get(
-                    "disable_preproc_auto_orient", False
-                ),
-            )
-            for v in images
-        ]
-        mapped_kwargs = self.map_inference_kwargs(kwargs)
         trace_frame_id = nsight_current_frame_id()
+        with nsight_range(nsight_frame_label(trace_frame_id, "cpu_preprocess.load")):
+            np_images: List[np.ndarray] = [
+                load_image_bgr(
+                    v,
+                    disable_preproc_auto_orient=kwargs.get(
+                        "disable_preproc_auto_orient", False
+                    ),
+                )
+                for v in images
+            ]
+        mapped_kwargs = self.map_inference_kwargs(kwargs)
         nsight_mark(nsight_frame_label(trace_frame_id, "gpu_start"))
-        with nsight_range(nsight_frame_label(trace_frame_id, "gpu_preprocess_submit")):
+        with nsight_range(nsight_frame_label(trace_frame_id, "gpu_preprocess.submit")):
             preprocessed = self._model.pre_process(np_images, **mapped_kwargs)
         nsight_mark(nsight_frame_label(trace_frame_id, "gpu_preprocess_submitted"))
         return preprocessed
@@ -395,13 +397,19 @@ def predict(self, img_in, **kwargs):
         # still preserving the correctness dependency for reused TRT outputs.
         self._submit_next_pending_gpu_work()
         trace_frame_id = nsight_current_frame_id()
-        with nsight_range(nsight_frame_label(trace_frame_id, "gpu_forward_submit")):
-            fut = self._model.forward_async(img_in, None, **mapped_kwargs)
+        pre_processing_meta = getattr(img_in, "_pre_processing_meta", None)
+        with nsight_range(nsight_frame_label(trace_frame_id, "gpu_forward.submit")):
+            fut = self._model.forward_async(
+                img_in, pre_processing_meta, **mapped_kwargs
+            )
         nsight_mark(nsight_frame_label(trace_frame_id, "gpu_forward_submitted"))
         fut._trace_frame_id = trace_frame_id  # type: ignore[attr-defined]
         fut._adapter_kwargs = {  # type: ignore[attr-defined]
             "mapped_kwargs": mapped_kwargs
         }
+        if pre_processing_meta is not None:
+            self._submit_future_gpu_work(fut, pre_processing_meta, mapped_kwargs)
+        self._submit_ready_responses()
         return fut
 
     def flush(self) -> List[InstanceSegmentationInferenceResponse]:
@@ -440,10 +448,14 @@ def _submit_future_gpu_work(
         if callable(submit_gpu_work):
             trace_frame_id = getattr(fut, "_trace_frame_id", nsight_current_frame_id())
             with nsight_range(
-                nsight_frame_label(trace_frame_id, "gpu_postprocess_submit")
+                nsight_frame_label(trace_frame_id, "gpu_postprocess.submit")
             ):
                 submit_gpu_work(meta)
             nsight_mark(nsight_frame_label(trace_frame_id, "gpu_postprocess_submitted"))
+            self._gpu_submit_generation = getattr(self, "_gpu_submit_generation", 0) + 1
+            fut._adapter_gpu_submit_generation = (  # type: ignore[attr-defined]
+                self._gpu_submit_generation
+            )
             fut._adapter_gpu_work_submitted = True  # type: ignore[attr-defined]
 
     def _submit_next_pending_gpu_work(self) -> None:
@@ -465,6 +477,7 @@ def _submit_response_build(
         fut._kwargs = mapped_kwargs  # type: ignore[attr-defined]
         trace_frame_id = getattr(fut, "_trace_frame_id", nsight_current_frame_id())
         fut._trace_frame_id = trace_frame_id  # type: ignore[attr-defined]
+        nsight_mark(nsight_frame_label(trace_frame_id, "cpu_response_released"))
         response_future = self._get_response_executor().submit(
             self._finalize_future,
             fut,
@@ -475,7 +488,26 @@ def _submit_response_build(
         self._response_futures.append(response_future)
 
     def _submit_ready_responses(self) -> None:
+        """Hand pending futures, oldest first, to the response builder.
+
+        The head of ``_pending_futures`` is released only once the adapter's
+        GPU submit counter has reached the generation recorded on that
+        future plus ``_response_delay``; otherwise the loop stops.
+        """
-        while len(self._pending_futures) > self._response_delay:
+        while self._pending_futures:
+            fut, meta, mapped_kwargs = self._pending_futures[0]
+            submit_generation = getattr(fut, "_adapter_gpu_submit_generation", None)
+            if submit_generation is None:
+                # No generation recorded yet: try to submit this future's GPU
+                # work now, then re-read the attribute.
+                self._submit_future_gpu_work(fut, meta, mapped_kwargs)
+                submit_generation = getattr(fut, "_adapter_gpu_submit_generation", None)
+            if submit_generation is None:
+                # Still unset after the attempt; stop without releasing.
+                break
+            gpu_submit_generation = getattr(self, "_gpu_submit_generation", 0)
+            if gpu_submit_generation < submit_generation + self._response_delay:
+                break
             self._submit_response_build(*self._pending_futures.popleft())
 
     def _submit_all_pending_responses(self) -> None:
@@ -502,7 +525,9 @@ def postprocess(
             )
         )
         self._pending_futures.append((fut, preprocess_return_metadata, mapped_kwargs))
-        self._submit_ready_responses()
+        if len(self._pending_futures) > self._response_delay:
+            self._submit_next_pending_gpu_work()
+            self._submit_ready_responses()
 
         if not self._response_futures:
             return self._empty_responses_for_metadata(
@@ -562,15 +587,22 @@ def _finalize_future(
         fut._kwargs = mapped_kwargs  # type: ignore[attr-defined]
         trace_frame_id = getattr(fut, "_trace_frame_id", None)
         nsight_mark(nsight_frame_label(trace_frame_id, "cpu_response_start"))
-        detections_list = fut.result()
-        for det in detections_list:
-            try:
-                det._trace_frame_id = trace_frame_id  # type: ignore[attr-defined]
-            except AttributeError:
-                pass
-        responses = self._build_responses_from_detections(
-            detections_list, preprocess_return_metadata, **mapped_kwargs
-        )
+        with nsight_range(nsight_frame_label(trace_frame_id, "cpu_postprocess.total")):
+            with nsight_range(
+                nsight_frame_label(trace_frame_id, "cpu_postprocess.await_gpu_result")
+            ):
+                detections_list = fut.result()
+            for det in detections_list:
+                try:
+                    det._trace_frame_id = trace_frame_id  # type: ignore[attr-defined]
+                except AttributeError:
+                    pass
+            with nsight_range(
+                nsight_frame_label(trace_frame_id, "cpu_postprocess.build_response")
+            ):
+                responses = self._build_responses_from_detections(
+                    detections_list, preprocess_return_metadata, **mapped_kwargs
+                )
         nsight_mark(nsight_frame_label(trace_frame_id, "cpu_response_complete"))
         return responses
 
@@ -609,7 +641,7 @@ def _build_responses_from_detections(
             finalize_pending = getattr(det, "_finalize_pending_postproc", None)
             if callable(finalize_pending):
                 with nsight_range(
-                    nsight_frame_label(trace_frame_id, "gpu_finish_wait")
+                    nsight_frame_label(trace_frame_id, "cpu_postprocess.accept_gpu_rle")
                 ):
                     det = finalize_pending()
                 try:
diff --git a/inference/core/utils/rle_to_polygon.py b/inference/core/utils/rle_to_polygon.py
index f15c4a2d90..e271e0d821 100644
--- a/inference/core/utils/rle_to_polygon.py
+++ b/inference/core/utils/rle_to_polygon.py
@@ -54,14 +54,113 @@ def polygon_from_uncompressed_counts(
     height: int,
     width: int,
 ) -> np.ndarray:
+    counts_array = _counts_to_array(counts=counts)
+    # Try the column-aligned conversion first; it returns None when a
+    # foreground run crosses a column boundary, and the interval-based
+    # conversion below handles that case.
+    polygon = _polygon_from_column_aligned_counts(
+        counts=counts_array,
+        height=height,
+        width=width,
+    )
+    if polygon is not None:
+        return polygon
     columns = _counts_to_column_intervals(
-        counts=counts,
+        counts=counts_array,
         height=height,
         width=width,
     )
     return _polygon_from_column_intervals(columns=columns)
 
 
+def _counts_to_array(counts: Iterable[int]) -> np.ndarray:
+    """Return ``counts`` as an int64 NumPy array.
+
+    NumPy inputs go through ``astype(np.int64, copy=False)``; any other
+    iterable is converted element by element with ``int``.
+    """
+    if isinstance(counts, np.ndarray):
+        return counts.astype(np.int64, copy=False)
+    return np.fromiter((int(count) for count in counts), dtype=np.int64)
+
+
+def _polygon_from_column_aligned_counts(
+    counts: np.ndarray,
+    height: int,
+    width: int,
+) -> Optional[np.ndarray]:
+    """Build a polygon from RLE counts whose runs each stay in one column.
+
+    Odd indices of ``counts`` are treated as foreground run lengths, and a
+    run's flat offset is split into ``offset // height`` (column) and the
+    remainder (row).
+
+    Returns:
+        ``None`` if any foreground run ends in a different column than it
+        starts in. Otherwise a float32 array of shape (N, 2) with the
+        external contour that has the most points, or a copy of
+        ``_EMPTY_POLYGON`` when there is no foreground or no contour.
+
+    Raises:
+        ValueError: If any count is negative or the counts sum to more
+            than ``height * width``.
+    """
+    if counts.size == 0:
+        return _EMPTY_POLYGON.copy()
+    if np.any(counts < 0):
+        raise ValueError("COCO RLE counts must be non-negative")
+
+    total_size = height * width
+    # Exclusive end offset of every run in the flattened mask.
+    ends = np.cumsum(counts, dtype=np.int64)
+    if ends[-1] > total_size:
+        raise ValueError("COCO RLE counts exceed the mask size")
+
+    foreground_lengths = counts[1::2]
+    if foreground_lengths.size == 0:
+        return _EMPTY_POLYGON.copy()
+    foreground_ends = ends[1::2]
+    foreground_starts = foreground_ends - foreground_lengths
+    # Zero-length foreground runs contribute no pixels; drop them.
+    non_empty = foreground_lengths > 0
+    if not np.any(non_empty):
+        return _EMPTY_POLYGON.copy()
+    foreground_starts = foreground_starts[non_empty]
+    foreground_ends = foreground_ends[non_empty]
+
+    columns = foreground_starts // height
+    y_starts = foreground_starts - columns * height
+    # Compare the column of each run's last pixel with its first pixel's.
+    last_pixels = foreground_ends - 1
+    end_columns = last_pixels // height
+    if np.any(end_columns != columns):
+        return None
+    y_ends = foreground_ends - columns * height
+
+    x_min = int(columns.min())
+    x_max = int(columns.max())
+    y_min = int(y_starts.min())
+    y_max = int(y_ends.max())
+    # Rasterize only the bounding region of the foreground runs.
+    crop = np.zeros((y_max - y_min, x_max - x_min + 1), dtype=np.uint8)
+    for x, y0, y1 in zip(columns, y_starts, y_ends):
+        crop[int(y0) - y_min : int(y1) - y_min, int(x) - x_min] = 1
+
+    # The offset shifts contour points from crop to full-mask coordinates.
+    contours = cv2.findContours(
+        crop,
+        cv2.RETR_EXTERNAL,
+        cv2.CHAIN_APPROX_SIMPLE,
+        offset=(x_min, y_min),
+    )[0]
+    if not contours:
+        return _EMPTY_POLYGON.copy()
+
+    contour_lengths = np.fromiter((len(c) for c in contours), dtype=np.intp)
+    selected_contour = contours[int(contour_lengths.argmax())]
+    return np.asarray(selected_contour, dtype=np.float32).reshape(-1, 2)
+
+
 def _get_lazy_uncompressed_counts(
     masks: object,
 ) -> Optional[Tuple[np.ndarray, np.ndarray]]:
diff --git a/inference/core/workflows/core_steps/common/utils.py b/inference/core/workflows/core_steps/common/utils.py
index b8f0bb763b..4229c1608e 100644
--- a/inference/core/workflows/core_steps/common/utils.py
+++ b/inference/core/workflows/core_steps/common/utils.py
@@ -2,11 +2,24 @@
 import uuid
 from concurrent.futures import ThreadPoolExecutor
 from copy import deepcopy
-from typing import Any, Callable, Dict, Iterable, List, Optional, Set, TypeVar, Union
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    Iterable,
+    List,
+    Optional,
+    Set,
+    Tuple,
+    TypeVar,
+    Union,
+)
 
+import cv2
 import numpy as np
 import supervision as sv
 from supervision.config import CLASS_NAME_DATA_FIELD
+from supervision.detection.compact_mask import CompactMask
 
 from inference.core.entities.requests.clip import ClipCompareRequest
 from inference.core.entities.requests.doctr import DoctrOCRInferenceRequest
@@ -51,6 +64,7 @@
     WorkflowImageData,
 )
 from inference.core.workflows.prototypes.block import BlockResult
+from inference.core.utils.nsight import nsight_range
 
 T = TypeVar("T")
 
@@ -94,6 +108,195 @@ def filter_out_invalid_polygons(predictions: List[dict]) -> List[dict]:
     ]
 
 
+def _get_or_create_detection_id(prediction: dict) -> object:
+    """Return the prediction's detection id, or a fresh UUID4 string.
+
+    The UUID is generated only when ``DETECTION_ID_KEY`` is missing.
+    """
+    if DETECTION_ID_KEY in prediction:
+        return prediction[DETECTION_ID_KEY]
+    return str(uuid.uuid4())
+
+
+def _mask_crop_to_compact_rle_counts(mask: np.ndarray) -> np.ndarray:
+    """Encode a mask as int32 run lengths over its column-major flattening.
+
+    The first run counts unset pixels, so a leading ``0`` is emitted when
+    the first pixel is set. A mask with no pixels yields ``[0]``.
+    """
+    flat = np.asarray(mask, dtype=np.bool_).ravel(order="F")
+    if len(flat) == 0:
+        return np.array([0], dtype=np.int32)
+    # Non-zero differences mark positions where the value flips.
+    changes = np.diff(flat.view(np.uint8))
+    boundaries = np.where(changes != 0)[0] + 1
+    positions = np.concatenate(([0], boundaries, [len(flat)]))
+    run_lengths = np.diff(positions).astype(np.int32)
+    if flat[0]:
+        run_lengths = np.concatenate(([np.int32(0)], run_lengths))
+    return run_lengths
+
+
+def _polygon_prediction_to_compact_mask_crop(
+    polygon: np.ndarray,
+    image_width: int,
+    image_height: int,
+) -> Tuple[np.ndarray, Tuple[int, int], Tuple[int, int]]:
+    """Rasterize a polygon inside its image-clipped bounding box.
+
+    Args:
+        polygon: Array indexed as ``[:, 0]`` for x and ``[:, 1]`` for y.
+        image_width: Image width used to clip the bounding box.
+        image_height: Image height used to clip the bounding box.
+
+    Returns:
+        A tuple of the crop's RLE counts, the crop shape as
+        ``(rows, columns)``, and the crop's ``(x, y)`` offset. A polygon
+        lying entirely outside the image yields a 1x1 unset crop whose
+        offset is clamped into the image.
+    """
+    x_min = int(np.min(polygon[:, 0]))
+    x_max = int(np.max(polygon[:, 0]))
+    y_min = int(np.min(polygon[:, 1]))
+    y_max = int(np.max(polygon[:, 1]))
+
+    if x_max < 0 or y_max < 0 or x_min >= image_width or y_min >= image_height:
+        mask = np.zeros((1, 1), dtype=bool)
+        return (
+            _mask_crop_to_compact_rle_counts(mask),
+            (1, 1),
+            (
+                min(max(x_min, 0), image_width - 1),
+                min(max(y_min, 0), image_height - 1),
+            ),
+        )
+
+    # Clip the bounding box to the image; both bounds are inclusive.
+    x1 = max(0, x_min)
+    y1 = max(0, y_min)
+    x2 = min(image_width - 1, x_max)
+    y2 = min(image_height - 1, y_max)
+    crop = np.zeros((y2 - y1 + 1, x2 - x1 + 1), dtype=np.uint8)
+    # Move the polygon into crop coordinates before filling.
+    shifted_polygon = polygon - np.array([x1, y1], dtype=np.int32)
+    cv2.fillPoly(crop, [shifted_polygon], color=(1,))
+    return (
+        _mask_crop_to_compact_rle_counts(crop),
+        (crop.shape[0], crop.shape[1]),
+        (x1, y1),
+    )
+
+
+def _try_convert_polygon_predictions_to_sv_detections(
+    prediction: Dict[str, Union[List[Dict[str, Any]], Any]],
+    predictions_key: str,
+    image_key: str,
+) -> Optional[Tuple[sv.Detections, List[Dict[str, Any]]]]:
+    """Convert polygon predictions to ``sv.Detections`` with compact masks.
+
+    Returns ``None`` when the input is not eligible: a prediction lacks a
+    required key, carries a non-``None`` RLE value, or only some predictions
+    have ``tracker_id``.
+
+    Args:
+        prediction: Mapping holding the predictions list and image info.
+        predictions_key: Key of the predictions list in ``prediction``.
+        image_key: Key of the mapping that holds image width and height.
+
+    Returns:
+        ``None`` as described above, otherwise a tuple of the detections
+        and the predictions kept by ``filter_out_invalid_polygons``.
+    """
+    raw_predictions = prediction[predictions_key]
+    required_prediction_keys = {
+        X_KEY,
+        Y_KEY,
+        WIDTH_KEY,
+        HEIGHT_KEY,
+        "confidence",
+        "class_id",
+        "class",
+        "points",
+    }
+    if any(
+        not required_prediction_keys.issubset(p)
+        or p.get("rle") is not None
+        or p.get(RLE_MASK_KEY_IN_INFERENCE_RESPONSE) is not None
+        for p in raw_predictions
+    ):
+        return None
+
+    # tracker_id must be present on all predictions or on none of them.
+    has_tracker = ["tracker_id" in p for p in raw_predictions]
+    if any(has_tracker) and not all(has_tracker):
+        return None
+
+    image_width = int(prediction[image_key][WIDTH_KEY])
+    image_height = int(prediction[image_key][HEIGHT_KEY])
+    valid_predictions = filter_out_invalid_polygons(predictions=raw_predictions)
+    if not valid_predictions:
+        # Nothing left after filtering: return empty detections that still
+        # carry an (empty) class-name data field.
+        detections = sv.Detections.empty()
+        detections.data = {CLASS_NAME_DATA_FIELD: np.empty(0, dtype=str)}
+        return detections, valid_predictions
+
+    count = len(valid_predictions)
+    xyxy = np.empty((count, 4), dtype=np.float64)
+    confidence = np.empty(count, dtype=np.float64)
+    class_id = np.empty(count, dtype=np.int64)
+    class_name = []
+    tracker_id = np.empty(count, dtype=np.int64) if all(has_tracker) else None
+    rles = []
+    crop_shapes = np.empty((count, 2), dtype=np.int32)
+    offsets = np.empty((count, 2), dtype=np.int32)
+
+    for idx, item in enumerate(valid_predictions):
+        x = float(item[X_KEY])
+        y = float(item[Y_KEY])
+        width = float(item[WIDTH_KEY])
+        height = float(item[HEIGHT_KEY])
+        # x/y are used as the box centre; convert to corner coordinates.
+        x_min = x - width / 2
+        y_min = y - height / 2
+        xyxy[idx] = [x_min, y_min, x_min + width, y_min + height]
+        confidence[idx] = float(item["confidence"])
+        class_id[idx] = int(item["class_id"])
+        class_name.append(item["class"])
+        if tracker_id is not None:
+            tracker_id[idx] = int(item["tracker_id"])
+
+        # Point coordinates are cast to int32 before rasterization.
+        polygon = np.array(
+            [[point[X_KEY], point[Y_KEY]] for point in item["points"]],
+            dtype=np.int32,
+        )
+        rle, crop_shape, offset = _polygon_prediction_to_compact_mask_crop(
+            polygon=polygon,
+            image_width=image_width,
+            image_height=image_height,
+        )
+        rles.append(rle)
+        crop_shapes[idx] = crop_shape
+        offsets[idx] = offset
+
+    masks = CompactMask(
+        rles=rles,
+        crop_shapes=crop_shapes,
+        offsets=offsets,
+        image_shape=(image_height, image_width),
+    )
+    detections = sv.Detections(
+        xyxy=xyxy,
+        confidence=confidence,
+        class_id=class_id,
+        mask=masks,
+        tracker_id=tracker_id,
+        data={CLASS_NAME_DATA_FIELD: np.array(class_name)},
+    )
+    return detections, valid_predictions
+
+
 def attach_prediction_type_info_to_sv_detections_batch(
     predictions: List[sv.Detections],
     prediction_type: str,
@@ -109,34 +267,54 @@ def convert_inference_detections_batch_to_sv_detections(
     predictions_key: str = "predictions",
     image_key: str = "image",
 ) -> List[sv.Detections]:
-    batch_of_detections: List[sv.Detections] = []
-    for p in predictions:
-        width, height = p[image_key][WIDTH_KEY], p[image_key][HEIGHT_KEY]
-        detections = sv.Detections.from_inference(p)
-        raw_predictions = p[predictions_key]
-        if len(detections) != len(raw_predictions):
-            raw_predictions = filter_out_invalid_polygons(predictions=raw_predictions)
-        parent_ids = [d.get(PARENT_ID_KEY, "") for d in raw_predictions]
-        detection_ids = [
-            d.get(DETECTION_ID_KEY, str(uuid.uuid4())) for d in raw_predictions
-        ]
-        detections[DETECTION_ID_KEY] = np.array(detection_ids)
-        detections[PARENT_ID_KEY] = np.array(parent_ids)
-        detections[IMAGE_DIMENSIONS_KEY] = np.array([[height, width]] * len(detections))
-        if INFERENCE_ID_KEY in p:
-            detections[INFERENCE_ID_KEY] = np.array(
-                [p[INFERENCE_ID_KEY]] * len(detections)
-            )
-        rle_masks = [
-            d.get(RLE_MASK_KEY_IN_INFERENCE_RESPONSE) or d.get("rle")
-            for d in raw_predictions
-        ]
-        if any(m is not None for m in rle_masks):
-            detections.data[RLE_MASK_KEY_IN_SV_DETECTIONS] = np.array(
-                rle_masks, dtype=object
-            )
-        batch_of_detections.append(detections)
-    return batch_of_detections
+    with nsight_range("workflow.to_sv.convert_inference_batch"):
+        batch_of_detections: List[sv.Detections] = []
+        for p in predictions:
+            width, height = p[image_key][WIDTH_KEY], p[image_key][HEIGHT_KEY]
+            with nsight_range("workflow.to_sv.convert.from_inference"):
+                with nsight_range("workflow.to_sv.convert.fast_polygon"):
+                    fast_result = _try_convert_polygon_predictions_to_sv_detections(
+                        prediction=p,
+                        predictions_key=predictions_key,
+                        image_key=image_key,
+                    )
+                if fast_result is None:
+                    detections = sv.Detections.from_inference(p)
+                    raw_predictions = p[predictions_key]
+                    if len(detections) != len(raw_predictions):
+                        with nsight_range(
+                            "workflow.to_sv.convert.filter_invalid_polygons"
+                        ):
+                            raw_predictions = filter_out_invalid_polygons(
+                                predictions=raw_predictions
+                            )
+                else:
+                    detections, raw_predictions = fast_result
+            with nsight_range("workflow.to_sv.convert.metadata_arrays"):
+                parent_ids = [d.get(PARENT_ID_KEY, "") for d in raw_predictions]
+                detection_ids = [
+                    _get_or_create_detection_id(d) for d in raw_predictions
+                ]
+                detections[DETECTION_ID_KEY] = np.array(detection_ids)
+                detections[PARENT_ID_KEY] = np.array(parent_ids)
+                detections[IMAGE_DIMENSIONS_KEY] = np.array(
+                    [[height, width]] * len(detections)
+                )
+                if INFERENCE_ID_KEY in p:
+                    detections[INFERENCE_ID_KEY] = np.array(
+                        [p[INFERENCE_ID_KEY]] * len(detections)
+                    )
+            with nsight_range("workflow.to_sv.convert.rle_masks"):
+                rle_masks = [
+                    d.get(RLE_MASK_KEY_IN_INFERENCE_RESPONSE) or d.get("rle")
+                    for d in raw_predictions
+                ]
+                if any(m is not None for m in rle_masks):
+                    detections.data[RLE_MASK_KEY_IN_SV_DETECTIONS] = np.array(
+                        rle_masks, dtype=object
+                    )
+            batch_of_detections.append(detections)
+        return batch_of_detections
 
 
 def add_inference_keypoints_to_sv_detections(
@@ -455,9 +633,7 @@ def post_process_ocr_result(
         prediction["predictions"] = sv.Detections.from_inference(prediction)
         if len(prediction["predictions"]) != len(raw_predictions):
             raw_predictions = filter_out_invalid_polygons(predictions=raw_predictions)
-        detection_ids = [
-            p.get("detection_id", str(uuid.uuid4())) for p in raw_predictions
-        ]
+        detection_ids = [_get_or_create_detection_id(p) for p in raw_predictions]
         prediction["predictions"]["detection_id"] = detection_ids
         prediction[PREDICTION_TYPE_KEY] = "ocr"
         prediction[PARENT_ID_KEY] = image.parent_metadata.parent_id
diff --git a/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v3.py b/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v3.py
index f662d81629..a9f56d34f8 100644
--- a/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v3.py
+++ b/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v3.py
@@ -453,26 +453,35 @@ def _finalize_async_prediction_value(
     ) -> BlockResult:
         trace_frame_id = getattr(predictions_future, "_trace_frame_id", None)
         nsight_mark(nsight_frame_label(trace_frame_id, "workflow_finalize_start"))
-        predictions = predictions_future.result()
-        if not isinstance(predictions, list):
-            predictions = [predictions]
-        predictions = [
-            (
-                _is_response_dc_to_dict(e)
-                if isinstance(e, InstanceSegmentationInferenceResponseDC)
-                else e.model_dump(by_alias=True, exclude_none=True)
-            )
-            for e in predictions
-        ]
         with nsight_range(
-            nsight_frame_label(trace_frame_id, "workflow_prediction_convert")
+            nsight_frame_label(trace_frame_id, "workflow_finalize.total")
         ):
-            result = self._post_process_result(
-                images=images,
-                predictions=predictions,
-                class_filter=class_filter,
-                model_id=model_id,
-            )
+            with nsight_range(
+                nsight_frame_label(trace_frame_id, "workflow_finalize.await_response")
+            ):
+                predictions = predictions_future.result()
+            if not isinstance(predictions, list):
+                predictions = [predictions]
+            with nsight_range(
+                nsight_frame_label(trace_frame_id, "workflow_finalize.response_to_dict")
+            ):
+                predictions = [
+                    (
+                        _is_response_dc_to_dict(e)
+                        if isinstance(e, InstanceSegmentationInferenceResponseDC)
+                        else e.model_dump(by_alias=True, exclude_none=True)
+                    )
+                    for e in predictions
+                ]
+            with nsight_range(
+                nsight_frame_label(trace_frame_id, "workflow_finalize.to_sv")
+            ):
+                result = self._post_process_result(
+                    images=images,
+                    predictions=predictions,
+                    class_filter=class_filter,
+                    model_id=model_id,
+                )
         nsight_mark(nsight_frame_label(trace_frame_id, "workflow_finalize_complete"))
         return result
 
@@ -570,19 +579,25 @@ def _post_process_result(
         model_id: str,
     ) -> BlockResult:
         inference_ids = [p.get(INFERENCE_ID_KEY, None) for p in predictions]
-        predictions = convert_inference_detections_batch_to_sv_detections(predictions)
-        predictions = attach_prediction_type_info_to_sv_detections_batch(
-            predictions=predictions,
-            prediction_type="instance-segmentation",
-        )
-        predictions = filter_out_unwanted_classes_from_sv_detections_batch(
-            predictions=predictions,
-            classes_to_accept=class_filter,
-        )
-        predictions = attach_parents_coordinates_to_batch_of_sv_detections(
-            images=images,
-            predictions=predictions,
-        )
+        with nsight_range("workflow.to_sv.convert_inference"):
+            predictions = convert_inference_detections_batch_to_sv_detections(
+                predictions
+            )
+        with nsight_range("workflow.to_sv.attach_prediction_type"):
+            predictions = attach_prediction_type_info_to_sv_detections_batch(
+                predictions=predictions,
+                prediction_type="instance-segmentation",
+            )
+        with nsight_range("workflow.to_sv.class_filter"):
+            predictions = filter_out_unwanted_classes_from_sv_detections_batch(
+                predictions=predictions,
+                classes_to_accept=class_filter,
+            )
+        with nsight_range("workflow.to_sv.attach_parents"):
+            predictions = attach_parents_coordinates_to_batch_of_sv_detections(
+                images=images,
+                predictions=predictions,
+            )
         return [
             {
                 "inference_id": inference_id,
diff --git a/tests/inference/unit_tests/core/models/test_inference_models_adapters.py b/tests/inference/unit_tests/core/models/test_inference_models_adapters.py
index 5a1103e883..860de92396 100644
--- a/tests/inference/unit_tests/core/models/test_inference_models_adapters.py
+++ b/tests/inference/unit_tests/core/models/test_inference_models_adapters.py
@@ -236,11 +236,12 @@ def test_pipeline_returns_previous_frame_response_using_previous_metadata(
 
     assert responses == ["meta-1"]
     assert future_1.submitted_meta == [meta_1]
-    assert future_2.submitted_meta == []
+    assert future_2.submitted_meta == [meta_2]
     assert ops == [
         "forward:f1",
         "submit:f1",
         "forward:f2",
+        "submit:f2",
         "result:f1",
     ]
 
@@ -303,12 +304,13 @@ def test_pipeline_depth_three_submits_oldest_pending_before_forward(
     assert responses == ["meta-1"]
     assert future_1.submitted_meta == [meta_1]
     assert future_2.submitted_meta == [meta_2]
-    assert future_3.submitted_meta == []
+    assert future_3.submitted_meta == [meta_3]
     assert ops == [
         "forward:f1",
         "submit:f1",
         "forward:f2",
         "submit:f2",
         "forward:f3",
+        "submit:f3",
         "result:f1",
     ]

From 5ccefce7c6b2e7c9f0b39f32f7a086c5eaec0c66 Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Tue, 2 Jun 2026 21:03:40 +0000
Subject: [PATCH 40/76] Keep RF-DETR postprocess deferral in pipeline stack

---
 .../inference_models/models/rfdetr/common.py  |   4 +
 .../rfdetr_instance_segmentation_trt.py       |  37 ++-
 .../models/rfdetr/triton_postprocess.py       | 274 +++++++++++++++++-
 3 files changed, 308 insertions(+), 7 deletions(-)

diff --git a/inference_models/inference_models/models/rfdetr/common.py b/inference_models/inference_models/models/rfdetr/common.py
index d9735465f0..c298efe63a 100644
--- a/inference_models/inference_models/models/rfdetr/common.py
+++ b/inference_models/inference_models/models/rfdetr/common.py
@@ -232,6 +232,7 @@ def _post_process_single_instance_segmentation_result_to_rle_masks_with_triton(
     threshold: Union[float, torch.Tensor],
     num_classes: int,
     classes_re_mapping: Optional[ClassesReMapping],
+    defer_postprocess_sync: bool = False,
 ) -> InstanceDetections:
     triton_result = (
         post_process_single_instance_segmentation_result_to_rle_masks_triton(
@@ -241,6 +242,7 @@ def _post_process_single_instance_segmentation_result_to_rle_masks_with_triton(
             image_meta=image_meta,
             threshold=threshold,
             classes_re_mapping=classes_re_mapping,
+            defer_postprocess_sync=defer_postprocess_sync,
         )
     )
     if triton_result is not None:
@@ -364,6 +366,7 @@ def post_process_instance_segmentation_results_to_rle_masks(
     threshold: Union[float, torch.Tensor],
     num_classes: int,
     classes_re_mapping: Optional[ClassesReMapping],
+    defer_postprocess_sync: bool = False,
 ) -> List[InstanceDetections]:
     logits_sigmoid = torch.nn.functional.sigmoid(logits)
     device = bboxes.device
@@ -383,6 +386,7 @@ def post_process_instance_segmentation_results_to_rle_masks(
             threshold=threshold,
             num_classes=num_classes,
             classes_re_mapping=classes_re_mapping,
+            defer_postprocess_sync=defer_postprocess_sync,
         )
         for image_bboxes, image_logits, image_masks, image_meta in zip(
             bboxes,
diff --git a/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py b/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py
index 5bfc5fad5a..5641d73e59 100644
--- a/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py
+++ b/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py
@@ -227,6 +227,8 @@ def __init__(
         self._trt_cuda_graph_cache = trt_cuda_graph_cache
         self._lock = threading.Lock()
         self._inference_stream = torch.cuda.Stream(device=self._device)
+        self._pre_process_cuda_stream = torch.cuda.Stream(device=self._device)
+        self._post_process_cuda_stream = torch.cuda.Stream(device=self._device)
         self._thread_local_storage = threading.local()
         self.recommended_parameters = recommended_parameters
         self._fast_preprocess_enabled = INFERENCE_MODELS_RFDETR_TRITON_PREPROC_ENABLED
@@ -416,7 +418,11 @@ def post_process(
             recommended_parameters=self.recommended_parameters,
             default_confidence=INFERENCE_MODELS_RFDETR_DEFAULT_CONFIDENCE,
         )
+        produce_event = getattr(model_results[0], "_trt_produce_event", None)
+        graph_state = getattr(model_results[0], "_trt_graph_state", None)
         with torch.cuda.stream(self._post_process_stream):
+            if produce_event is not None:
+                self._post_process_stream.wait_event(produce_event)
             for result_element in model_results:
                 result_element.record_stream(self._post_process_stream)
             bboxes, logits, masks = model_results
@@ -439,8 +445,31 @@ def post_process(
                     threshold=confidence_filter.get_threshold(self.class_names),
                     num_classes=len(self.class_names),
                     classes_re_mapping=self._classes_re_mapping,
+                    defer_postprocess_sync=kwargs.get("defer_postprocess_sync", False),
                 )
-        self._post_process_stream.synchronize()
+            if graph_state is not None:
+                output_consumed_events = [
+                    getattr(result, "_trt_outputs_consumed_event", None)
+                    for result in results
+                ]
+                if output_consumed_events and all(
+                    event is not None for event in output_consumed_events
+                ):
+                    graph_state.consumer_done_event = output_consumed_events[-1]
+                else:
+                    consumer_done = graph_state.consumer_done_event
+                    if consumer_done is None:
+                        consumer_done = torch.cuda.Event()
+                        graph_state.consumer_done_event = consumer_done
+                    consumer_done.record(self._post_process_stream)
+        should_sync = True
+        if kwargs.get("defer_postprocess_sync", False):
+            should_sync = not all(
+                getattr(result, "_postproc_done_event", None) is not None
+                for result in results
+            )
+        if should_sync:
+            self._post_process_stream.synchronize()
         return results
 
     @property
@@ -453,8 +482,4 @@ def _pre_process_stream(self) -> torch.cuda.Stream:
 
     @property
     def _post_process_stream(self) -> torch.cuda.Stream:
-        if not hasattr(self._thread_local_storage, "post_process_stream"):
-            self._thread_local_storage.post_process_stream = torch.cuda.Stream(
-                device=self._device
-            )
-        return self._thread_local_storage.post_process_stream
+        return self._post_process_cuda_stream
diff --git a/inference_models/inference_models/models/rfdetr/triton_postprocess.py b/inference_models/inference_models/models/rfdetr/triton_postprocess.py
index 163f316bf6..f29d97f085 100644
--- a/inference_models/inference_models/models/rfdetr/triton_postprocess.py
+++ b/inference_models/inference_models/models/rfdetr/triton_postprocess.py
@@ -21,7 +21,7 @@
 """
 
 import warnings
-from collections import OrderedDict
+from collections import OrderedDict, defaultdict
 from threading import Lock
 from typing import List, Optional, Tuple, Union
 
@@ -61,6 +61,8 @@
 _MAX_INTERPOLATION_WEIGHT_CACHE_ENTRIES = 16
 _INTERPOLATION_WEIGHT_CACHE = OrderedDict()
 _INTERPOLATION_WEIGHT_CACHE_LOCK = Lock()
+_PINNED_HOST_POOL = defaultdict(list)
+_PINNED_HOST_POOL_LOCK = Lock()
 
 
 def _get_interpolation_weights(
@@ -156,6 +158,25 @@ def _interpolation_cache_key(
     )
 
 
+def _acquire_pinned_host_buffer(source: torch.Tensor) -> torch.Tensor:
+    """Return a pinned CPU tensor matching ``source`` for async DtoH copies."""
+    key = (tuple(source.shape), source.dtype)
+    with _PINNED_HOST_POOL_LOCK:
+        buffers = _PINNED_HOST_POOL[key]
+        if buffers:
+            return buffers.pop()
+    # Pinned memory is required for ``non_blocking=True`` GPU-to-CPU copies to
+    # overlap with later GPU work; allocating it per frame is expensive.
+    return torch.empty(key[0], dtype=key[1], pin_memory=True)
+
+
+def _release_pinned_host_buffer(buffer: torch.Tensor) -> None:
+    """Return a pinned host buffer to the small shape/dtype reuse pool."""
+    key = (tuple(buffer.shape), buffer.dtype)
+    with _PINNED_HOST_POOL_LOCK:
+        _PINNED_HOST_POOL[key].append(buffer)
+
+
 def post_process_single_instance_segmentation_result_to_rle_masks_triton(
     image_bboxes: torch.Tensor,
     image_scores: torch.Tensor,
@@ -163,6 +184,7 @@ def post_process_single_instance_segmentation_result_to_rle_masks_triton(
     image_meta: PreProcessingMetadata,
     threshold: Union[float, torch.Tensor],
     classes_re_mapping: Optional[ClassesReMapping],
+    defer_postprocess_sync: bool = False,
 ) -> Optional[InstanceDetections]:
     """Run the sparse Triton RF-DETR RLE postprocess path for one image.
 
@@ -174,6 +196,11 @@ def post_process_single_instance_segmentation_result_to_rle_masks_triton(
     than one class above threshold, the first pass asks for a retry and the
     second pass emits up to ``_SPARSE_MAX_CLASSES_PER_QUERY`` query-class
     candidates per query.
+
+    When ``defer_postprocess_sync`` is set, the function enqueues the metadata,
+    sparse RLE, and DtoH copies, then returns a placeholder whose finalizer does
+    CPU assembly later. That is used by the streaming pipeline to keep the next
+    frame's GPU work moving while Python handles previous detections.
     """
     unsupported_reason = _unsupported_triton_postprocess_reason(
         image_bboxes=image_bboxes,
@@ -216,6 +243,94 @@ def post_process_single_instance_segmentation_result_to_rle_masks_triton(
         axis="width",
     )
 
+    if defer_postprocess_sync:
+        # Deferred pipeline mode separates class metadata from query mask RLE:
+        # every class candidate can reuse the one sparse mask generated for its
+        # source query, so GPU work remains bounded by the number of queries.
+        topk_metadata_rows = num_queries * _SPARSE_MAX_CLASSES_PER_QUERY
+        query_metadata = torch.empty(
+            (num_queries, _HEADER_SIZE),
+            dtype=torch.float32,
+            device=image_scores.device,
+        )
+        class_metadata = torch.empty(
+            (topk_metadata_rows, _HEADER_SIZE),
+            dtype=torch.float32,
+            device=image_scores.device,
+        )
+        records = torch.empty(
+            (_SPARSE_MAX_TOTAL_RUNS + 1, 3),
+            dtype=torch.int32,
+            device=image_scores.device,
+        )
+        _select_topk_query_class_metadata_kernel[(num_queries,)](
+            image_scores,
+            image_bboxes,
+            class_mapping,
+            class_metadata,
+            query_metadata,
+            records,
+            confidence_threshold,
+            num_queries,
+            num_classes,
+            class_mapping.shape[0],
+            output_height,
+            output_width,
+            BLOCK_CLASSES=triton.next_power_of_2(num_classes),
+            METADATA_STRIDE=_HEADER_SIZE,
+            MAX_CLASSES_PER_QUERY=_SPARSE_MAX_CLASSES_PER_QUERY,
+            FLAG_WRITE_QUERY_METADATA=True,
+            FLAG_OVERFLOW_CLASSES=False,
+        )
+        _sparse_atomic_rle_from_metadata_kernel[
+            (num_queries, triton.cdiv(_SPARSE_MAX_ROI_WIDTH, _SPARSE_BLOCK_COLS))
+        ](
+            image_masks,
+            y_idx,
+            y_weight,
+            x_idx,
+            x_weight,
+            query_metadata,
+            records,
+            num_queries,
+            mask_height,
+            mask_width,
+            output_height,
+            output_width,
+            image_masks.stride(0),
+            image_masks.stride(1),
+            image_masks.stride(2),
+            BLOCK_MASK=triton.next_power_of_2(mask_height * mask_width),
+            BLOCK_OUT_H=triton.next_power_of_2(output_height),
+            BLOCK_OUT_W=triton.next_power_of_2(output_width),
+            BLOCK_ROI_H=_BLOCK_ROI_H,
+            MAX_ROI_WIDTH=_SPARSE_MAX_ROI_WIDTH,
+            MAX_TOTAL_RUNS=_SPARSE_MAX_TOTAL_RUNS,
+            METADATA_STRIDE=_HEADER_SIZE,
+            BLOCK_COLS=_SPARSE_BLOCK_COLS,
+        )
+        outputs_consumed_event = torch.cuda.Event()
+        outputs_consumed_event.record(torch.cuda.current_stream(image_scores.device))
+        class_metadata_host = _acquire_pinned_host_buffer(class_metadata)
+        records_host = _acquire_pinned_host_buffer(records)
+        # Pinned buffers let the DtoH copies follow the postprocess kernel on
+        # the CUDA stream while Python starts preparing later frames.
+        class_metadata_host.copy_(class_metadata, non_blocking=True)
+        records_host.copy_(records, non_blocking=True)
+        done_event = torch.cuda.Event()
+        done_event.record(torch.cuda.current_stream(image_scores.device))
+        return _deferred_instance_detections_from_sparse_query_records(
+            class_metadata_host=class_metadata_host,
+            records_host=records_host,
+            keepalive_tensors=(query_metadata, class_metadata, records),
+            done_event=done_event,
+            outputs_consumed_event=outputs_consumed_event,
+            max_total_runs=_SPARSE_MAX_TOTAL_RUNS,
+            height=output_height,
+            width=output_width,
+            max_detections=num_queries,
+        )
+
     # First pass: keep the common case small by selecting only the best class
     # for each query and emitting sparse RLE runs for those query masks.
     metadata = torch.empty(
@@ -451,6 +566,163 @@ def _instance_detections_from_sparse_records(
     )
 
 
+def _instance_detections_from_sparse_query_records(
+    class_metadata_host: np.ndarray,
+    records_host: np.ndarray,
+    max_total_runs: int,
+    height: int,
+    width: int,
+    max_detections: Optional[int] = None,
+) -> Optional[InstanceDetections]:
+    """Assemble detections when class rows share query-level RLE records.
+
+    Deferred pipeline mode emits up to ``_SPARSE_MAX_CLASSES_PER_QUERY`` class
+    candidates per query, but the mask is identical for those class rows. The
+    GPU therefore writes RLE records once per query and this CPU helper fans
+    that query mask out to the selected class detections.
+    """
+    active_ranks = np.flatnonzero(class_metadata_host[:, 0] > 0.5)
+    if active_ranks.size == 0:
+        return InstanceDetections(
+            xyxy=torch.empty((0, 4), dtype=torch.int32),
+            confidence=torch.empty((0,), dtype=torch.float32),
+            class_id=torch.empty((0,), dtype=torch.int32),
+            mask=InstancesRLEMasks.from_coco_rle_masks(
+                image_size=(height, width),
+                masks=[],
+            ),
+        )
+    if np.any(class_metadata_host[active_ranks, 8] > 0.5):
+        return None
+    total_runs = int(records_host[0, 0])
+    if int(records_host[0, 1]) != 0 or total_runs < 0 or total_runs > max_total_runs:
+        return None
+
+    # Sort class candidates by the same score/key order as the eager path. The
+    # RLE records remain keyed by source query and are looked up below.
+    order = np.lexsort(
+        (
+            -class_metadata_host[active_ranks, 10],
+            -class_metadata_host[active_ranks, 2],
+        )
+    )
+    active_ranks = active_ranks[order]
+    if max_detections is not None:
+        active_ranks = active_ranks[:max_detections]
+    if total_runs:
+        records_host = records_host[1 : total_runs + 1]
+        # Two stable argsorts (start column first, then query column) leave
+        # the records ordered by (query, start), ties keeping their input order.
+        # Each detection can then slice its runs instead of scanning them all.
+        record_order = np.argsort(records_host[:, 1], kind="stable")
+        records_host = records_host[record_order]
+        record_order = np.argsort(records_host[:, 0], kind="stable")
+        records_host = records_host[record_order]
+        record_queries = records_host[:, 0]
+    else:
+        records_host = None
+        record_queries = None
+    boxes = (
+        torch.from_numpy(class_metadata_host[active_ranks, 3:7].copy()).round().int()
+    )
+    confidence = torch.from_numpy(class_metadata_host[active_ranks, 2].copy())
+    class_id = torch.from_numpy(class_metadata_host[active_ranks, 1].copy()).int()
+
+    rle_masks = []
+    for rank in active_ranks.tolist():
+        query_index = int(class_metadata_host[rank, 9])
+        if records_host is None:
+            rank_records = np.empty((0, 3), dtype=np.int32)
+        else:
+            # ``records_host`` is grouped by query once above, so each detection
+            # pays two binary searches instead of scanning every sparse run.
+            start_index = np.searchsorted(record_queries, query_index, side="left")
+            end_index = np.searchsorted(record_queries, query_index, side="right")
+            rank_records = records_host[start_index:end_index]
+        if rank_records.size:
+            starts_array = rank_records[:, 1].astype(np.int64, copy=False)
+            ends_array = rank_records[:, 2].astype(np.int64, copy=False)
+        else:
+            starts_array = np.empty((0,), dtype=np.int64)
+            ends_array = np.empty((0,), dtype=np.int64)
+        counts = _counts_from_runs(
+            starts=starts_array,
+            ends=ends_array,
+            height=height,
+            width=width,
+        )
+        rle_masks.append(_rle_from_counts(counts=counts, height=height, width=width))
+
+    instances_masks = InstancesRLEMasks.from_coco_rle_masks(
+        image_size=(height, width),
+        masks=rle_masks,
+    )
+    return InstanceDetections(
+        xyxy=boxes,
+        confidence=confidence,
+        class_id=class_id,
+        mask=instances_masks,
+    )
+
+
+def _deferred_instance_detections_from_sparse_query_records(
+    class_metadata_host: torch.Tensor,
+    records_host: torch.Tensor,
+    keepalive_tensors: tuple,
+    done_event: torch.cuda.Event,
+    outputs_consumed_event: torch.cuda.Event,
+    max_total_runs: int,
+    height: int,
+    width: int,
+    max_detections: Optional[int],
+) -> InstanceDetections:
+    """Return a placeholder detection object with deferred CPU finalization.
+
+    The CUDA stream already owns the postprocess kernels and async DtoH copies.
+    Returning this placeholder lets the streaming scheduler submit more GPU work
+    before synchronizing on ``done_event`` and converting sparse records to
+    ``InstanceDetections``.
+    """
+
+    def finalize() -> InstanceDetections:
+        """Synchronize the DtoH copies and build the real detections."""
+        try:
+            done_event.synchronize()
+            # Keep device tensors alive until the recorded copies complete; CUDA
+            # does not retain Python references for us.
+            _ = keepalive_tensors
+            result = _instance_detections_from_sparse_query_records(
+                class_metadata_host=class_metadata_host.numpy(),
+                records_host=records_host.numpy(),
+                max_total_runs=max_total_runs,
+                height=height,
+                width=width,
+                max_detections=max_detections,
+            )
+            if result is None:
+                raise RuntimeError("Deferred RF-DETR Triton RLE postprocess failed")
+            return result
+        finally:
+            _release_pinned_host_buffer(class_metadata_host)
+            _release_pinned_host_buffer(records_host)
+
+    detections = InstanceDetections(
+        xyxy=torch.empty((0, 4), dtype=torch.int32),
+        confidence=torch.empty((0,), dtype=torch.float32),
+        class_id=torch.empty((0,), dtype=torch.int32),
+        mask=InstancesRLEMasks.from_coco_rle_masks(
+            image_size=(height, width),
+            masks=[],
+        ),
+    )
+    # The stream adapter checks these private attributes to order reuse/finalize
+    # operations without forcing an immediate CUDA sync at this call site.
+    detections._postproc_done_event = done_event  # type: ignore[attr-defined]
+    detections._trt_outputs_consumed_event = outputs_consumed_event  # type: ignore[attr-defined]
+    detections._finalize_pending_postproc = finalize  # type: ignore[attr-defined]
+    return detections
+
+
 def _should_retry_sparse_topk_metadata(
     metadata_host: np.ndarray,
     records: torch.Tensor,

From 504e1c685dfffe011ad451d5be927d6d05d00509 Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Tue, 2 Jun 2026 21:58:17 +0000
Subject: [PATCH 41/76] Keep RF-DETR uncompressed RLE counts in pipeline stack

---
 .../models/rfdetr/triton_postprocess.py       | 21 +++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/inference_models/inference_models/models/rfdetr/triton_postprocess.py b/inference_models/inference_models/models/rfdetr/triton_postprocess.py
index f29d97f085..d51fde9229 100644
--- a/inference_models/inference_models/models/rfdetr/triton_postprocess.py
+++ b/inference_models/inference_models/models/rfdetr/triton_postprocess.py
@@ -530,6 +530,7 @@ def _instance_detections_from_sparse_records(
     class_id = torch.from_numpy(metadata_host[active_ranks, 1].copy()).int()
 
     rle_masks = []
+    rle_counts = []
     for rank in active_ranks.tolist():
         if records_host is None:
             rank_records = np.empty((0, 3), dtype=np.int32)
@@ -552,12 +553,14 @@ def _instance_detections_from_sparse_records(
             height=height,
             width=width,
         )
+        rle_counts.append(counts)
         rle_masks.append(_rle_from_counts(counts=counts, height=height, width=width))
 
     instances_masks = InstancesRLEMasks.from_coco_rle_masks(
         image_size=(height, width),
         masks=rle_masks,
     )
+    _attach_uncompressed_counts(instances_masks, rle_counts)
     return InstanceDetections(
         xyxy=boxes,
         confidence=confidence,
@@ -629,6 +632,7 @@ class detections.
     class_id = torch.from_numpy(class_metadata_host[active_ranks, 1].copy()).int()
 
     rle_masks = []
+    rle_counts = []
     for rank in active_ranks.tolist():
         query_index = int(class_metadata_host[rank, 9])
         if records_host is None:
@@ -651,12 +655,14 @@ class detections.
             height=height,
             width=width,
         )
+        rle_counts.append(counts)
         rle_masks.append(_rle_from_counts(counts=counts, height=height, width=width))
 
     instances_masks = InstancesRLEMasks.from_coco_rle_masks(
         image_size=(height, width),
         masks=rle_masks,
     )
+    _attach_uncompressed_counts(instances_masks, rle_counts)
     return InstanceDetections(
         xyxy=boxes,
         confidence=confidence,
@@ -867,6 +873,21 @@ def _rle_from_counts(counts: List[int], height: int, width: int) -> dict:
     )
 
 
+def _attach_uncompressed_counts(
+    masks: InstancesRLEMasks,
+    rle_counts: List[List[int]],
+) -> None:
+    max_length = max((len(counts) for counts in rle_counts), default=0)
+    counts_array = np.zeros((len(rle_counts), max_length), dtype=np.int64)
+    lengths_array = np.empty((len(rle_counts),), dtype=np.int32)
+    for index, counts in enumerate(rle_counts):
+        counts_length = len(counts)
+        lengths_array[index] = counts_length
+        counts_array[index, :counts_length] = counts
+    masks._rle_counts_cpu = counts_array  # type: ignore[attr-defined]
+    masks._rle_lengths_cpu = lengths_array  # type: ignore[attr-defined]
+
+
 if triton is not None:
 
     @triton.jit

From b0a340c13ebafc0528e304249df9f0d3e11578b9 Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Tue, 2 Jun 2026 23:06:40 +0000
Subject: [PATCH 42/76] Route RF-DETR deferred postprocess only through Triton

---
 .../inference_models/models/rfdetr/common.py  | 27 ++++++++++++++-----
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/inference_models/inference_models/models/rfdetr/common.py b/inference_models/inference_models/models/rfdetr/common.py
index c298efe63a..664a0136a5 100644
--- a/inference_models/inference_models/models/rfdetr/common.py
+++ b/inference_models/inference_models/models/rfdetr/common.py
@@ -372,13 +372,27 @@ def post_process_instance_segmentation_results_to_rle_masks(
     device = bboxes.device
     if isinstance(threshold, torch.Tensor):
         threshold = threshold.to(device=device, dtype=logits_sigmoid.dtype)
-    post_process_single = (
-        _post_process_single_instance_segmentation_result_to_rle_masks_with_triton
-        if _TRITON_POSTPROC_ENABLED
-        else _post_process_single_instance_segmentation_result_to_rle_masks
-    )
+    if _TRITON_POSTPROC_ENABLED:
+        return [
+            _post_process_single_instance_segmentation_result_to_rle_masks_with_triton(
+                image_bboxes=image_bboxes,
+                image_logits=image_logits,
+                image_masks=image_masks,
+                image_meta=image_meta,
+                threshold=threshold,
+                num_classes=num_classes,
+                classes_re_mapping=classes_re_mapping,
+                defer_postprocess_sync=defer_postprocess_sync,
+            )
+            for image_bboxes, image_logits, image_masks, image_meta in zip(
+                bboxes,
+                logits_sigmoid,
+                masks,
+                pre_processing_meta,
+            )
+        ]
     return [
-        post_process_single(
+        _post_process_single_instance_segmentation_result_to_rle_masks(
             image_bboxes=image_bboxes,
             image_logits=image_logits,
             image_masks=image_masks,
@@ -386,7 +400,6 @@ def post_process_instance_segmentation_results_to_rle_masks(
             threshold=threshold,
             num_classes=num_classes,
             classes_re_mapping=classes_re_mapping,
-            defer_postprocess_sync=defer_postprocess_sync,
         )
         for image_bboxes, image_logits, image_masks, image_meta in zip(
             bboxes,

From 5d8f659b5da18423414a1d93bf1961f2f86b2977 Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Tue, 2 Jun 2026 23:32:38 +0000
Subject: [PATCH 43/76] Document RF-DETR deferred postprocess helpers

---
 .../models/rfdetr/triton_postprocess.py             | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/inference_models/inference_models/models/rfdetr/triton_postprocess.py b/inference_models/inference_models/models/rfdetr/triton_postprocess.py
index d51fde9229..799377e879 100644
--- a/inference_models/inference_models/models/rfdetr/triton_postprocess.py
+++ b/inference_models/inference_models/models/rfdetr/triton_postprocess.py
@@ -560,6 +560,8 @@ def _instance_detections_from_sparse_records(
         image_size=(height, width),
         masks=rle_masks,
     )
+    # The pipeline RLE-to-polygon path consumes uncompressed counts directly, so
+    # keep them beside the pycocotools-compressed masks instead of decoding later.
     _attach_uncompressed_counts(instances_masks, rle_counts)
     return InstanceDetections(
         xyxy=boxes,
@@ -662,6 +664,8 @@ class detections.
         image_size=(height, width),
         masks=rle_masks,
     )
+    # The pipeline RLE-to-polygon path consumes uncompressed counts directly, so
+    # keep them beside the pycocotools-compressed masks instead of decoding later.
     _attach_uncompressed_counts(instances_masks, rle_counts)
     return InstanceDetections(
         xyxy=boxes,
@@ -877,6 +881,13 @@ def _attach_uncompressed_counts(
     masks: InstancesRLEMasks,
     rle_counts: List[List[int]],
 ) -> None:
+    """Attach padded uncompressed COCO counts for downstream polygon conversion.
+
+    ``InstancesRLEMasks`` stores compressed pycocotools RLEs for normal API
+    compatibility. The pipeline branch also converts masks to polygons; keeping
+    the uncompressed counts here avoids an extra compressed-RLE decode on that
+    hot CPU path.
+    """
     max_length = max((len(counts) for counts in rle_counts), default=0)
     counts_array = np.zeros((len(rle_counts), max_length), dtype=np.int64)
     lengths_array = np.empty((len(rle_counts),), dtype=np.int32)
@@ -884,6 +895,8 @@ def _attach_uncompressed_counts(
         counts_length = len(counts)
         lengths_array[index] = counts_length
         counts_array[index, :counts_length] = counts
+    # Private attributes are intentionally used as an optional fast-path cache;
+    # callers without this branch still rely on the standard compressed masks.
     masks._rle_counts_cpu = counts_array  # type: ignore[attr-defined]
     masks._rle_lengths_cpu = lengths_array  # type: ignore[attr-defined]
 

From 650526eb87ff929ca56c88a1bab467d3ace08081 Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Tue, 2 Jun 2026 23:39:04 +0000
Subject: [PATCH 44/76] Restore deferred RF-DETR query metadata path

---
 .../models/rfdetr/triton_postprocess.py       | 37 +++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/inference_models/inference_models/models/rfdetr/triton_postprocess.py b/inference_models/inference_models/models/rfdetr/triton_postprocess.py
index 799377e879..d0bd1dcb07 100644
--- a/inference_models/inference_models/models/rfdetr/triton_postprocess.py
+++ b/inference_models/inference_models/models/rfdetr/triton_postprocess.py
@@ -424,6 +424,7 @@ def post_process_single_instance_segmentation_result_to_rle_masks_triton(
         image_bboxes,
         class_mapping,
         metadata,
+        metadata,
         records,
         confidence_threshold,
         num_queries,
@@ -434,6 +435,7 @@ def post_process_single_instance_segmentation_result_to_rle_masks_triton(
         BLOCK_CLASSES=triton.next_power_of_2(num_classes),
         METADATA_STRIDE=_HEADER_SIZE,
         MAX_CLASSES_PER_QUERY=_SPARSE_MAX_CLASSES_PER_QUERY,
+        FLAG_WRITE_QUERY_METADATA=False,
         FLAG_OVERFLOW_CLASSES=True,
     )
     _sparse_atomic_rle_from_metadata_kernel[
@@ -1061,6 +1063,7 @@ def _select_topk_query_class_metadata_kernel(
         bboxes,
         class_mapping,
         metadata,
+        query_metadata,
         records,
         threshold: tl.constexpr,
         num_queries: tl.constexpr,
@@ -1071,6 +1074,7 @@ def _select_topk_query_class_metadata_kernel(
         BLOCK_CLASSES: tl.constexpr,
         METADATA_STRIDE: tl.constexpr,
         MAX_CLASSES_PER_QUERY: tl.constexpr,
+        FLAG_WRITE_QUERY_METADATA: tl.constexpr,
         FLAG_OVERFLOW_CLASSES: tl.constexpr,
     ):
         """Emit top passing query-class metadata rows for one RF-DETR query.
@@ -1210,6 +1214,39 @@ def _select_topk_query_class_metadata_kernel(
             tl.store(metadata + meta_base + 4, y1)
             tl.store(metadata + meta_base + 5, x2)
             tl.store(metadata + meta_base + 6, y2)
+            if FLAG_WRITE_QUERY_METADATA and class_rank == 0:
+                # The pipeline path wants best-query metadata for the RLE
+                # kernel while retaining expanded class metadata for CPU
+                # finalization.
+                query_meta_base = query_index * METADATA_STRIDE
+                tl.store(
+                    query_metadata + query_meta_base + 0,
+                    tl.where(is_valid_detection, 1.0, 0.0),
+                )
+                tl.store(
+                    query_metadata + query_meta_base + 1, mapped_class.to(tl.float32)
+                )
+                tl.store(
+                    query_metadata + query_meta_base + 2,
+                    tl.where(is_valid_detection, selected_score, 0.0),
+                )
+                tl.store(query_metadata + query_meta_base + 3, x1)
+                tl.store(query_metadata + query_meta_base + 4, y1)
+                tl.store(query_metadata + query_meta_base + 5, x2)
+                tl.store(query_metadata + query_meta_base + 6, y2)
+                tl.store(query_metadata + query_meta_base + 7, 0.0)
+                tl.store(query_metadata + query_meta_base + 8, 0.0)
+                tl.store(
+                    query_metadata + query_meta_base + 9, query_index.to(tl.float32)
+                )
+                tl.store(
+                    query_metadata + query_meta_base + 10, selected_index.to(tl.float32)
+                )
+                tl.store(query_metadata + query_meta_base + 11, 0.0)
+                tl.store(query_metadata + query_meta_base + 12, 0.0)
+                tl.store(query_metadata + query_meta_base + 13, 0.0)
+                tl.store(query_metadata + query_meta_base + 14, 0.0)
+                tl.store(query_metadata + query_meta_base + 15, 0.0)
             work_scores = tl.where(class_offsets == selected_class, -1.0, work_scores)
 
     @triton.jit

From 942e34afdbc904069a2ce1230475b8f160b107e2 Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Tue, 2 Jun 2026 23:50:54 +0000
Subject: [PATCH 45/76] Document RF-DETR deferred kernel metadata contract

---
 .../models/rfdetr/triton_postprocess.py                | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/inference_models/inference_models/models/rfdetr/triton_postprocess.py b/inference_models/inference_models/models/rfdetr/triton_postprocess.py
index d0bd1dcb07..d52cc7a79a 100644
--- a/inference_models/inference_models/models/rfdetr/triton_postprocess.py
+++ b/inference_models/inference_models/models/rfdetr/triton_postprocess.py
@@ -1098,6 +1098,13 @@ def _select_topk_query_class_metadata_kernel(
                 the ``class_rank``-th highest passing class for that query.
                 Columns have the same layout as
                 ``_select_best_query_metadata_kernel``.
+            query_metadata: CUDA float32 tensor with shape
+                ``[num_queries, METADATA_STRIDE]`` when
+                ``FLAG_WRITE_QUERY_METADATA`` is true. In deferred pipeline
+                mode, class metadata is expanded but the RLE kernel should still
+                run once per query, so class rank 0 is also written to this
+                query-level buffer. When ``FLAG_WRITE_QUERY_METADATA`` is false,
+                callers pass ``metadata`` here and the argument is unused.
             records: CUDA int32 tensor with shape ``[MAX_TOTAL_RUNS + 1, 3]``.
                 Program 0 resets ``records[0, 0]`` and ``records[0, 1]`` before
                 the RLE kernel appends runs.
@@ -1111,6 +1118,9 @@ def _select_topk_query_class_metadata_kernel(
             BLOCK_CLASSES: Power-of-two tile width covering all class columns.
             METADATA_STRIDE: Number of float32 fields per metadata row.
             MAX_CLASSES_PER_QUERY: Number of metadata rows reserved per query.
+            FLAG_WRITE_QUERY_METADATA: When true, additionally writes the best
+                passing class row for each query into ``query_metadata`` for the
+                deferred pipeline RLE kernel.
             FLAG_OVERFLOW_CLASSES: When true, writes ``records[0, 1] = 1`` if
                 more than ``MAX_CLASSES_PER_QUERY`` classes pass threshold; the
                 caller treats that as unsupported for exact top-k parity.

From 938a794e3ab3301335e49ce0f38227f624a96ac9 Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Wed, 3 Jun 2026 01:23:32 +0000
Subject: [PATCH 46/76] Remove RF-DETR pipeline profiling hooks

---
 .../rfdetr_rle_to_poly_microbenchmark.py      | 38 +-------
 .../interfaces/stream/inference_pipeline.py   | 14 ---
 .../stream/model_handlers/workflows.py        | 20 +---
 .../core/models/inference_models_adapters.py  | 94 ++++--------------
 inference/core/utils/nsight.py                | 96 -------------------
 .../core/workflows/core_steps/common/utils.py | 85 +++++++---------
 .../roboflow/instance_segmentation/v3.py      | 93 ++++++------------
 .../models/test_inference_models_adapters.py  | 29 +-----
 8 files changed, 94 insertions(+), 375 deletions(-)
 delete mode 100644 inference/core/utils/nsight.py

diff --git a/development/stream_interface/rfdetr_rle_to_poly_microbenchmark.py b/development/stream_interface/rfdetr_rle_to_poly_microbenchmark.py
index 5e23c4fab4..2db0604a36 100644
--- a/development/stream_interface/rfdetr_rle_to_poly_microbenchmark.py
+++ b/development/stream_interface/rfdetr_rle_to_poly_microbenchmark.py
@@ -15,9 +15,6 @@
 
     python development/stream_interface/rfdetr_rle_to_poly_microbenchmark.py \
         --mode replay --cases-dir temp/rfdetr_rle_to_poly_cases
-
-For Nsight Systems CPU/NVTX tracing, pass ``--nvtx`` during replay and profile
-with ``nsys profile --trace=nvtx,osrt --sample=process-tree``.
 """
 
 import argparse
@@ -25,7 +22,6 @@
 import importlib.util
 import json
 import os
-from contextlib import contextmanager
 from pathlib import Path
 import pickle
 import sys
@@ -294,34 +290,13 @@ def _assert_outputs_equal(
             )
 
 
-@contextmanager
-def _nvtx_range(enabled: bool, message: str):
-    if not enabled:
-        yield
-        return
-    try:
-        import torch
-
-        torch.cuda.nvtx.range_push(message)
-        try:
-            yield
-        finally:
-            torch.cuda.nvtx.range_pop()
-    except Exception:
-        yield
-
-
-def _run_one_replay_case(
-    *, case_path: Path, nvtx: bool, use_lazy_counts: bool
-) -> float:
+def _run_one_replay_case(*, case_path: Path, use_lazy_counts: bool) -> float:
     from inference.core.utils.rle_to_polygon import rle_masks_to_polygons
 
     case = _load_case(case_path)
     masks = _materialize_masks(case=case, use_lazy_counts=use_lazy_counts)
-    label = f"rfdetr.rle_to_poly.case={case['case_index']}" f".masks={len(masks.masks)}"
     start = perf_counter()
-    with _nvtx_range(nvtx, label):
-        actual = rle_masks_to_polygons(masks)
+    actual = rle_masks_to_polygons(masks)
     elapsed = perf_counter() - start
     _assert_outputs_equal(
         actual=actual,
@@ -380,7 +355,7 @@ def _run_replay(args: argparse.Namespace) -> dict:
 
     print(
         f"[replay] cases={len(case_paths)} repeats={args.repeats} "
-        f"warmup_repeats={args.warmup_repeats} nvtx={args.nvtx} "
+        f"warmup_repeats={args.warmup_repeats} "
         f"use_lazy_counts={args.use_lazy_counts}",
         flush=True,
     )
@@ -388,7 +363,6 @@ def _run_replay(args: argparse.Namespace) -> dict:
         for case_path in case_paths:
             _run_one_replay_case(
                 case_path=case_path,
-                nvtx=args.nvtx,
                 use_lazy_counts=args.use_lazy_counts,
             )
 
@@ -398,7 +372,6 @@ def _run_replay(args: argparse.Namespace) -> dict:
             timings.append(
                 _run_one_replay_case(
                     case_path=case_path,
-                    nvtx=args.nvtx,
                     use_lazy_counts=args.use_lazy_counts,
                 )
             )
@@ -439,11 +412,6 @@ def _parse_args() -> argparse.Namespace:
     parser.add_argument("--repeats", type=int, default=1)
     parser.add_argument("--warmup-repeats", type=int, default=0)
     parser.add_argument("--max-cases", type=int, default=None)
-    parser.add_argument(
-        "--nvtx",
-        action="store_true",
-        help="Add NVTX ranges around each replayed rle_masks2poly call.",
-    )
     parser.add_argument(
         "--use-lazy-counts",
         action=argparse.BooleanOptionalAction,
diff --git a/inference/core/interfaces/stream/inference_pipeline.py b/inference/core/interfaces/stream/inference_pipeline.py
index a0306b37c6..01842076b0 100644
--- a/inference/core/interfaces/stream/inference_pipeline.py
+++ b/inference/core/interfaces/stream/inference_pipeline.py
@@ -60,7 +60,6 @@
 from inference.core.managers.decorators.fixed_size_cache import WithFixedSizeCache
 from inference.core.registries.roboflow import RoboflowModelRegistry
 from inference.core.utils.function import experimental
-from inference.core.utils.nsight import nsight_frame_label, nsight_mark
 from inference.core.workflows.core_steps.common.entities import StepExecutionMode
 from inference.core.workflows.execution_engine.profiling.core import (
     BaseWorkflowsProfiler,
@@ -954,16 +953,12 @@ def _dispatch_inference_results(self) -> None:
                 self._predictions_queue.task_done()
                 break
             predictions, video_frames = inference_results
-            frame_id = _video_frames_trace_id(video_frames=video_frames)
-            nsight_mark(nsight_frame_label(frame_id, "dispatch_accept_result"))
             predictions = _resolve_prediction_futures(predictions)
-            nsight_mark(nsight_frame_label(frame_id, "dispatch_predictions_resolved"))
             if self._on_prediction is not None:
                 self._handle_predictions_dispatching(
                     predictions=predictions,
                     video_frames=video_frames,
                 )
-            nsight_mark(nsight_frame_label(frame_id, "cpu_full_complete"))
             self._predictions_queue.task_done()
 
     def _queue_inference_result(
@@ -1147,12 +1142,3 @@ def _resolve_prediction_futures(value: Any) -> Any:
             key: _resolve_prediction_futures(element) for key, element in value.items()
         }
     return value
-
-
-def _video_frames_trace_id(video_frames: List[VideoFrame]) -> Optional[str]:
-    if not video_frames:
-        return None
-    frame_ids = [str(video_frame.frame_id) for video_frame in video_frames]
-    if len(frame_ids) == 1:
-        return frame_ids[0]
-    return ",".join(frame_ids)
diff --git a/inference/core/interfaces/stream/model_handlers/workflows.py b/inference/core/interfaces/stream/model_handlers/workflows.py
index 5e2caac87a..5e8b274698 100644
--- a/inference/core/interfaces/stream/model_handlers/workflows.py
+++ b/inference/core/interfaces/stream/model_handlers/workflows.py
@@ -5,11 +5,6 @@
 from inference.core.interfaces.stream.model_handlers.workflows_context import (
     workflow_stream_flush_context,
 )
-from inference.core.utils.nsight import (
-    nsight_frame_context,
-    nsight_frame_label,
-    nsight_mark,
-)
 from inference.core.workflows.execution_engine.core import ExecutionEngine
 from inference.core.workflows.execution_engine.entities.base import VideoMetadata
 
@@ -35,11 +30,7 @@ def __init__(
     def __call__(
         self, video_frames: List[VideoFrame]
     ) -> Optional[InferenceHandlerResult]:
-        frame_id = _video_frames_trace_id(video_frames=video_frames)
-        with nsight_frame_context(frame_id=frame_id):
-            nsight_mark(nsight_frame_label(frame_id, "cpu_start"))
-            predictions = self._run_workflow(video_frames=video_frames)
-            nsight_mark(nsight_frame_label(frame_id, "gpu_submitted"))
+        predictions = self._run_workflow(video_frames=video_frames)
         stream_buffer_depth = self._stream_buffer_depth()
         if stream_buffer_depth <= 0:
             self._pending_video_frames.clear()
@@ -137,12 +128,3 @@ def _stream_buffer_depth(self) -> int:
                 else:
                     stream_buffer_depth = max(stream_buffer_depth, 1)
         return stream_buffer_depth
-
-
-def _video_frames_trace_id(video_frames: List[VideoFrame]) -> Optional[str]:
-    if not video_frames:
-        return None
-    frame_ids = [str(video_frame.frame_id) for video_frame in video_frames]
-    if len(frame_ids) == 1:
-        return frame_ids[0]
-    return ",".join(frame_ids)
diff --git a/inference/core/models/inference_models_adapters.py b/inference/core/models/inference_models_adapters.py
index 999878b7df..acb67b571c 100644
--- a/inference/core/models/inference_models_adapters.py
+++ b/inference/core/models/inference_models_adapters.py
@@ -49,12 +49,6 @@
 from inference.core.models.base import Model
 from inference.core.roboflow_api import get_extra_weights_provider_headers
 from inference.core.utils.image_utils import load_image_bgr, load_image_rgb
-from inference.core.utils.nsight import (
-    nsight_current_frame_id,
-    nsight_frame_label,
-    nsight_mark,
-    nsight_range,
-)
 from inference.core.utils.postprocess import bitpacked_masks2poly, masks2poly
 from inference.core.utils.rle_to_polygon import rle_masks_to_polygons
 from inference.core.utils.visualisation import draw_detection_predictions
@@ -361,23 +355,17 @@ def map_inference_kwargs(self, kwargs: dict) -> dict:
     def preprocess(self, image: Any, **kwargs):
         is_batch = isinstance(image, list)
         images = image if is_batch else [image]
-        trace_frame_id = nsight_current_frame_id()
-        with nsight_range(nsight_frame_label(trace_frame_id, "cpu_preprocess.load")):
-            np_images: List[np.ndarray] = [
-                load_image_bgr(
-                    v,
-                    disable_preproc_auto_orient=kwargs.get(
-                        "disable_preproc_auto_orient", False
-                    ),
-                )
-                for v in images
-            ]
+        np_images: List[np.ndarray] = [
+            load_image_bgr(
+                v,
+                disable_preproc_auto_orient=kwargs.get(
+                    "disable_preproc_auto_orient", False
+                ),
+            )
+            for v in images
+        ]
         mapped_kwargs = self.map_inference_kwargs(kwargs)
-        nsight_mark(nsight_frame_label(trace_frame_id, "gpu_start"))
-        with nsight_range(nsight_frame_label(trace_frame_id, "gpu_preprocess.submit")):
-            preprocessed = self._model.pre_process(np_images, **mapped_kwargs)
-        nsight_mark(nsight_frame_label(trace_frame_id, "gpu_preprocess_submitted"))
-        return preprocessed
+        return self._model.pre_process(np_images, **mapped_kwargs)
 
     def predict(self, img_in, **kwargs):
         mapped_kwargs = self.map_inference_kwargs(kwargs)
@@ -396,14 +384,8 @@ def predict(self, img_in, **kwargs):
         # postprocess off the current frame's postprocess() host path while
         # still preserving the correctness dependency for reused TRT outputs.
         self._submit_next_pending_gpu_work()
-        trace_frame_id = nsight_current_frame_id()
         pre_processing_meta = getattr(img_in, "_pre_processing_meta", None)
-        with nsight_range(nsight_frame_label(trace_frame_id, "gpu_forward.submit")):
-            fut = self._model.forward_async(
-                img_in, pre_processing_meta, **mapped_kwargs
-            )
-        nsight_mark(nsight_frame_label(trace_frame_id, "gpu_forward_submitted"))
-        fut._trace_frame_id = trace_frame_id  # type: ignore[attr-defined]
+        fut = self._model.forward_async(img_in, pre_processing_meta, **mapped_kwargs)
         fut._adapter_kwargs = {  # type: ignore[attr-defined]
             "mapped_kwargs": mapped_kwargs
         }
@@ -446,12 +428,7 @@ def _submit_future_gpu_work(
         fut._kwargs = mapped_kwargs  # type: ignore[attr-defined]
         submit_gpu_work = getattr(fut, "submit_gpu_work", None)
         if callable(submit_gpu_work):
-            trace_frame_id = getattr(fut, "_trace_frame_id", nsight_current_frame_id())
-            with nsight_range(
-                nsight_frame_label(trace_frame_id, "gpu_postprocess.submit")
-            ):
-                submit_gpu_work(meta)
-            nsight_mark(nsight_frame_label(trace_frame_id, "gpu_postprocess_submitted"))
+            submit_gpu_work(meta)
             self._gpu_submit_generation = getattr(self, "_gpu_submit_generation", 0) + 1
             fut._adapter_gpu_submit_generation = (  # type: ignore[attr-defined]
                 self._gpu_submit_generation
@@ -475,16 +452,12 @@ def _submit_response_build(
     ) -> None:
         fut._meta = meta  # type: ignore[attr-defined]
         fut._kwargs = mapped_kwargs  # type: ignore[attr-defined]
-        trace_frame_id = getattr(fut, "_trace_frame_id", nsight_current_frame_id())
-        fut._trace_frame_id = trace_frame_id  # type: ignore[attr-defined]
-        nsight_mark(nsight_frame_label(trace_frame_id, "cpu_response_released"))
         response_future = self._get_response_executor().submit(
             self._finalize_future,
             fut,
             meta,
             mapped_kwargs,
         )
-        response_future._trace_frame_id = trace_frame_id  # type: ignore[attr-defined]
         self._response_futures.append(response_future)
 
     def _submit_ready_responses(self) -> None:
@@ -585,26 +558,10 @@ def _finalize_future(
         # because _DirectInferenceFuture's post_process is memoised.
         fut._meta = preprocess_return_metadata  # type: ignore[attr-defined]
         fut._kwargs = mapped_kwargs  # type: ignore[attr-defined]
-        trace_frame_id = getattr(fut, "_trace_frame_id", None)
-        nsight_mark(nsight_frame_label(trace_frame_id, "cpu_response_start"))
-        with nsight_range(nsight_frame_label(trace_frame_id, "cpu_postprocess.total")):
-            with nsight_range(
-                nsight_frame_label(trace_frame_id, "cpu_postprocess.await_gpu_result")
-            ):
-                detections_list = fut.result()
-            for det in detections_list:
-                try:
-                    det._trace_frame_id = trace_frame_id  # type: ignore[attr-defined]
-                except AttributeError:
-                    pass
-            with nsight_range(
-                nsight_frame_label(trace_frame_id, "cpu_postprocess.build_response")
-            ):
-                responses = self._build_responses_from_detections(
-                    detections_list, preprocess_return_metadata, **mapped_kwargs
-                )
-        nsight_mark(nsight_frame_label(trace_frame_id, "cpu_response_complete"))
-        return responses
+        detections_list = fut.result()
+        return self._build_responses_from_detections(
+            detections_list, preprocess_return_metadata, **mapped_kwargs
+        )
 
     def _postprocess_sync(
         self,
@@ -637,20 +594,9 @@ def _build_responses_from_detections(
 
         responses: List[InstanceSegmentationInferenceResponse] = []
         for preproc_metadata, det in zip(preprocess_return_metadata, detections_list):
-            trace_frame_id = getattr(det, "_trace_frame_id", nsight_current_frame_id())
             finalize_pending = getattr(det, "_finalize_pending_postproc", None)
             if callable(finalize_pending):
-                with nsight_range(
-                    nsight_frame_label(trace_frame_id, "cpu_postprocess.accept_gpu_rle")
-                ):
-                    det = finalize_pending()
-                try:
-                    det._trace_frame_id = trace_frame_id  # type: ignore[attr-defined]
-                except Exception:
-                    pass
-                nsight_mark(
-                    nsight_frame_label(trace_frame_id, "cpu_predictions_accepted")
-                )
+                det = finalize_pending()
             H = preproc_metadata.original_size.height
             W = preproc_metadata.original_size.width
 
@@ -672,9 +618,6 @@ def _build_responses_from_detections(
                 device = mask_gpu.device if dense_mask_cuda else mask_packed_gpu.device
                 stream = torch.cuda.current_stream(device)
                 done_event.wait(stream)
-                nsight_mark(
-                    nsight_frame_label(trace_frame_id, "gpu_finish_wait_enqueued")
-                )
 
                 if (
                     defer_count_to_adapter
@@ -927,8 +870,7 @@ def draw_predictions(
 
 
 def rle_masks2poly(masks: InstancesRLEMasks) -> List[np.ndarray]:
-    with nsight_range("rfdetr.rle_masks2poly"):
-        return rle_masks_to_polygons(masks=masks)
+    return rle_masks_to_polygons(masks=masks)
 
 
 class InferenceModelsKeyPointsDetectionAdapter(Model):
diff --git a/inference/core/utils/nsight.py b/inference/core/utils/nsight.py
deleted file mode 100644
index d5c1157bfb..0000000000
--- a/inference/core/utils/nsight.py
+++ /dev/null
@@ -1,96 +0,0 @@
-import os
-import threading
-from contextlib import contextmanager
-from typing import Optional
-
-_TRACE_CONTEXT = threading.local()
-_NVTX = None
-_NVTX_INIT_ATTEMPTED = False
-
-
-def nsight_markers_enabled() -> bool:
-    return os.getenv("RFDETR_NSIGHT_MARKERS", "").lower() in {
-        "1",
-        "true",
-        "yes",
-        "on",
-    }
-
-
-def _get_nvtx():
-    global _NVTX, _NVTX_INIT_ATTEMPTED
-    if _NVTX_INIT_ATTEMPTED:
-        return _NVTX
-    _NVTX_INIT_ATTEMPTED = True
-    try:
-        import torch
-
-        _NVTX = torch.cuda.nvtx
-    except Exception:
-        _NVTX = None
-    return _NVTX
-
-
-def nsight_mark(message: str) -> None:
-    if not nsight_markers_enabled():
-        return
-    nvtx = _get_nvtx()
-    if nvtx is None:
-        return
-    try:
-        nvtx.mark(message)
-    except Exception:
-        return
-
-
-def nsight_range_push(message: str) -> None:
-    if not nsight_markers_enabled():
-        return
-    nvtx = _get_nvtx()
-    if nvtx is None:
-        return
-    try:
-        nvtx.range_push(message)
-    except Exception:
-        return
-
-
-def nsight_range_pop() -> None:
-    if not nsight_markers_enabled():
-        return
-    nvtx = _get_nvtx()
-    if nvtx is None:
-        return
-    try:
-        nvtx.range_pop()
-    except Exception:
-        return
-
-
-@contextmanager
-def nsight_range(message: str):
-    nsight_range_push(message)
-    try:
-        yield
-    finally:
-        nsight_range_pop()
-
-
-def nsight_current_frame_id() -> Optional[str]:
-    return getattr(_TRACE_CONTEXT, "frame_id", None)
-
-
-@contextmanager
-def nsight_frame_context(frame_id: Optional[str]):
-    previous = getattr(_TRACE_CONTEXT, "frame_id", None)
-    _TRACE_CONTEXT.frame_id = frame_id
-    try:
-        yield
-    finally:
-        _TRACE_CONTEXT.frame_id = previous
-
-
-def nsight_frame_label(frame_id: Optional[str], event: str) -> str:
-    if frame_id is None:
-        return f"rfdetr.{event}"
-    return f"rfdetr.frame={frame_id}.{event}"
diff --git a/inference/core/workflows/core_steps/common/utils.py b/inference/core/workflows/core_steps/common/utils.py
index 4229c1608e..2fe82690df 100644
--- a/inference/core/workflows/core_steps/common/utils.py
+++ b/inference/core/workflows/core_steps/common/utils.py
@@ -64,7 +64,6 @@
     WorkflowImageData,
 )
 from inference.core.workflows.prototypes.block import BlockResult
-from inference.core.utils.nsight import nsight_range
 
 T = TypeVar("T")
 
@@ -267,54 +266,44 @@ def convert_inference_detections_batch_to_sv_detections(
     predictions_key: str = "predictions",
     image_key: str = "image",
 ) -> List[sv.Detections]:
-    with nsight_range("workflow.to_sv.convert_inference_batch"):
-        batch_of_detections: List[sv.Detections] = []
-        for p in predictions:
-            width, height = p[image_key][WIDTH_KEY], p[image_key][HEIGHT_KEY]
-            with nsight_range("workflow.to_sv.convert.from_inference"):
-                with nsight_range("workflow.to_sv.convert.fast_polygon"):
-                    fast_result = _try_convert_polygon_predictions_to_sv_detections(
-                        prediction=p,
-                        predictions_key=predictions_key,
-                        image_key=image_key,
-                    )
-                if fast_result is None:
-                    detections = sv.Detections.from_inference(p)
-                    raw_predictions = p[predictions_key]
-                    if len(detections) != len(raw_predictions):
-                        with nsight_range(
-                            "workflow.to_sv.convert.filter_invalid_polygons"
-                        ):
-                            raw_predictions = filter_out_invalid_polygons(
-                                predictions=raw_predictions
-                            )
-                else:
-                    detections, raw_predictions = fast_result
-            with nsight_range("workflow.to_sv.convert.metadata_arrays"):
-                parent_ids = [d.get(PARENT_ID_KEY, "") for d in raw_predictions]
-                detection_ids = [
-                    _get_or_create_detection_id(d) for d in raw_predictions
-                ]
-                detections[DETECTION_ID_KEY] = np.array(detection_ids)
-                detections[PARENT_ID_KEY] = np.array(parent_ids)
-                detections[IMAGE_DIMENSIONS_KEY] = np.array(
-                    [[height, width]] * len(detections)
+    batch_of_detections: List[sv.Detections] = []
+    for p in predictions:
+        width, height = p[image_key][WIDTH_KEY], p[image_key][HEIGHT_KEY]
+        fast_result = _try_convert_polygon_predictions_to_sv_detections(
+            prediction=p,
+            predictions_key=predictions_key,
+            image_key=image_key,
+        )
+        if fast_result is None:
+            detections = sv.Detections.from_inference(p)
+            raw_predictions = p[predictions_key]
+            if len(detections) != len(raw_predictions):
+                raw_predictions = filter_out_invalid_polygons(
+                    predictions=raw_predictions
                 )
-                if INFERENCE_ID_KEY in p:
-                    detections[INFERENCE_ID_KEY] = np.array(
-                        [p[INFERENCE_ID_KEY]] * len(detections)
-                    )
-            with nsight_range("workflow.to_sv.convert.rle_masks"):
-                rle_masks = [
-                    d.get(RLE_MASK_KEY_IN_INFERENCE_RESPONSE) or d.get("rle")
-                    for d in raw_predictions
-                ]
-                if any(m is not None for m in rle_masks):
-                    detections.data[RLE_MASK_KEY_IN_SV_DETECTIONS] = np.array(
-                        rle_masks, dtype=object
-                    )
-            batch_of_detections.append(detections)
-        return batch_of_detections
+        else:
+            detections, raw_predictions = fast_result
+
+        parent_ids = [d.get(PARENT_ID_KEY, "") for d in raw_predictions]
+        detection_ids = [_get_or_create_detection_id(d) for d in raw_predictions]
+        detections[DETECTION_ID_KEY] = np.array(detection_ids)
+        detections[PARENT_ID_KEY] = np.array(parent_ids)
+        detections[IMAGE_DIMENSIONS_KEY] = np.array([[height, width]] * len(detections))
+        if INFERENCE_ID_KEY in p:
+            detections[INFERENCE_ID_KEY] = np.array(
+                [p[INFERENCE_ID_KEY]] * len(detections)
+            )
+
+        rle_masks = [
+            d.get(RLE_MASK_KEY_IN_INFERENCE_RESPONSE) or d.get("rle")
+            for d in raw_predictions
+        ]
+        if any(m is not None for m in rle_masks):
+            detections.data[RLE_MASK_KEY_IN_SV_DETECTIONS] = np.array(
+                rle_masks, dtype=object
+            )
+        batch_of_detections.append(detections)
+    return batch_of_detections
 
 
 def add_inference_keypoints_to_sv_detections(
diff --git a/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v3.py b/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v3.py
index a9f56d34f8..d2470e0ff5 100644
--- a/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v3.py
+++ b/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v3.py
@@ -21,7 +21,6 @@
     is_workflow_stream_flush_active,
 )
 from inference.core.managers.base import ModelManager
-from inference.core.utils.nsight import nsight_frame_label, nsight_mark, nsight_range
 from inference.core.workflows.core_steps.common.entities import StepExecutionMode
 from inference.core.workflows.core_steps.common.utils import (
     attach_parents_coordinates_to_batch_of_sv_detections,
@@ -413,17 +412,12 @@ def _submit_async_post_process_result(
             class_filter,
             model_id,
         )
-        trace_frame_id = getattr(predictions_future, "_trace_frame_id", None)
-        finalized_result_future._trace_frame_id = (  # type: ignore[attr-defined]
-            trace_frame_id
-        )
         return [
             {
                 "inference_id": None,
                 "predictions": self._submit_async_prediction_selector(
                     result_future=finalized_result_future,
                     image_index=image_index,
-                    trace_frame_id=trace_frame_id,
                 ),
                 "model_id": model_id,
             }
@@ -434,15 +428,12 @@ def _submit_async_prediction_selector(
         self,
         result_future: Future,
         image_index: int,
-        trace_frame_id: Optional[str],
     ) -> Future:
-        prediction_future = self._get_stream_response_executor().submit(
+        return self._get_stream_response_executor().submit(
             self._select_async_prediction_value,
             result_future,
             image_index,
         )
-        prediction_future._trace_frame_id = trace_frame_id  # type: ignore[attr-defined]
-        return prediction_future
 
     def _finalize_async_prediction_value(
         self,
@@ -451,39 +442,23 @@ def _finalize_async_prediction_value(
         class_filter: Optional[List[str]],
         model_id: str,
     ) -> BlockResult:
-        trace_frame_id = getattr(predictions_future, "_trace_frame_id", None)
-        nsight_mark(nsight_frame_label(trace_frame_id, "workflow_finalize_start"))
-        with nsight_range(
-            nsight_frame_label(trace_frame_id, "workflow_finalize.total")
-        ):
-            with nsight_range(
-                nsight_frame_label(trace_frame_id, "workflow_finalize.await_response")
-            ):
-                predictions = predictions_future.result()
-            if not isinstance(predictions, list):
-                predictions = [predictions]
-            with nsight_range(
-                nsight_frame_label(trace_frame_id, "workflow_finalize.response_to_dict")
-            ):
-                predictions = [
-                    (
-                        _is_response_dc_to_dict(e)
-                        if isinstance(e, InstanceSegmentationInferenceResponseDC)
-                        else e.model_dump(by_alias=True, exclude_none=True)
-                    )
-                    for e in predictions
-                ]
-            with nsight_range(
-                nsight_frame_label(trace_frame_id, "workflow_finalize.to_sv")
-            ):
-                result = self._post_process_result(
-                    images=images,
-                    predictions=predictions,
-                    class_filter=class_filter,
-                    model_id=model_id,
-                )
-        nsight_mark(nsight_frame_label(trace_frame_id, "workflow_finalize_complete"))
-        return result
+        predictions = predictions_future.result()
+        if not isinstance(predictions, list):
+            predictions = [predictions]
+        predictions = [
+            (
+                _is_response_dc_to_dict(e)
+                if isinstance(e, InstanceSegmentationInferenceResponseDC)
+                else e.model_dump(by_alias=True, exclude_none=True)
+            )
+            for e in predictions
+        ]
+        return self._post_process_result(
+            images=images,
+            predictions=predictions,
+            class_filter=class_filter,
+            model_id=model_id,
+        )
 
     def _select_async_prediction_value(
         self,
@@ -579,25 +554,19 @@ def _post_process_result(
         model_id: str,
     ) -> BlockResult:
         inference_ids = [p.get(INFERENCE_ID_KEY, None) for p in predictions]
-        with nsight_range("workflow.to_sv.convert_inference"):
-            predictions = convert_inference_detections_batch_to_sv_detections(
-                predictions
-            )
-        with nsight_range("workflow.to_sv.attach_prediction_type"):
-            predictions = attach_prediction_type_info_to_sv_detections_batch(
-                predictions=predictions,
-                prediction_type="instance-segmentation",
-            )
-        with nsight_range("workflow.to_sv.class_filter"):
-            predictions = filter_out_unwanted_classes_from_sv_detections_batch(
-                predictions=predictions,
-                classes_to_accept=class_filter,
-            )
-        with nsight_range("workflow.to_sv.attach_parents"):
-            predictions = attach_parents_coordinates_to_batch_of_sv_detections(
-                images=images,
-                predictions=predictions,
-            )
+        predictions = convert_inference_detections_batch_to_sv_detections(predictions)
+        predictions = attach_prediction_type_info_to_sv_detections_batch(
+            predictions=predictions,
+            prediction_type="instance-segmentation",
+        )
+        predictions = filter_out_unwanted_classes_from_sv_detections_batch(
+            predictions=predictions,
+            classes_to_accept=class_filter,
+        )
+        predictions = attach_parents_coordinates_to_batch_of_sv_detections(
+            images=images,
+            predictions=predictions,
+        )
         return [
             {
                 "inference_id": inference_id,
diff --git a/tests/inference/unit_tests/core/models/test_inference_models_adapters.py b/tests/inference/unit_tests/core/models/test_inference_models_adapters.py
index 860de92396..e1b48c7240 100644
--- a/tests/inference/unit_tests/core/models/test_inference_models_adapters.py
+++ b/tests/inference/unit_tests/core/models/test_inference_models_adapters.py
@@ -2,14 +2,12 @@
 
 from collections import deque
 from concurrent.futures import Future
-from contextlib import nullcontext
 from types import SimpleNamespace
 
 import pytest
 import torch
 
 from inference.core.exceptions import PostProcessingError
-from inference.core.models import inference_models_adapters as adapters_module
 from inference.core.models.inference_models_adapters import (
     InferenceModelsInstanceSegmentationAdapter,
     prepare_classification_response,
@@ -74,7 +72,6 @@ def _make_meta(tag: str):
 
 
 def _make_pipeline_adapter(
-    monkeypatch: pytest.MonkeyPatch,
     futures: list[_FakePipelineFuture],
     ops: list[str],
     pipeline_depth: int = 2,
@@ -96,12 +93,6 @@ def _make_pipeline_adapter(
         ]
     )
 
-    monkeypatch.setattr(adapters_module, "nsight_current_frame_id", lambda: 0)
-    monkeypatch.setattr(adapters_module, "nsight_frame_label", lambda *_args: "trace")
-    monkeypatch.setattr(adapters_module, "nsight_mark", lambda *_args, **_kwargs: None)
-    monkeypatch.setattr(
-        adapters_module, "nsight_range", lambda *_args, **_kwargs: nullcontext()
-    )
     return adapter
 
 
@@ -179,14 +170,11 @@ def test_prepare_classification_response_fails_on_class_count_mismatch() -> None
         )
 
 
-def test_pipeline_submits_previous_future_before_next_forward(
-    monkeypatch: pytest.MonkeyPatch,
-) -> None:
+def test_pipeline_submits_previous_future_before_next_forward() -> None:
     ops: list[str] = []
     future_1 = _FakePipelineFuture(name="f1", ops=ops)
     future_2 = _FakePipelineFuture(name="f2", ops=ops)
     adapter = _make_pipeline_adapter(
-        monkeypatch=monkeypatch,
         futures=[future_1, future_2],
         ops=ops,
         pipeline_depth=2,
@@ -209,14 +197,11 @@ def test_pipeline_submits_previous_future_before_next_forward(
     assert ops == ["forward:f1", "submit:f1", "forward:f2"]
 
 
-def test_pipeline_returns_previous_frame_response_using_previous_metadata(
-    monkeypatch: pytest.MonkeyPatch,
-) -> None:
+def test_pipeline_returns_previous_frame_response_using_previous_metadata() -> None:
     ops: list[str] = []
     future_1 = _FakePipelineFuture(name="f1", ops=ops)
     future_2 = _FakePipelineFuture(name="f2", ops=ops)
     adapter = _make_pipeline_adapter(
-        monkeypatch=monkeypatch,
         futures=[future_1, future_2],
         ops=ops,
         pipeline_depth=2,
@@ -246,13 +231,10 @@ def test_pipeline_returns_previous_frame_response_using_previous_metadata(
     ]
 
 
-def test_pipeline_flush_submits_remaining_gpu_work_before_finalizing(
-    monkeypatch: pytest.MonkeyPatch,
-) -> None:
+def test_pipeline_flush_submits_remaining_gpu_work_before_finalizing() -> None:
     ops: list[str] = []
     future_1 = _FakePipelineFuture(name="f1", ops=ops)
     adapter = _make_pipeline_adapter(
-        monkeypatch=monkeypatch,
         futures=[future_1],
         ops=ops,
         pipeline_depth=2,
@@ -269,15 +251,12 @@ def test_pipeline_flush_submits_remaining_gpu_work_before_finalizing(
     assert ops == ["forward:f1", "submit:f1", "result:f1"]
 
 
-def test_pipeline_depth_three_submits_oldest_pending_before_forward(
-    monkeypatch: pytest.MonkeyPatch,
-) -> None:
+def test_pipeline_depth_three_submits_oldest_pending_before_forward() -> None:
     ops: list[str] = []
     future_1 = _FakePipelineFuture(name="f1", ops=ops)
     future_2 = _FakePipelineFuture(name="f2", ops=ops)
     future_3 = _FakePipelineFuture(name="f3", ops=ops)
     adapter = _make_pipeline_adapter(
-        monkeypatch=monkeypatch,
         futures=[future_1, future_2, future_3],
         ops=ops,
         pipeline_depth=3,

From ae6c5b628423462faaeb3a868a69171214d8183c Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Wed, 3 Jun 2026 01:53:20 +0000
Subject: [PATCH 47/76] Tighten RF-DETR pipeline review coverage

---
 .../rfdetr_workflow_video_parity.py           | 779 ++++++++++++++++++
 .../core/models/inference_models_adapters.py  |  28 +-
 .../models/rfdetr/triton_postprocess.py       |  21 +-
 .../core/interfaces/stream/test_workflows.py  | 137 +++
 4 files changed, 953 insertions(+), 12 deletions(-)
 create mode 100644 development/stream_interface/rfdetr_workflow_video_parity.py
 create mode 100644 tests/inference/unit_tests/core/interfaces/stream/test_workflows.py

diff --git a/development/stream_interface/rfdetr_workflow_video_parity.py b/development/stream_interface/rfdetr_workflow_video_parity.py
new file mode 100644
index 0000000000..879a41a89b
--- /dev/null
+++ b/development/stream_interface/rfdetr_workflow_video_parity.py
@@ -0,0 +1,779 @@
+"""Compare RF-DETR workflow outputs on a video across two git refs.
+
+The driver mode runs the same one-block RF-DETR instance-segmentation workflow
+twice: a baseline ref with RF-DETR fast paths disabled, and a candidate ref with
+the full stack enabled. Each child run writes sink outputs keyed by video frame;
+the final compare checks that both runs emitted the same frame ids and
+semantically equivalent serialized workflow predictions.
+
+Example:
+
+    env PARITY_MODEL_PATH=/app/helloworld/inference/rfdetr-seg-nano-orin-trt-package \
+      python development/stream_interface/rfdetr_workflow_video_parity.py \
+        --video_reference vehicles_1080p.mp4 \
+        --base-ref main \
+        --candidate-ref opt-pipeline-integration
+"""
+
+import argparse
+import importlib.util
+import json
+import math
+import os
+from pathlib import Path
+import pickle
+import shutil
+import subprocess
+import sys
+import tempfile
+from typing import Any, Optional
+
+import numpy as np
+
+SCRIPT_REPO_ROOT = Path(__file__).resolve().parents[2]
+SELF = Path(__file__).resolve()
+PY = sys.executable
+MODEL_ID = "rfdetr-seg-nano"
+LOCAL_WORKFLOW_MODEL_ID = f"{MODEL_ID}/1"
+CONFIDENCE = 0.4
+DEFAULT_BASE_OUT = "/tmp/rfdetr_workflow_video_base.pkl"
+DEFAULT_CANDIDATE_OUT = "/tmp/rfdetr_workflow_video_candidate.pkl"
+TRT_PACKAGE_REQUIRED_FILES = (
+    "model_config.json",
+    "class_names.txt",
+    "inference_config.json",
+    "engine.plan",
+)
+
+BASE_FLAGS_OFF = {
+    "INFERENCE_MODELS_RFDETR_TRITON_POSTPROC_ENABLED": "false",
+    "INFERENCE_MODELS_RFDETR_TRITON_PREPROC_ENABLED": "false",
+    "RFDETR_PIPELINE_DEPTH": "1",
+    "ENABLE_AUTO_CUDA_GRAPHS_FOR_TRT_BACKEND": "false",
+}
+CANDIDATE_FLAGS_ON = {
+    "INFERENCE_MODELS_RFDETR_TRITON_POSTPROC_ENABLED": "true",
+    "INFERENCE_MODELS_RFDETR_TRITON_PREPROC_ENABLED": "true",
+    "RFDETR_PIPELINE_DEPTH": "2",
+    "ENABLE_AUTO_CUDA_GRAPHS_FOR_TRT_BACKEND": "true",
+}
+ALL_BACKENDS = {
+    "torch",
+    "torch-script",
+    "onnx",
+    "trt",
+    "hugging-face",
+    "ultralytics",
+    "custom",
+}
+_SV_DETECTIONS_SERIALIZER = None
+_VOLATILE_OUTPUT_KEYS = {"detection_id"}
+
+
+def _repo_import_roots(repo_root: Path) -> list[Path]:
+    return [repo_root, repo_root / "inference_models"]
+
+
+def _child_pythonpath(repo_root: Path, existing_pythonpath: Optional[str]) -> str:
+    entries = [str(path) for path in _repo_import_roots(repo_root) if path.exists()]
+    if existing_pythonpath:
+        entries.append(existing_pythonpath)
+    return os.pathsep.join(entries)
+
+
+def _prioritize_local_packages(repo_root: Path) -> None:
+    for search_root in reversed(_repo_import_roots(repo_root)):
+        search_root_str = str(search_root)
+        if search_root_str in sys.path:
+            sys.path.remove(search_root_str)
+        if search_root.exists():
+            sys.path.insert(0, search_root_str)
+    for module_name in list(sys.modules):
+        if module_name == "inference" or module_name.startswith("inference."):
+            sys.modules.pop(module_name, None)
+        if module_name == "inference_models" or module_name.startswith(
+            "inference_models."
+        ):
+            sys.modules.pop(module_name, None)
+
+
+def _bootstrap_repo_root(repo_root: str) -> Path:
+    repo_path = Path(repo_root).resolve()
+    os.chdir(repo_path)
+    _prioritize_local_packages(repo_path)
+    return repo_path
+
+
+def _git_output(repo_root: Path, *args: str) -> str:
+    return subprocess.check_output(
+        ["git", *args],
+        cwd=str(repo_root),
+        text=True,
+        stderr=subprocess.DEVNULL,
+    ).strip()
+
+
+def _safe_git_output(repo_root: Path, *args: str, default: str = "<unknown>") -> str:
+    try:
+        return _git_output(repo_root, *args)
+    except subprocess.CalledProcessError:
+        return default
+
+
+def _remove_worktree(worktree_root: Path) -> None:
+    subprocess.run(
+        ["git", "worktree", "remove", "--force", str(worktree_root)],
+        cwd=str(SCRIPT_REPO_ROOT),
+        check=False,
+        stdout=subprocess.DEVNULL,
+        stderr=subprocess.DEVNULL,
+    )
+    shutil.rmtree(worktree_root, ignore_errors=True)
+
+
+def _materialize_target(ref: str) -> dict:
+    if ref.lower() in {"working-tree", "worktree", "current"}:
+        return {
+            "label": (
+                f"{_safe_git_output(SCRIPT_REPO_ROOT, 'rev-parse', '--abbrev-ref', 'HEAD')} "
+                "(working-tree)"
+            ),
+            "repo_root": SCRIPT_REPO_ROOT,
+            "cleanup": None,
+        }
+    worktree_root = Path(tempfile.mkdtemp(prefix="rfdetr-video-parity-"))
+    subprocess.run(
+        ["git", "worktree", "add", "--detach", str(worktree_root), ref],
+        cwd=str(SCRIPT_REPO_ROOT),
+        check=True,
+    )
+    return {
+        "label": ref,
+        "repo_root": worktree_root,
+        "cleanup": lambda: _remove_worktree(worktree_root),
+    }
+
+
+def _is_trt_package(package_dir: Path) -> bool:
+    return package_dir.is_dir() and all(
+        (package_dir / filename).exists() for filename in TRT_PACKAGE_REQUIRED_FILES
+    )
+
+
+def _resolve_model_package() -> Optional[Path]:
+    explicit_model_path = os.environ.get("PARITY_MODEL_PATH")
+    if explicit_model_path and _is_trt_package(Path(explicit_model_path)):
+        return Path(explicit_model_path).resolve()
+    for root in (SCRIPT_REPO_ROOT, Path.cwd(), Path(tempfile.gettempdir())):
+        for name in (
+            "rfdetr-seg-nano-orin-trt-package",
+            "rfdetr-seg-nano-trt-package",
+        ):
+            package = root / name
+            if _is_trt_package(package):
+                return package.resolve()
+    return None
+
+
+def _prepare_local_workflow_model_bundle(repo_root: Path) -> str:
+    """Link the local TRT package into ``repo_root`` and return the model id to use."""
+    package = _resolve_model_package()
+    if package is None:
+        return MODEL_ID
+    os.environ.setdefault(
+        "ALLOW_INFERENCE_MODELS_DIRECTLY_ACCESS_LOCAL_PACKAGES", "true"
+    )
+    model_dir = repo_root / LOCAL_WORKFLOW_MODEL_ID
+    model_dir.parent.mkdir(parents=True, exist_ok=True)
+    if model_dir.is_symlink() and model_dir.resolve() != package:
+        model_dir.unlink()  # stale or dangling link left by an earlier run
+    if not model_dir.exists():
+        model_dir.symlink_to(package, target_is_directory=True)
+    cache_dir = Path(os.environ.get("MODEL_CACHE_DIR", "/tmp/cache"), MODEL_ID, "1")
+    cache_dir.mkdir(parents=True, exist_ok=True)
+    model_type_path = cache_dir / "model_type.json"
+    model_metadata = {
+        "project_task_type": "instance-segmentation",
+        "model_type": "rfdetr-seg-nano",
+    }
+    model_type_path.write_text(json.dumps(model_metadata, indent=4))
+    return LOCAL_WORKFLOW_MODEL_ID
+
+
+def _load_local_inference(repo_root: Path):
+    spec = importlib.util.spec_from_file_location(
+        "inference",
+        repo_root / "inference" / "__init__.py",
+        submodule_search_locations=[str(repo_root / "inference")],
+    )
+    if spec is None or spec.loader is None:
+        raise RuntimeError(f"Could not load local inference package from {repo_root}")
+    module = importlib.util.module_from_spec(spec)
+    sys.modules["inference"] = module
+    spec.loader.exec_module(module)
+    return module
+
+
+def _build_workflow(model_id: str, confidence: float) -> dict:
+    return {
+        "version": "1.0",
+        "inputs": [{"type": "WorkflowImage", "name": "image"}],
+        "steps": [
+            {
+                "type": "roboflow_core/roboflow_instance_segmentation_model@v3",
+                "name": "segmentation",
+                "images": "$inputs.image",
+                "model_id": model_id,
+                "confidence_mode": "custom",
+                "custom_confidence": confidence,
+            },
+        ],
+        "outputs": [
+            {
+                "type": "JsonField",
+                "name": "predictions",
+                "selector": "$steps.segmentation.predictions",
+            },
+        ],
+    }
+
+
+def _jsonable(value: Any) -> Any:
+    if isinstance(value, dict):
+        return {
+            str(key): _jsonable(val)
+            for key, val in sorted(value.items())
+            if str(key) not in _VOLATILE_OUTPUT_KEYS
+        }
+    if isinstance(value, (list, tuple)):
+        return [_jsonable(element) for element in value]
+    if all(hasattr(value, attr) for attr in ("xyxy", "confidence", "class_id")):
+        if _SV_DETECTIONS_SERIALIZER is None:
+            raise RuntimeError("sv.Detections serializer was not initialized")
+        return _jsonable(_SV_DETECTIONS_SERIALIZER(value))
+    if isinstance(value, np.ndarray):
+        return _jsonable(value.tolist())
+    if isinstance(value, np.generic):
+        return value.item()
+    if isinstance(value, bytes):
+        return value.decode("ascii")
+    if hasattr(value, "model_dump"):
+        return _jsonable(value.model_dump(by_alias=True, exclude_none=True))
+    if hasattr(value, "dict"):
+        return _jsonable(value.dict())
+    return value
+
+
+def _as_list(value: Any) -> list:
+    return value if isinstance(value, list) else [value]
+
+
+def do_run(
+    out_path: str,
+    repo_root: str,
+    label: str,
+    video_reference: str,
+    backend: str,
+    confidence: float,
+) -> None:
+    global _SV_DETECTIONS_SERIALIZER
+    repo_path = _bootstrap_repo_root(repo_root)
+    os.environ.setdefault(
+        "ONNXRUNTIME_EXECUTION_PROVIDERS",
+        "[TensorrtExecutionProvider,CUDAExecutionProvider,CPUExecutionProvider]",
+    )
+    os.environ["DISABLED_INFERENCE_MODELS_BACKENDS"] = ",".join(
+        sorted(ALL_BACKENDS - {backend})
+    )
+    model_id = _prepare_local_workflow_model_bundle(repo_root=repo_path)
+    inference_module = _load_local_inference(repo_root=repo_path)
+    from inference.core.workflows.core_steps.common.serializers import (
+        serialise_sv_detections,
+    )
+
+    _SV_DETECTIONS_SERIALIZER = serialise_sv_detections
+    inference_pipeline = inference_module.InferencePipeline
+    signature = {
+        "git_head": _safe_git_output(repo_path, "rev-parse", "--short", "HEAD"),
+        "git_describe": _safe_git_output(
+            repo_path, "describe", "--always", "--dirty", "--broken"
+        ),
+    }
+    header = {
+        "_kind": "header",
+        "label": label,
+        "repo_root": str(repo_path),
+        "model_id": model_id,
+        "video_reference": video_reference,
+        "confidence": confidence,
+        "git_head": signature["git_head"],
+        "git_describe": signature["git_describe"],
+        "flags": {
+            key: os.environ.get(key)
+            for key in sorted({*BASE_FLAGS_OFF.keys(), *CANDIDATE_FLAGS_ON.keys()})
+        },
+    }
+    errors = []
+    records = 0
+    with open(out_path, "wb") as f:
+        pickle.dump(header, f)
+
+        def sink(predictions, video_frames) -> None:
+            nonlocal records
+            prediction_list = _as_list(predictions)
+            frame_list = _as_list(video_frames)
+            if len(prediction_list) != len(frame_list):
+                errors.append(
+                    f"sink length mismatch: {len(prediction_list)} predictions "
+                    f"for {len(frame_list)} frames"
+                )
+                return
+            for prediction, video_frame in zip(prediction_list, frame_list):
+                pickle.dump(
+                    {
+                        "_kind": "record",
+                        "frame_id": int(video_frame.frame_id),
+                        "source_id": video_frame.source_id,
+                        "prediction": _jsonable(prediction),
+                    },
+                    f,
+                )
+                records += 1
+
+        print(
+            "[run] "
+            f"label={label} repo_root={repo_path} head={signature['git_head']} "
+            f"model_id={model_id} video={video_reference}",
+            flush=True,
+        )
+        pipeline = inference_pipeline.init_with_workflow(
+            video_reference=video_reference,
+            workflow_specification=_build_workflow(model_id, confidence),
+            on_prediction=sink,
+            serialize_results=False,
+        )
+        pipeline.start()
+        pipeline.join()
+        if errors:
+            raise RuntimeError("; ".join(errors))
+        if records == 0:
+            raise RuntimeError("video workflow emitted no prediction records")
+        pickle.dump(
+            {
+                "_kind": "footer",
+                "label": label,
+                "n_records": records,
+            },
+            f,
+        )
+    print(f"[run] label={label} records={records} saved={out_path}", flush=True)
+
+
+def _iter_pickles(path: str):
+    with open(path, "rb") as f:
+        while True:
+            try:
+                yield pickle.load(f)
+            except EOFError:
+                return
+
+
+def _compare_values(
+    base: Any, candidate: Any, path: str, atol: float, errors: list
+) -> None:
+    if isinstance(base, dict) and isinstance(candidate, dict):
+        if set(base) != set(candidate):
+            errors.append(f"{path}: key mismatch {sorted(base)} != {sorted(candidate)}")
+            return
+        for key in sorted(base):
+            _compare_values(base[key], candidate[key], f"{path}.{key}", atol, errors)
+        return
+    if isinstance(base, list) and isinstance(candidate, list):
+        if len(base) != len(candidate):
+            errors.append(f"{path}: length mismatch {len(base)} != {len(candidate)}")
+            return
+        for index, (base_item, candidate_item) in enumerate(zip(base, candidate)):
+            _compare_values(base_item, candidate_item, f"{path}[{index}]", atol, errors)
+        return
+    if isinstance(base, (int, float)) and isinstance(candidate, (int, float)):
+        if not math.isclose(float(base), float(candidate), abs_tol=atol, rel_tol=0.0):
+            errors.append(f"{path}: numeric mismatch {base} != {candidate}")
+        return
+    if base != candidate:
+        errors.append(f"{path}: value mismatch {base!r} != {candidate!r}")
+
+
+def _extract_detection_list(prediction: Any) -> Optional[list]:
+    if not isinstance(prediction, dict) or "predictions" not in prediction:
+        return None
+    value = prediction["predictions"]
+    if isinstance(value, dict) and isinstance(value.get("predictions"), list):
+        return value["predictions"]
+    if isinstance(value, list):
+        return value
+    return None
+
+
+def _xyxy_from_detection(
+    detection: dict,
+) -> Optional[tuple[float, float, float, float]]:
+    try:
+        width = float(detection["width"])
+        height = float(detection["height"])
+        center_x = float(detection["x"])
+        center_y = float(detection["y"])
+    except (KeyError, TypeError, ValueError):
+        return None
+    half_width = width / 2.0
+    half_height = height / 2.0
+    return (
+        center_x - half_width,
+        center_y - half_height,
+        center_x + half_width,
+        center_y + half_height,
+    )
+
+
+def _box_iou(
+    left: tuple[float, float, float, float], right: tuple[float, float, float, float]
+) -> float:
+    x0 = max(left[0], right[0])
+    y0 = max(left[1], right[1])
+    x1 = min(left[2], right[2])
+    y1 = min(left[3], right[3])
+    iw = max(0.0, x1 - x0)
+    ih = max(0.0, y1 - y0)
+    inter = iw * ih
+    left_area = max(0.0, left[2] - left[0]) * max(0.0, left[3] - left[1])
+    right_area = max(0.0, right[2] - right[0]) * max(0.0, right[3] - right[1])
+    union = left_area + right_area - inter
+    return inter / union if union > 0 else 0.0
+
+
+def _compare_detection_lists(
+    base_detections: list,
+    candidate_detections: list,
+    frame_id: int,
+    min_box_iou: float,
+    max_score_delta: float,
+    stats: dict,
+    errors: list,
+) -> None:
+    """Greedily match same-class detections by box IoU and record stats/errors."""
+    stats["base_detections"] += len(base_detections)
+    stats["candidate_detections"] += len(candidate_detections)
+    if len(base_detections) != len(candidate_detections):
+        stats["count_mismatch_frames"] += 1
+        errors.append(
+            f"frame {frame_id}: detection count mismatch "
+            f"{len(base_detections)} != {len(candidate_detections)}"
+        )
+        return
+    used_base_indices = set()
+    for candidate_index, candidate_detection in enumerate(candidate_detections):
+        candidate_class = candidate_detection.get("class_id")
+        candidate_box = _xyxy_from_detection(candidate_detection)
+        if candidate_box is None:
+            errors.append(f"frame {frame_id}: candidate detection has no xyxy box")
+            continue
+        best_base_index = -1
+        best_iou = -1.0  # below any real IoU, so the first accepted box wins
+        for base_index, base_detection in enumerate(base_detections):
+            if base_index in used_base_indices:
+                continue
+            if base_detection.get("class_id") != candidate_class:
+                continue
+            base_box = _xyxy_from_detection(base_detection)
+            if base_box is None:
+                continue
+            box_iou = _box_iou(base_box, candidate_box)
+            # min_box_iou is inclusive: an IoU equal to it is an accepted match.
+            if box_iou >= min_box_iou and box_iou > best_iou:
+                best_iou = box_iou
+                best_base_index = base_index
+        if best_base_index < 0:
+            stats["unmatched_candidate_detections"] += 1
+            errors.append(
+                f"frame {frame_id}: candidate detection {candidate_index} "
+                f"class_id={candidate_class!r} has no matching base detection"
+            )
+            continue
+        used_base_indices.add(best_base_index)
+        stats["matched_detections"] += 1
+        stats["box_ious"].append(best_iou)
+        base_detection = base_detections[best_base_index]
+        base_score = float(base_detection.get("confidence", 0.0))
+        candidate_score = float(candidate_detection.get("confidence", 0.0))
+        score_delta = abs(base_score - candidate_score)
+        stats["score_deltas"].append(score_delta)
+        if score_delta > max_score_delta:
+            errors.append(
+                f"frame {frame_id}: score delta {score_delta:.6f} exceeds "
+                f"{max_score_delta:.6f}"
+            )
+        base_points = base_detection.get("points") or []
+        candidate_points = candidate_detection.get("points") or []
+        if len(base_points) != len(candidate_points):
+            stats["polygon_point_count_mismatches"] += 1
+
+    unmatched_base = len(base_detections) - len(used_base_indices)
+    stats["unmatched_base_detections"] += unmatched_base
+    if unmatched_base:
+        errors.append(f"frame {frame_id}: {unmatched_base} base detections unmatched")
+
+
+def do_compare(
+    base_path: str,
+    candidate_path: str,
+    atol: float,
+    min_box_iou: float,
+    max_score_delta: float,
+) -> None:
+    """Compare two ``do_run`` pickle streams frame by frame and print a summary.
+
+    Raises AssertionError on any mismatch, including unequal or missing frame counts.
+    """
+    base_iter = _iter_pickles(base_path)
+    candidate_iter = _iter_pickles(candidate_path)
+    base_header = next(base_iter)
+    candidate_header = next(candidate_iter)
+    compared = 0
+    errors = []
+    stats = {
+        "base_detections": 0,
+        "candidate_detections": 0,
+        "matched_detections": 0,
+        "count_mismatch_frames": 0,
+        "unmatched_candidate_detections": 0,
+        "unmatched_base_detections": 0,
+        "polygon_point_count_mismatches": 0,
+        "box_ious": [],
+        "score_deltas": [],
+    }
+    base_footer = candidate_footer = None
+    # Pair records positionally; only a genuine footer may be stored as one.
+    for base_record in base_iter:
+        candidate_record = next(candidate_iter, None)
+        if base_record.get("_kind") == "footer":
+            base_footer = base_record
+        if candidate_record is not None and candidate_record.get("_kind") == "footer":
+            candidate_footer = candidate_record
+        if base_footer or candidate_footer or candidate_record is None:
+            break
+        if base_record["frame_id"] != candidate_record["frame_id"]:
+            errors.append(
+                f"frame id mismatch {base_record['frame_id']} != "
+                f"{candidate_record['frame_id']}"
+            )
+            break
+        compared += 1
+        base_detections = _extract_detection_list(base_record["prediction"])
+        candidate_detections = _extract_detection_list(candidate_record["prediction"])
+        if base_detections is not None and candidate_detections is not None:
+            _compare_detection_lists(
+                base_detections=base_detections,
+                candidate_detections=candidate_detections,
+                frame_id=base_record["frame_id"],
+                min_box_iou=min_box_iou,
+                max_score_delta=max_score_delta,
+                stats=stats,
+                errors=errors,
+            )
+        else:
+            _compare_values(
+                base_record["prediction"],
+                candidate_record["prediction"],
+                f"frame[{base_record['frame_id']}]",
+                atol,
+                errors,
+            )
+        if len(errors) >= 20:
+            break
+    # Drain whichever stream has not reached its footer yet.
+    if base_footer is None:
+        base_footer = next((o for o in base_iter if o.get("_kind") == "footer"), None)
+    if candidate_footer is None:
+        candidate_footer = next(
+            (o for o in candidate_iter if o.get("_kind") == "footer"), None
+        )
+    n_base, n_candidate = (
+        footer["n_records"] if footer else None
+        for footer in (base_footer, candidate_footer)
+    )
+    if n_base is None or n_base != n_candidate:
+        # Inserted first so it survives the 20-line cap on printed mismatches.
+        errors.insert(0, f"frame count mismatch {n_base} != {n_candidate}")
+
+    print(
+        f"\n==== RF-DETR workflow video parity: {base_header['label']} vs "
+        f"{candidate_header['label']} ===="
+    )
+    print(f"  base repo                  : {base_header['git_describe']}")
+    print(f"  candidate repo             : {candidate_header['git_describe']}")
+    print(f"  frames base / candidate    : {n_base} / {n_candidate}")
+    print(f"  compared frames            : {compared}")
+    print(
+        f"  detections base / candidate: "
+        f"{stats['base_detections']} / {stats['candidate_detections']}"
+    )
+    print(f"  matched detections         : {stats['matched_detections']}")
+    print(f"  count-mismatch frames      : {stats['count_mismatch_frames']}")
+    print(
+        f"  unmatched base / candidate : "
+        f"{stats['unmatched_base_detections']} / "
+        f"{stats['unmatched_candidate_detections']}"
+    )
+    if stats["box_ious"]:
+        print(
+            f"  mean / min box IoU         : "
+            f"{np.mean(stats['box_ious']):.6f} / {np.min(stats['box_ious']):.6f}"
+        )
+    if stats["score_deltas"]:
+        print(
+            f"  mean / max |delta score|   : "
+            f"{np.mean(stats['score_deltas']):.3e} / "
+            f"{np.max(stats['score_deltas']):.3e}"
+        )
+    print(f"  polygon point-count diffs  : {stats['polygon_point_count_mismatches']}")
+    if errors:
+        print("  first mismatches:")
+        for error in errors[:20]:
+            print(f"    - {error}")
+        raise AssertionError(f"{len(errors)} parity mismatches found")
+    print("  result                     : all detections matched semantic thresholds")
+
+
+def _run_child(
+    repo_root: Path,
+    label: str,
+    out_path: str,
+    video_reference: str,
+    backend: str,
+    confidence: float,
+    flags: dict,
+) -> None:
+    env = os.environ.copy()
+    env.update(flags)
+    env["MPLCONFIGDIR"] = "/tmp/mpl"
+    model_package = _resolve_model_package()
+    if model_package is not None:
+        env["PARITY_MODEL_PATH"] = str(model_package)
+    env["PYTHONPATH"] = _child_pythonpath(repo_root, env.get("PYTHONPATH"))
+    video_path = Path(video_reference)
+    if not video_path.is_absolute():
+        candidate = SCRIPT_REPO_ROOT / video_path
+        if candidate.exists():
+            video_reference = str(candidate.resolve())
+    args = [
+        PY,
+        str(SELF),
+        "--mode",
+        "run",
+        "--repo-root",
+        str(repo_root),
+        "--label",
+        label,
+        "--out",
+        out_path,
+        "--video_reference",
+        video_reference,
+        "--backend",
+        backend,
+        "--confidence",
+        str(confidence),
+    ]
+    print(
+        "\n---- child ----\n"
+        f"  label={label}\n"
+        f"  repo_root={repo_root}\n"
+        f"  out={out_path}\n"
+        f"  flags={flags}",
+        flush=True,
+    )
+    subprocess.run(args, cwd=str(repo_root), env=env, check=True)
+
+
+def main() -> None:
+    """Dispatch to run/compare mode, or drive both child runs and compare them."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--mode", choices=("driver", "run", "compare"), default="driver"
+    )
+    parser.add_argument("--repo-root")
+    parser.add_argument("--label")
+    parser.add_argument("--out")
+    parser.add_argument("--base", default=DEFAULT_BASE_OUT)
+    parser.add_argument("--candidate", default=DEFAULT_CANDIDATE_OUT)
+    parser.add_argument("--base-ref", default="main")
+    parser.add_argument("--candidate-ref", default="working-tree")
+    parser.add_argument("--video_reference", default="vehicles_1080p.mp4")
+    parser.add_argument("--confidence", type=float, default=CONFIDENCE)
+    parser.add_argument("--backend", choices=("trt", "onnx", "torch"), default="trt")
+    parser.add_argument("--float-atol", type=float, default=1e-4)
+    parser.add_argument("--min-box-iou", type=float, default=0.5)
+    parser.add_argument("--max-score-delta", type=float, default=0.25)
+    parser.add_argument("--keep-worktrees", action="store_true")
+    args = parser.parse_args()
+
+    if args.mode == "run":
+        if not args.out:
+            raise ValueError("--out is required in run mode")
+        do_run(
+            out_path=args.out,
+            repo_root=args.repo_root or str(SCRIPT_REPO_ROOT),
+            label=args.label or "run",
+            video_reference=args.video_reference,
+            backend=args.backend,
+            confidence=args.confidence,
+        )
+        return
+    if args.mode == "compare":
+        do_compare(
+            args.base,
+            args.candidate,
+            args.float_atol,
+            args.min_box_iou,
+            args.max_score_delta,
+        )
+        return
+
+    targets = []
+    try:
+        # Inside ``try`` so a failure on the second target still cleans up the first.
+        for ref in (args.base_ref, args.candidate_ref):
+            targets.append(_materialize_target(ref))
+        base_target, candidate_target = targets
+        _run_child(
+            repo_root=Path(base_target["repo_root"]),
+            label=f"{base_target['label']} flags-off",
+            out_path=args.base,
+            video_reference=args.video_reference,
+            backend=args.backend,
+            confidence=args.confidence,
+            flags=BASE_FLAGS_OFF,
+        )
+        _run_child(
+            repo_root=Path(candidate_target["repo_root"]),
+            label=f"{candidate_target['label']} flags-on",
+            out_path=args.candidate,
+            video_reference=args.video_reference,
+            backend=args.backend,
+            confidence=args.confidence,
+            flags=CANDIDATE_FLAGS_ON,
+        )
+    finally:
+        if not args.keep_worktrees:
+            for target in reversed(targets):
+                if callable(target["cleanup"]):
+                    target["cleanup"]()
+    do_compare(
+        args.base,
+        args.candidate,
+        args.float_atol,
+        args.min_box_iou,
+        args.max_score_delta,
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/inference/core/models/inference_models_adapters.py b/inference/core/models/inference_models_adapters.py
index acb67b571c..34c529712a 100644
--- a/inference/core/models/inference_models_adapters.py
+++ b/inference/core/models/inference_models_adapters.py
@@ -1,9 +1,10 @@
 import base64
 import io
 import os
-from collections import deque
+from collections import OrderedDict, deque
 from concurrent.futures import Future, ThreadPoolExecutor
 from io import BytesIO
+from threading import local
 from time import perf_counter
 from typing import Any, Deque, List, Optional, Tuple, Union
 
@@ -99,19 +100,32 @@
     "#FF39C9",
 ]
 
-# Pinned host buffers for async DtoH on the full-postproc Triton fast path.
-# Keyed by (name, dtype); reused across frames provided the cached buffer is
-# at least as large as the requested shape in every dimension.
-PINNED_HOST_BUFFERS: dict = {}
+_PINNED_HOST_BUFFER_CACHE_SIZE = 16
+_PINNED_HOST_BUFFER_CONTEXT = local()
 
 
 def get_pinned_buffer(name: str, shape, dtype: torch.dtype) -> torch.Tensor:
+    """Return a thread-local pinned CPU scratch tensor for async DtoH copies.
+
+    Response finalization can run on a worker thread while the inference thread
+    submits later GPU work. Keeping this cache thread-local avoids two workers
+    writing into the same scratch tensor. The small LRU cap prevents retaining a
+    new pinned allocation for every transient shape.
+    """
+    cache = getattr(_PINNED_HOST_BUFFER_CONTEXT, "cache", None)
+    if cache is None:
+        cache = OrderedDict()
+        _PINNED_HOST_BUFFER_CONTEXT.cache = cache
     key = (name, dtype)
-    buf = PINNED_HOST_BUFFERS.get(key)
+    buf = cache.get(key)
     if buf is not None and all(buf.shape[i] >= shape[i] for i in range(len(shape))):
+        cache.move_to_end(key)
         return buf[tuple(slice(0, s) for s in shape)]
     buf = torch.empty(shape, dtype=dtype, pin_memory=True)
-    PINNED_HOST_BUFFERS[key] = buf
+    cache[key] = buf
+    cache.move_to_end(key)
+    while len(cache) > _PINNED_HOST_BUFFER_CACHE_SIZE:
+        cache.popitem(last=False)
     return buf
 
 
diff --git a/inference_models/inference_models/models/rfdetr/triton_postprocess.py b/inference_models/inference_models/models/rfdetr/triton_postprocess.py
index d52cc7a79a..4479f7a482 100644
--- a/inference_models/inference_models/models/rfdetr/triton_postprocess.py
+++ b/inference_models/inference_models/models/rfdetr/triton_postprocess.py
@@ -21,7 +21,7 @@
 """
 
 import warnings
-from collections import OrderedDict, defaultdict
+from collections import OrderedDict
 from threading import Lock
 from typing import List, Optional, Tuple, Union
 
@@ -61,8 +61,10 @@
 _MAX_INTERPOLATION_WEIGHT_CACHE_ENTRIES = 16
 _INTERPOLATION_WEIGHT_CACHE = OrderedDict()
 _INTERPOLATION_WEIGHT_CACHE_LOCK = Lock()
-_PINNED_HOST_POOL = defaultdict(list)
+_MAX_PINNED_HOST_POOL_BUFFERS = 8
+_PINNED_HOST_POOL = OrderedDict()
 _PINNED_HOST_POOL_LOCK = Lock()
+_PINNED_HOST_POOL_SIZE = 0
 
 
 def _get_interpolation_weights(
@@ -160,10 +162,13 @@ def _interpolation_cache_key(
 
 def _acquire_pinned_host_buffer(source: torch.Tensor) -> torch.Tensor:
     """Return a pinned CPU tensor matching ``source`` for async DtoH copies."""
+    global _PINNED_HOST_POOL_SIZE
     key = (tuple(source.shape), source.dtype)
     with _PINNED_HOST_POOL_LOCK:
-        buffers = _PINNED_HOST_POOL[key]
+        buffers = _PINNED_HOST_POOL.get(key)
         if buffers:
+            _PINNED_HOST_POOL.move_to_end(key)
+            _PINNED_HOST_POOL_SIZE -= 1
             return buffers.pop()
     # Pinned memory is required for ``non_blocking=True`` GPU-to-CPU copies to
     # overlap with later GPU work; allocating it per frame is expensive.
@@ -171,10 +176,16 @@ def _acquire_pinned_host_buffer(source: torch.Tensor) -> torch.Tensor:
 
 
 def _release_pinned_host_buffer(buffer: torch.Tensor) -> None:
-    """Return a pinned host buffer to the small shape/dtype reuse pool."""
+    """Return a pinned host buffer to the bounded shape/dtype reuse pool."""
+    global _PINNED_HOST_POOL_SIZE
     key = (tuple(buffer.shape), buffer.dtype)
     with _PINNED_HOST_POOL_LOCK:
-        _PINNED_HOST_POOL[key].append(buffer)
+        if _PINNED_HOST_POOL_SIZE >= _MAX_PINNED_HOST_POOL_BUFFERS:
+            return
+        buffers = _PINNED_HOST_POOL.setdefault(key, [])
+        buffers.append(buffer)
+        _PINNED_HOST_POOL.move_to_end(key)
+        _PINNED_HOST_POOL_SIZE += 1
 
 
 def post_process_single_instance_segmentation_result_to_rle_masks_triton(
diff --git a/tests/inference/unit_tests/core/interfaces/stream/test_workflows.py b/tests/inference/unit_tests/core/interfaces/stream/test_workflows.py
new file mode 100644
index 0000000000..0ff4936eeb
--- /dev/null
+++ b/tests/inference/unit_tests/core/interfaces/stream/test_workflows.py
@@ -0,0 +1,137 @@
+from datetime import datetime
+from types import SimpleNamespace
+
+import numpy as np
+
+from inference.core.interfaces.camera.entities import VideoFrame
+from inference.core.interfaces.stream.model_handlers.workflows import WorkflowRunner
+from inference.core.interfaces.stream.model_handlers.workflows_context import (
+    is_workflow_stream_flush_active,
+)
+
+
+class _FakeExecutionEngine:
+    def __init__(self, stream_buffer_depth: int) -> None:
+        self._stream_buffer_depth = stream_buffer_depth
+        step = SimpleNamespace(
+            is_stream_pipelined=lambda: stream_buffer_depth > 0,
+            stream_pipeline_depth=lambda: stream_buffer_depth,
+        )
+        self._engine = SimpleNamespace(
+            _compiled_workflow=SimpleNamespace(
+                steps={"segmentation": SimpleNamespace(step=step)}
+            )
+        )
+        self.calls = []
+
+    def run(
+        self,
+        runtime_parameters,
+        fps,
+        serialize_results,
+        _is_preview,
+    ):
+        frame_number = runtime_parameters["image"][0]["video_metadata"].frame_number
+        flush_active = is_workflow_stream_flush_active()
+        self.calls.append(
+            {
+                "frame_number": frame_number,
+                "flush_active": flush_active,
+                "fps": fps,
+                "serialize_results": serialize_results,
+                "is_preview": _is_preview,
+            }
+        )
+        if flush_active:
+            prediction_frame = frame_number
+        else:
+            prediction_frame = frame_number - self._stream_buffer_depth
+        return [{"predictions": f"frame-{prediction_frame}"}]
+
+
+def _make_frame(frame_id: int) -> VideoFrame:
+    return VideoFrame(
+        image=np.zeros((8, 8, 3), dtype=np.uint8),
+        frame_id=frame_id,
+        frame_timestamp=datetime.fromtimestamp(frame_id),
+        fps=30.0,
+        measured_fps=None,
+        source_id=0,
+        comes_from_video_file=True,
+    )
+
+
+def test_workflow_runner_without_stream_buffering_returns_current_frame() -> None:
+    engine = _FakeExecutionEngine(stream_buffer_depth=0)
+    runner = WorkflowRunner(
+        workflows_parameters=None,
+        execution_engine=engine,
+        image_input_name="image",
+        video_metadata_input_name="video_metadata",
+        serialize_results=True,
+        _is_preview=True,
+    )
+    frame = _make_frame(1)
+
+    result = runner([frame])
+
+    assert result is not None
+    assert result.predictions == [{"predictions": "frame-1"}]
+    assert result.video_frames == [frame]
+    assert engine.calls == [
+        {
+            "frame_number": 1,
+            "flush_active": False,
+            "fps": 30.0,
+            "serialize_results": True,
+            "is_preview": True,
+        }
+    ]
+
+
+def test_workflow_runner_buffers_frames_until_delayed_prediction_arrives() -> None:
+    engine = _FakeExecutionEngine(stream_buffer_depth=1)
+    runner = WorkflowRunner(
+        workflows_parameters=None,
+        execution_engine=engine,
+        image_input_name="image",
+        video_metadata_input_name="video_metadata",
+    )
+    frame_1 = _make_frame(1)
+    frame_2 = _make_frame(2)
+
+    first_result = runner([frame_1])
+    second_result = runner([frame_2])
+    flushed_results = runner.flush()
+
+    assert first_result is None
+    assert second_result is not None
+    assert second_result.predictions == [{"predictions": "frame-1"}]
+    assert second_result.video_frames == [frame_1]
+    assert flushed_results is not None
+    assert len(flushed_results) == 1
+    assert flushed_results[0].predictions == [{"predictions": "frame-2"}]
+    assert flushed_results[0].video_frames == [frame_2]
+    assert engine.calls == [
+        {
+            "frame_number": 1,
+            "flush_active": False,
+            "fps": 30.0,
+            "serialize_results": False,
+            "is_preview": False,
+        },
+        {
+            "frame_number": 2,
+            "flush_active": False,
+            "fps": 30.0,
+            "serialize_results": False,
+            "is_preview": False,
+        },
+        {
+            "frame_number": 2,
+            "flush_active": True,
+            "fps": 30.0,
+            "serialize_results": False,
+            "is_preview": False,
+        },
+    ]

From 34b602255513c735979f9721d687e5a84c8c4718 Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Wed, 3 Jun 2026 02:06:40 +0000
Subject: [PATCH 48/76] Clarify RF-DETR pipeline async handoff

---
 .../core/models/inference_models_adapters.py  |  39 ++---
 .../roboflow/instance_segmentation/v3.py      |   3 +-
 .../inference_models/configuration.py         |  44 ++++++
 .../models/base/async_handoff.py              | 142 ++++++++++++++++++
 .../models/base/instance_segmentation.py      |   9 +-
 .../rfdetr_instance_segmentation_trt.py       |  19 ++-
 .../models/rfdetr/triton_postprocess.py       |  14 +-
 .../test_instance_segmentation_future.py      |  83 ++++++++++
 .../tests/unit_tests/test_configuration.py    |  48 ++++++
 9 files changed, 365 insertions(+), 36 deletions(-)
 create mode 100644 inference_models/inference_models/models/base/async_handoff.py
 create mode 100644 inference_models/tests/unit_tests/models/test_instance_segmentation_future.py
 create mode 100644 inference_models/tests/unit_tests/test_configuration.py

diff --git a/inference/core/models/inference_models_adapters.py b/inference/core/models/inference_models_adapters.py
index 34c529712a..d5286857e3 100644
--- a/inference/core/models/inference_models_adapters.py
+++ b/inference/core/models/inference_models_adapters.py
@@ -1,6 +1,5 @@
 import base64
 import io
-import os
 from collections import OrderedDict, deque
 from concurrent.futures import Future, ThreadPoolExecutor
 from io import BytesIO
@@ -24,8 +23,8 @@
     InstanceSegmentationInferenceResponse,
     InstanceSegmentationInferenceResponseDC,
     InstanceSegmentationPrediction,
-    InstanceSegmentationRLEPrediction,
     InstanceSegmentationPredictionDC,
+    InstanceSegmentationRLEPrediction,
     Keypoint,
     KeypointsDetectionInferenceResponse,
     KeypointsPrediction,
@@ -69,6 +68,17 @@
     PreProcessingOverrides,
     SemanticSegmentationModel,
 )
+from inference_models.configuration import get_rfdetr_pipeline_depth
+from inference_models.models.base.async_handoff import (
+    adapter_gpu_work_submitted,
+    attach_adapter_mapped_kwargs,
+    attach_async_response_future,
+    get_adapter_gpu_submit_generation,
+    get_adapter_mapped_kwargs,
+    get_deferred_postprocess_done_event,
+    get_deferred_postprocess_finalizer,
+    mark_adapter_gpu_work_submitted,
+)
 from inference_models.models.base.instance_segmentation import InferenceFuture
 from inference_models.models.base.semantic_segmentation import (
     SemanticSegmentationResult,
@@ -327,7 +337,7 @@ def __init__(self, model_id: str, api_key: str = None, **kwargs):
         # means two stages in parallel: while the GPU works on the current
         # frame, the CPU prepares/submits the next frame, then harvests the
         # previous response. The response delay is therefore depth - 1 frames.
-        self._pipeline_depth = max(1, int(os.getenv("RFDETR_PIPELINE_DEPTH", "1")))
+        self._pipeline_depth = get_rfdetr_pipeline_depth()
         self._response_delay = max(1, self._pipeline_depth - 1)
         # Per-adapter in-flight futures + metadata. Not thread-safe; the
         # InferencePipeline is single-producer and the adapter is owned by a
@@ -400,9 +410,7 @@ def predict(self, img_in, **kwargs):
         self._submit_next_pending_gpu_work()
         pre_processing_meta = getattr(img_in, "_pre_processing_meta", None)
         fut = self._model.forward_async(img_in, pre_processing_meta, **mapped_kwargs)
-        fut._adapter_kwargs = {  # type: ignore[attr-defined]
-            "mapped_kwargs": mapped_kwargs
-        }
+        attach_adapter_mapped_kwargs(fut, mapped_kwargs)
         if pre_processing_meta is not None:
             self._submit_future_gpu_work(fut, pre_processing_meta, mapped_kwargs)
         self._submit_ready_responses()
@@ -436,7 +444,7 @@ def _submit_future_gpu_work(
         meta: PreprocessingMetadata,
         mapped_kwargs: dict,
     ) -> None:
-        if getattr(fut, "_adapter_gpu_work_submitted", False):
+        if adapter_gpu_work_submitted(fut):
             return None
         fut._meta = meta  # type: ignore[attr-defined]
         fut._kwargs = mapped_kwargs  # type: ignore[attr-defined]
@@ -444,10 +452,7 @@ def _submit_future_gpu_work(
         if callable(submit_gpu_work):
             submit_gpu_work(meta)
             self._gpu_submit_generation = getattr(self, "_gpu_submit_generation", 0) + 1
-            fut._adapter_gpu_submit_generation = (  # type: ignore[attr-defined]
-                self._gpu_submit_generation
-            )
-            fut._adapter_gpu_work_submitted = True  # type: ignore[attr-defined]
+            mark_adapter_gpu_work_submitted(fut, self._gpu_submit_generation)
 
     def _submit_next_pending_gpu_work(self) -> None:
         if not self._pending_gpu_submissions:
@@ -477,10 +482,10 @@ def _submit_response_build(
     def _submit_ready_responses(self) -> None:
         while self._pending_futures:
             fut, meta, mapped_kwargs = self._pending_futures[0]
-            submit_generation = getattr(fut, "_adapter_gpu_submit_generation", None)
+            submit_generation = get_adapter_gpu_submit_generation(fut)
             if submit_generation is None:
                 self._submit_future_gpu_work(fut, meta, mapped_kwargs)
-                submit_generation = getattr(fut, "_adapter_gpu_submit_generation", None)
+                submit_generation = get_adapter_gpu_submit_generation(fut)
             if submit_generation is None:
                 break
             gpu_submit_generation = getattr(self, "_gpu_submit_generation", 0)
@@ -503,7 +508,7 @@ def postprocess(
                 predictions, preprocess_return_metadata, **kwargs
             )
         fut: InferenceFuture = predictions
-        mapped_kwargs = getattr(fut, "_adapter_kwargs", {}).get("mapped_kwargs", {})
+        mapped_kwargs = get_adapter_mapped_kwargs(fut)
         self._pending_gpu_submissions.append(
             (
                 fut,
@@ -529,7 +534,7 @@ def postprocess(
                 workflow_execution=True,
             )
             if responses:
-                responses[0]._async_response_future = response_future
+                attach_async_response_future(responses[0], response_future)
             return responses
         return response_future.result()
 
@@ -608,7 +613,7 @@ def _build_responses_from_detections(
 
         responses: List[InstanceSegmentationInferenceResponse] = []
         for preproc_metadata, det in zip(preprocess_return_metadata, detections_list):
-            finalize_pending = getattr(det, "_finalize_pending_postproc", None)
+            finalize_pending = get_deferred_postprocess_finalizer(det)
             if callable(finalize_pending):
                 det = finalize_pending()
             H = preproc_metadata.original_size.height
@@ -619,7 +624,7 @@ def _build_responses_from_detections(
             mask_packed_gpu = getattr(det, "_mask_packed_gpu", None)
             mask_cpu = getattr(det, "_mask_cpu", None)
             defer_count_to_adapter = getattr(det, "_defer_count_to_adapter", False)
-            done_event = getattr(det, "_postproc_done_event", None)
+            done_event = get_deferred_postprocess_done_event(det)
             dense_mask_cuda = isinstance(mask_gpu, torch.Tensor) and mask_gpu.is_cuda
             packed_mask_cuda = (
                 isinstance(mask_packed_gpu, torch.Tensor) and mask_packed_gpu.is_cuda
diff --git a/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v3.py b/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v3.py
index d2470e0ff5..8ebb6a7968 100644
--- a/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v3.py
+++ b/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v3.py
@@ -55,6 +55,7 @@
     WorkflowBlock,
     WorkflowBlockManifest,
 )
+from inference_models.models.base.async_handoff import get_async_response_future
 from inference_sdk import InferenceConfiguration, InferenceHTTPClient
 
 LONG_DESCRIPTION = """
@@ -388,7 +389,7 @@ def _extract_async_response_future(
         predictions: List[object],
     ) -> Optional[Future]:
         for prediction in predictions:
-            async_response_future = getattr(prediction, "_async_response_future", None)
+            async_response_future = get_async_response_future(prediction)
             if isinstance(async_response_future, Future):
                 return async_response_future
         return None
diff --git a/inference_models/inference_models/configuration.py b/inference_models/inference_models/configuration.py
index 5b10d0daf3..3289706b08 100644
--- a/inference_models/inference_models/configuration.py
+++ b/inference_models/inference_models/configuration.py
@@ -1,8 +1,10 @@
 import os
 import warnings
+from typing import Optional
 
 import torch
 
+from inference_models.errors import InvalidEnvVariable
 from inference_models.utils.environment import (
     get_boolean_from_env,
     get_comma_separated_list_of_integers_from_env,
@@ -299,6 +301,48 @@
     variable_name="INFERENCE_MODELS_RFDETR_TRITON_PREPROC_ENABLED",
     default=DEFAULT_INFERENCE_MODELS_RFDETR_TRITON_PREPROC_ENABLED,
 )
+RFDETR_PIPELINE_DEPTH_ENV_NAME = "RFDETR_PIPELINE_DEPTH"
+DEFAULT_RFDETR_PIPELINE_DEPTH = 1
+MIN_RFDETR_PIPELINE_DEPTH = 1
+
+
+def parse_rfdetr_pipeline_depth(value: Optional[str]) -> int:
+    """Parse and validate the RF-DETR streaming pipeline depth.
+
+    Depth is the number of in-flight CPU/GPU stages the stream adapter may keep
+    alive. ``1`` preserves the original synchronous behavior; larger values
+    enable delayed response finalization. ``None`` yields the default. Zero,
+    negative, and non-integer values raise ``InvalidEnvVariable`` (not clamped).
+    """
+    if value is None:
+        return DEFAULT_RFDETR_PIPELINE_DEPTH
+    try:
+        parsed = int(value)
+    except (TypeError, ValueError) as error:
+        raise InvalidEnvVariable(
+            message=(
+                f"Expected environment variable `{RFDETR_PIPELINE_DEPTH_ENV_NAME}` "
+                f"to be an integer but got '{value}'"
+            ),
+            help_url="https://inference-models.roboflow.com/errors/runtime-environment/#invalidenvvariable",
+        ) from error
+    if parsed < MIN_RFDETR_PIPELINE_DEPTH:
+        raise InvalidEnvVariable(
+            message=(
+                f"Expected environment variable `{RFDETR_PIPELINE_DEPTH_ENV_NAME}` "
+                f"to be >= {MIN_RFDETR_PIPELINE_DEPTH} but got '{value}'"
+            ),
+            help_url="https://inference-models.roboflow.com/errors/runtime-environment/#invalidenvvariable",
+        )
+    return parsed
+
+
+def get_rfdetr_pipeline_depth() -> int:
+    """Read and validate ``RFDETR_PIPELINE_DEPTH`` from the environment."""
+    return parse_rfdetr_pipeline_depth(os.getenv(RFDETR_PIPELINE_DEPTH_ENV_NAME))
+
+
+RFDETR_PIPELINE_DEPTH = get_rfdetr_pipeline_depth()
 INFERENCE_MODELS_ROBOFLOW_INSTANT_DEFAULT_CONFIDENCE = get_float_from_env(
     variable_name="INFERENCE_MODELS_ROBOFLOW_INSTANT_DEFAULT_CONFIDENCE",
     default=0.99,
diff --git a/inference_models/inference_models/models/base/async_handoff.py b/inference_models/inference_models/models/base/async_handoff.py
new file mode 100644
index 0000000000..377a54eb9c
--- /dev/null
+++ b/inference_models/inference_models/models/base/async_handoff.py
@@ -0,0 +1,142 @@
+"""Explicit handoff state shared by async inference pipeline components.
+
+The RF-DETR stream pipeline crosses package boundaries: inference-models owns
+CUDA execution and sparse postprocess, while inference owns workflow response
+assembly. This module centralizes the small amount of per-request state passed
+between those layers so call sites do not depend on scattered private attribute
+names.
+"""
+
+from dataclasses import dataclass, replace
+from typing import Any, Callable, Optional
+
+_ADAPTER_CONTEXT_ATTR = "_inference_adapter_future_context"
+_ASYNC_RESPONSE_FUTURE_ATTR = "_async_response_future"
+_DEFERRED_POSTPROCESS_ATTR = "_inference_deferred_postprocess_handoff"
+
+
+@dataclass(frozen=True)
+class AdapterFutureContext:
+    """State the inference adapter keeps on an in-flight model future."""
+
+    mapped_kwargs: dict
+    gpu_work_submitted: bool = False
+    gpu_submit_generation: Optional[int] = None
+
+
+@dataclass(frozen=True)
+class DeferredPostprocessHandoff:
+    """CUDA/postprocess state consumed later by response finalization."""
+
+    done_event: Any
+    trt_outputs_consumed_event: Any
+    finalize: Callable[[], Any]
+
+
+def attach_adapter_mapped_kwargs(future: Any, mapped_kwargs: dict) -> None:
+    """Store mapped inference kwargs on a model future."""
+    setattr(
+        future,
+        _ADAPTER_CONTEXT_ATTR,
+        AdapterFutureContext(mapped_kwargs=dict(mapped_kwargs)),
+    )
+
+
+def get_adapter_mapped_kwargs(future: Any) -> dict:
+    """Return mapped inference kwargs stored on a model future."""
+    context = getattr(future, _ADAPTER_CONTEXT_ATTR, None)
+    if not isinstance(context, AdapterFutureContext):
+        return {}
+    return context.mapped_kwargs
+
+
+def adapter_gpu_work_submitted(future: Any) -> bool:
+    """Return whether eager GPU postprocess was submitted for a future."""
+    context = getattr(future, _ADAPTER_CONTEXT_ATTR, None)
+    return isinstance(context, AdapterFutureContext) and context.gpu_work_submitted
+
+
+def mark_adapter_gpu_work_submitted(future: Any, generation: int) -> None:
+    """Mark a future's eager GPU postprocess submission generation."""
+    context = getattr(future, _ADAPTER_CONTEXT_ATTR, None)
+    if not isinstance(context, AdapterFutureContext):
+        context = AdapterFutureContext(mapped_kwargs={})
+    setattr(
+        future,
+        _ADAPTER_CONTEXT_ATTR,
+        replace(
+            context,
+            gpu_work_submitted=True,
+            gpu_submit_generation=generation,
+        ),
+    )
+
+
+def get_adapter_gpu_submit_generation(future: Any) -> Optional[int]:
+    """Return the generation when eager GPU postprocess was submitted."""
+    context = getattr(future, _ADAPTER_CONTEXT_ATTR, None)
+    if not isinstance(context, AdapterFutureContext):
+        return None
+    return context.gpu_submit_generation
+
+
+def attach_async_response_future(response: Any, response_future: Any) -> None:
+    """Attach a CPU response future to a placeholder workflow response."""
+    setattr(response, _ASYNC_RESPONSE_FUTURE_ATTR, response_future)
+
+
+def get_async_response_future(response: Any) -> Any:
+    """Return the CPU response future attached to a workflow response."""
+    return getattr(response, _ASYNC_RESPONSE_FUTURE_ATTR, None)
+
+
+def attach_deferred_postprocess_handoff(
+    detections: Any,
+    done_event: Any,
+    trt_outputs_consumed_event: Any,
+    finalize: Callable[[], Any],
+) -> None:
+    """Attach deferred sparse postprocess completion state to detections."""
+    setattr(
+        detections,
+        _DEFERRED_POSTPROCESS_ATTR,
+        DeferredPostprocessHandoff(
+            done_event=done_event,
+            trt_outputs_consumed_event=trt_outputs_consumed_event,
+            finalize=finalize,
+        ),
+    )
+
+
+def get_deferred_postprocess_handoff(
+    detections: Any,
+) -> Optional[DeferredPostprocessHandoff]:
+    """Return deferred postprocess state attached to detections, if present."""
+    handoff = getattr(detections, _DEFERRED_POSTPROCESS_ATTR, None)
+    if not isinstance(handoff, DeferredPostprocessHandoff):
+        return None
+    return handoff
+
+
+def get_deferred_postprocess_finalizer(detections: Any) -> Optional[Callable[[], Any]]:
+    """Return a callable that materializes deferred detections, if present."""
+    handoff = get_deferred_postprocess_handoff(detections)
+    if handoff is None:
+        return None
+    return handoff.finalize
+
+
+def get_deferred_postprocess_done_event(detections: Any) -> Any:
+    """Return the CUDA event recorded after deferred postprocess DtoH copies."""
+    handoff = get_deferred_postprocess_handoff(detections)
+    if handoff is None:
+        return None
+    return handoff.done_event
+
+
+def get_trt_outputs_consumed_event(detections: Any) -> Any:
+    """Return the event proving TensorRT graph outputs are safe to reuse."""
+    handoff = get_deferred_postprocess_handoff(detections)
+    if handoff is None:
+        return None
+    return handoff.trt_outputs_consumed_event
diff --git a/inference_models/inference_models/models/base/instance_segmentation.py b/inference_models/inference_models/models/base/instance_segmentation.py
index c5e46b19e0..2ee19d23b1 100644
--- a/inference_models/inference_models/models/base/instance_segmentation.py
+++ b/inference_models/inference_models/models/base/instance_segmentation.py
@@ -55,11 +55,10 @@ class _DirectInferenceFuture:
     Post-process output is memoised so ``result()`` may be called repeatedly.
     """
 
-    # No __slots__: adapters stash per-request context on the future
-    # (e.g. pipeline-depth-2 stashes `_adapter_kwargs` so `postprocess`
-    # can rebuild the decode call for the PREVIOUS frame even when the
-    # submit site passed `meta=None`). The Future is short-lived so the
-    # per-instance dict overhead is negligible.
+    # No __slots__: adapters attach per-request context through
+    # `models.base.async_handoff` so postprocess can rebuild the decode call
+    # for an older frame even when the original submit site had no metadata.
+    # The Future is short-lived, so the per-instance dict overhead is negligible.
 
     def __init__(
         self,
diff --git a/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py b/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py
index 5641d73e59..e234e050d3 100644
--- a/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py
+++ b/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py
@@ -10,11 +10,6 @@
     InstanceSegmentationModel,
     PreProcessingOverrides,
 )
-
-# Hoisted to module scope to avoid per-call `from ... import` inside the hot
-# forward_async path. Re-import inside the function added ~13µs/frame in the
-# instrumented run on Jetson Orin. Import here is a no-op on every call.
-from inference_models.models.base.instance_segmentation import _DirectInferenceFuture
 from inference_models.configuration import (
     DEFAULT_DEVICE,
     INFERENCE_MODELS_RFDETR_DEFAULT_CONFIDENCE,
@@ -27,6 +22,15 @@
     ModelInputError,
     ModelRuntimeError,
 )
+from inference_models.models.base.async_handoff import (
+    get_deferred_postprocess_done_event,
+    get_trt_outputs_consumed_event,
+)
+
+# Hoisted to module scope to avoid per-call `from ... import` inside the hot
+# forward_async path. Re-import inside the function added ~13µs/frame in the
+# instrumented run on Jetson Orin. Import here is a no-op on every call.
+from inference_models.models.base.instance_segmentation import _DirectInferenceFuture
 from inference_models.models.common.cuda import (
     use_cuda_context,
     use_primary_cuda_context,
@@ -449,8 +453,7 @@ def post_process(
                 )
             if graph_state is not None:
                 output_consumed_events = [
-                    getattr(result, "_trt_outputs_consumed_event", None)
-                    for result in results
+                    get_trt_outputs_consumed_event(result) for result in results
                 ]
                 if output_consumed_events and all(
                     event is not None for event in output_consumed_events
@@ -465,7 +468,7 @@ def post_process(
         should_sync = True
         if kwargs.get("defer_postprocess_sync", False):
             should_sync = not all(
-                getattr(result, "_postproc_done_event", None) is not None
+                get_deferred_postprocess_done_event(result) is not None
                 for result in results
             )
         if should_sync:
diff --git a/inference_models/inference_models/models/rfdetr/triton_postprocess.py b/inference_models/inference_models/models/rfdetr/triton_postprocess.py
index 4479f7a482..f229ccdd0b 100644
--- a/inference_models/inference_models/models/rfdetr/triton_postprocess.py
+++ b/inference_models/inference_models/models/rfdetr/triton_postprocess.py
@@ -30,6 +30,9 @@
 import torch.nn.functional as F
 from pycocotools import mask as mask_utils
 
+from inference_models.models.base.async_handoff import (
+    attach_deferred_postprocess_handoff,
+)
 from inference_models.models.base.instance_segmentation import InstanceDetections
 from inference_models.models.base.types import InstancesRLEMasks
 from inference_models.models.common.roboflow.model_packages import PreProcessingMetadata
@@ -738,11 +741,12 @@ def finalize() -> InstanceDetections:
             masks=[],
         ),
     )
-    # The stream adapter checks these private attributes to order reuse/finalize
-    # operations without forcing an immediate CUDA sync at this call site.
-    detections._postproc_done_event = done_event  # type: ignore[attr-defined]
-    detections._trt_outputs_consumed_event = outputs_consumed_event  # type: ignore[attr-defined]
-    detections._finalize_pending_postproc = finalize  # type: ignore[attr-defined]
+    attach_deferred_postprocess_handoff(
+        detections=detections,
+        done_event=done_event,
+        trt_outputs_consumed_event=outputs_consumed_event,
+        finalize=finalize,
+    )
     return detections
 
 
diff --git a/inference_models/tests/unit_tests/models/test_instance_segmentation_future.py b/inference_models/tests/unit_tests/models/test_instance_segmentation_future.py
new file mode 100644
index 0000000000..d27aff6608
--- /dev/null
+++ b/inference_models/tests/unit_tests/models/test_instance_segmentation_future.py
@@ -0,0 +1,83 @@
+from inference_models.models.base.instance_segmentation import _DirectInferenceFuture
+
+
+class _FakeEvent:
+    def __init__(self, query_result: bool) -> None:
+        self.query_result = query_result  # canned answer returned by query()
+        self.query_calls = 0  # number of query() invocations so far
+
+    def query(self) -> bool:
+        self.query_calls += 1
+        return self.query_result
+
+
+class _FakeInstanceSegmentationModel:
+    def __init__(self) -> None:
+        self.calls = []  # one (raw, meta, kwargs) tuple per post_process() call
+        self.result = ["detections"]  # sentinel object returned by post_process()
+
+    def post_process(self, raw, meta, **kwargs):
+        self.calls.append((raw, meta, dict(kwargs)))  # kwargs copied at call time
+        return self.result
+
+
+def test_direct_inference_future_result_runs_postprocess_once() -> None:
+    """Calling result() twice must trigger a single post_process() call."""
+    fake_model = _FakeInstanceSegmentationModel()
+    pending = _DirectInferenceFuture(
+        model=fake_model,
+        raw="raw-output",
+        meta="metadata",
+        evt=None,
+        kwargs={"confidence": 0.4},
+    )
+
+    first_outcome, second_outcome = pending.result(), pending.result()
+
+    assert first_outcome is fake_model.result
+    assert second_outcome is fake_model.result
+    # The memoised future must not have invoked post_process() a second time.
+    expected_call = ("raw-output", "metadata", {"confidence": 0.4})
+    assert fake_model.calls == [expected_call]
+    assert pending.done() is True
+
+
+def test_direct_inference_future_submit_gpu_work_is_idempotent() -> None:
+    """Only the first submit_gpu_work() call may decide the metadata used."""
+    fake_model = _FakeInstanceSegmentationModel()
+    pending = _DirectInferenceFuture(
+        model=fake_model,
+        raw="raw-output",
+        meta="initial-metadata",
+        evt=None,
+        kwargs={"mask_format": "rle"},
+    )
+
+    for submitted_meta in ("submitted-metadata", "ignored-metadata"):
+        pending.submit_gpu_work(meta=submitted_meta)
+    outcome = pending.result()
+
+    assert outcome is fake_model.result
+    assert pending.preprocess_metadata == "submitted-metadata"
+    expected_call = ("raw-output", "submitted-metadata", {"mask_format": "rle"})
+    assert fake_model.calls == [expected_call]
+
+
+def test_direct_inference_future_done_queries_event_until_cached() -> None:
+    """done() polls the event until result() is cached, then stops querying."""
+    never_ready = _FakeEvent(query_result=False)
+    pending = _DirectInferenceFuture(
+        model=_FakeInstanceSegmentationModel(),
+        raw="raw-output",
+        meta="metadata",
+        evt=never_ready,
+        kwargs={},
+    )
+
+    assert pending.done() is False
+    assert never_ready.query_calls == 1
+
+    pending.result()
+
+    assert pending.done() is True
+    assert never_ready.query_calls == 1
diff --git a/inference_models/tests/unit_tests/test_configuration.py b/inference_models/tests/unit_tests/test_configuration.py
new file mode 100644
index 0000000000..e1c5f55355
--- /dev/null
+++ b/inference_models/tests/unit_tests/test_configuration.py
@@ -0,0 +1,48 @@
+import pytest
+
+from inference_models.configuration import (
+    DEFAULT_RFDETR_PIPELINE_DEPTH,
+    get_rfdetr_pipeline_depth,
+    parse_rfdetr_pipeline_depth,
+)
+from inference_models.errors import InvalidEnvVariable
+
+
+def test_parse_rfdetr_pipeline_depth_uses_default_when_env_missing() -> None:
+    assert parse_rfdetr_pipeline_depth(None) == DEFAULT_RFDETR_PIPELINE_DEPTH
+
+
+@pytest.mark.parametrize(
+    "value, expected",
+    [
+        ("1", 1),
+        ("2", 2),
+        (" 3 ", 3),  # surrounding whitespace is expected to be tolerated
+    ],
+)
+def test_parse_rfdetr_pipeline_depth_accepts_positive_integers(
+    value: str,
+    expected: int,
+) -> None:
+    assert parse_rfdetr_pipeline_depth(value) == expected
+
+
+@pytest.mark.parametrize("value", ["invalid", "1.5", "", "0", "-1"])
+def test_parse_rfdetr_pipeline_depth_rejects_invalid_values(value: str) -> None:
+    with pytest.raises(InvalidEnvVariable):  # covers empty, non-integer, < 1
+        parse_rfdetr_pipeline_depth(value)
+
+
+def test_get_rfdetr_pipeline_depth_reads_environment(monkeypatch) -> None:
+    monkeypatch.setenv("RFDETR_PIPELINE_DEPTH", "2")  # undone by the fixture
+    assert get_rfdetr_pipeline_depth() == 2
+
+
+@pytest.mark.parametrize("value", ["0", "-4", "invalid"])
+def test_get_rfdetr_pipeline_depth_rejects_invalid_environment(
+    monkeypatch,
+    value: str,
+) -> None:
+    monkeypatch.setenv("RFDETR_PIPELINE_DEPTH", value)
+    with pytest.raises(InvalidEnvVariable):  # the getter must validate on read
+        get_rfdetr_pipeline_depth()

From dd8282c6c1e45e0fd9a5fd4a000a458d3de22cd2 Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Wed, 3 Jun 2026 02:48:38 +0000
Subject: [PATCH 49/76] Tighten RF-DETR pipeline review readiness

---
 .../rfdetr_workflow_video_parity.py           |   1 +
 .../interfaces/stream/inference_pipeline.py   |  12 +-
 .../stream/model_handlers/workflows.py        |  58 ++++-
 .../model_handlers/workflows_context.py       |  18 --
 .../core/models/inference_models_adapters.py  |   6 +
 .../roboflow/instance_segmentation/v3.py      | 187 +++++++++++-----
 .../stream/test_interface_pipeline.py         |  89 +++++++-
 .../core/interfaces/stream/test_workflows.py  | 207 ++++++++++++++++--
 8 files changed, 463 insertions(+), 115 deletions(-)
 delete mode 100644 inference/core/interfaces/stream/model_handlers/workflows_context.py

diff --git a/development/stream_interface/rfdetr_workflow_video_parity.py b/development/stream_interface/rfdetr_workflow_video_parity.py
index 879a41a89b..23f5569c47 100644
--- a/development/stream_interface/rfdetr_workflow_video_parity.py
+++ b/development/stream_interface/rfdetr_workflow_video_parity.py
@@ -226,6 +226,7 @@ def _build_workflow(model_id: str, confidence: float) -> dict:
                 "model_id": model_id,
                 "confidence_mode": "custom",
                 "custom_confidence": confidence,
+                "enforce_dense_masks_in_inference_models": False,
             },
         ],
         "outputs": [
diff --git a/inference/core/interfaces/stream/inference_pipeline.py b/inference/core/interfaces/stream/inference_pipeline.py
index 01842076b0..712f7d3037 100644
--- a/inference/core/interfaces/stream/inference_pipeline.py
+++ b/inference/core/interfaces/stream/inference_pipeline.py
@@ -38,8 +38,8 @@
 )
 from inference.core.interfaces.stream.entities import (
     AnyPrediction,
-    InferenceHandlerResult,
     InferenceHandler,
+    InferenceHandlerResult,
     ModelConfig,
     SinkHandler,
 )
@@ -936,6 +936,7 @@ def _execute_inference(self) -> None:
             )
             logger.exception(f"Encountered inference error: {error}")
         finally:
+            self._close_inference_handler()
             self._predictions_queue.put(None)
             send_inference_pipeline_status_update(
                 severity=UpdateSeverity.INFO,
@@ -1029,6 +1030,15 @@ def _drain_inference_handler(self) -> None:
             fallback_video_frames=[],
         )
 
+    def _close_inference_handler(self) -> None:
+        """Best-effort call of the optional ``close()`` hook on the handler."""
+        close_fn = getattr(self._on_video_frame, "close", None)
+        if callable(close_fn):
+            try:
+                close_fn()
+            except Exception as error:
+                logger.warning(f"Could not close inference handler. Cause: {error}")
+
     def _handle_predictions_dispatching(
         self,
         predictions: List[AnyPrediction],
diff --git a/inference/core/interfaces/stream/model_handlers/workflows.py b/inference/core/interfaces/stream/model_handlers/workflows.py
index 5e8b274698..b7f7f904d7 100644
--- a/inference/core/interfaces/stream/model_handlers/workflows.py
+++ b/inference/core/interfaces/stream/model_handlers/workflows.py
@@ -1,14 +1,18 @@
+from dataclasses import dataclass
 from typing import Any, Dict, List, Optional
 
 from inference.core.interfaces.camera.entities import VideoFrame
 from inference.core.interfaces.stream.entities import InferenceHandlerResult
-from inference.core.interfaces.stream.model_handlers.workflows_context import (
-    workflow_stream_flush_context,
-)
 from inference.core.workflows.execution_engine.core import ExecutionEngine
 from inference.core.workflows.execution_engine.entities.base import VideoMetadata
 
 
+@dataclass(frozen=True)
+class _StreamPipelineStep:
+    step: Any  # workflow step instance that reported is_stream_pipelined()
+    depth: int  # positive pipeline depth reported by (or defaulted for) the step
+
+
 class WorkflowRunner:
     def __init__(
         self,
@@ -48,24 +52,44 @@ def __call__(
         )
 
     def flush(self) -> Optional[List[InferenceHandlerResult]]:
-        if self._stream_buffer_depth() <= 0:
+        stream_steps = self._stream_pipeline_steps()
+        if not stream_steps:
             self._pending_video_frames.clear()
             return None
         if not self._pending_video_frames:
             return None
+        if len(stream_steps) != 1:
+            raise RuntimeError("Stream pipeline flushing supports one pipelined step")
+        flush_fn = getattr(stream_steps[0].step, "flush_stream_pipeline", None)
+        if not callable(flush_fn):
+            raise RuntimeError(
+                "Stream-pipelined workflow step must implement flush_stream_pipeline()"
+            )
+        predictions = flush_fn()
+        if predictions is None:
+            predictions = []
+        if len(predictions) != len(self._pending_video_frames):
+            raise RuntimeError(
+                "Stream pipeline flush returned a different number of prediction "
+                "batches than pending video-frame batches"
+            )
         results = []
-        while self._pending_video_frames:
+        for prediction in predictions:
             emit_video_frames = self._pending_video_frames.pop(0)
-            with workflow_stream_flush_context():
-                predictions = self._run_workflow(video_frames=emit_video_frames)
             results.append(
                 InferenceHandlerResult(
-                    predictions=predictions,
+                    predictions=prediction,
                     video_frames=emit_video_frames,
                 )
             )
         return results
 
+    def close(self) -> None:
+        for stream_step in self._stream_pipeline_steps():
+            close_fn = getattr(stream_step.step, "close_stream_pipeline", None)
+            if callable(close_fn):  # optional hook; skip steps without it
+                close_fn()
+
     def _run_workflow(self, video_frames: List[VideoFrame]) -> List[dict]:
         workflows_parameters: Dict[str, Any] = dict(self._workflows_parameters or {})
         # TODO: pass fps reflecting each stream to workflows_parameters
@@ -114,17 +138,27 @@ def _uses_stream_buffering(self) -> bool:
         return self._stream_buffer_depth() > 0
 
     def _stream_buffer_depth(self) -> int:
+        stream_steps = self._stream_pipeline_steps()
+        if not stream_steps:
+            return 0
+        return max(stream_step.depth for stream_step in stream_steps)
+
+    def _stream_pipeline_steps(self) -> List[_StreamPipelineStep]:
         engine = getattr(self._execution_engine, "_engine", None)
         compiled_workflow = getattr(engine, "_compiled_workflow", None)
         steps = getattr(compiled_workflow, "steps", {})
-        stream_buffer_depth = 0
+        stream_steps = []
         for initialised_step in steps.values():
             step_instance = getattr(initialised_step, "step", None)
             is_stream_pipelined = getattr(step_instance, "is_stream_pipelined", None)
             if callable(is_stream_pipelined) and is_stream_pipelined():
                 get_depth = getattr(step_instance, "stream_pipeline_depth", None)
                 if callable(get_depth):
-                    stream_buffer_depth = max(stream_buffer_depth, int(get_depth()))
+                    depth = int(get_depth())
                 else:
-                    stream_buffer_depth = max(stream_buffer_depth, 1)
-        return stream_buffer_depth
+                    depth = 1
+                if depth > 0:
+                    stream_steps.append(
+                        _StreamPipelineStep(step=step_instance, depth=depth)
+                    )
+        return stream_steps
diff --git a/inference/core/interfaces/stream/model_handlers/workflows_context.py b/inference/core/interfaces/stream/model_handlers/workflows_context.py
deleted file mode 100644
index 6c2c99e849..0000000000
--- a/inference/core/interfaces/stream/model_handlers/workflows_context.py
+++ /dev/null
@@ -1,18 +0,0 @@
-from contextlib import contextmanager
-import threading
-
-_WORKFLOW_STREAM_CONTEXT = threading.local()
-
-
-def is_workflow_stream_flush_active() -> bool:
-    return bool(getattr(_WORKFLOW_STREAM_CONTEXT, "flush_active", False))
-
-
-@contextmanager
-def workflow_stream_flush_context():
-    previous = is_workflow_stream_flush_active()
-    _WORKFLOW_STREAM_CONTEXT.flush_active = True
-    try:
-        yield
-    finally:
-        _WORKFLOW_STREAM_CONTEXT.flush_active = previous
diff --git a/inference/core/models/inference_models_adapters.py b/inference/core/models/inference_models_adapters.py
index d5286857e3..a038e88135 100644
--- a/inference/core/models/inference_models_adapters.py
+++ b/inference/core/models/inference_models_adapters.py
@@ -433,6 +433,12 @@ def flush(self) -> List[InstanceSegmentationInferenceResponse]:
             responses.extend(self._response_futures.popleft().result())
         return responses
 
+    def shutdown_pipeline(self) -> None:
+        """Stop the response executor without waiting; no-op when none exists."""
+        if self._response_executor is not None:
+            self._response_executor.shutdown(wait=False)
+            self._response_executor = None
+
     def _get_response_executor(self) -> ThreadPoolExecutor:
         if self._response_executor is None:
             self._response_executor = ThreadPoolExecutor(max_workers=1)
diff --git a/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v3.py b/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v3.py
index 8ebb6a7968..f15882a784 100644
--- a/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v3.py
+++ b/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v3.py
@@ -1,5 +1,7 @@
+from collections import deque
 from concurrent.futures import Future, ThreadPoolExecutor
-from typing import List, Literal, Optional, Type, Union
+from dataclasses import dataclass
+from typing import Deque, List, Literal, Optional, Type, Union
 
 from pydantic import ConfigDict, Field, PositiveInt, model_validator
 
@@ -17,9 +19,6 @@
     WORKFLOWS_REMOTE_EXECUTION_MAX_STEP_BATCH_SIZE,
     WORKFLOWS_REMOTE_EXECUTION_MAX_STEP_CONCURRENT_REQUESTS,
 )
-from inference.core.interfaces.stream.model_handlers.workflows_context import (
-    is_workflow_stream_flush_active,
-)
 from inference.core.managers.base import ModelManager
 from inference.core.workflows.core_steps.common.entities import StepExecutionMode
 from inference.core.workflows.core_steps.common.utils import (
@@ -70,6 +69,13 @@
 """
 
 
+@dataclass(frozen=True)
+class _StreamPredictionContext:
+    images: Batch[WorkflowImageData]  # input batch the predictions belong to
+    class_filter: Optional[List[str]]  # forwarded to _post_process_result()
+    model_id: str  # forwarded to _post_process_result() and reported in results
+
+
 class BlockManifest(WorkflowBlockManifest):
     model_config = ConfigDict(
         json_schema_extra={
@@ -243,6 +249,9 @@ def __init__(
         self._step_execution_mode = step_execution_mode
         self._last_model_id: Optional[str] = None
         self._stream_response_executor: Optional[ThreadPoolExecutor] = None
+        self._pending_stream_prediction_contexts: Deque[_StreamPredictionContext] = (
+            deque()
+        )
 
     @classmethod
     def get_init_parameters(cls) -> List[str]:
@@ -330,58 +339,47 @@ def run_locally(
             model_id=model_id,
             api_key=self._api_key,
         )
-        if is_workflow_stream_flush_active():
-            predictions = self._model_manager.flush(model_id=model_id)
-        else:
-            request = InstanceSegmentationInferenceRequest(
-                api_key=self._api_key,
-                model_id=model_id,
-                image=inference_images,
-                disable_active_learning=disable_active_learning,
-                active_learning_target_dataset=active_learning_target_dataset,
-                class_agnostic_nms=class_agnostic_nms,
-                class_filter=class_filter,
-                confidence=confidence,
-                iou_threshold=iou_threshold,
-                max_detections=max_detections,
-                max_candidates=max_candidates,
-                mask_decode_mode=mask_decode_mode,
-                tradeoff_factor=tradeoff_factor,
-                source="workflow-execution",
-                enforce_dense_masks_in_inference_models=enforce_dense_masks_in_inference_models,
-            )
-            predictions = self._model_manager.infer_from_request_sync(
-                model_id=model_id, request=request
-            )
+        stream_context = _StreamPredictionContext(
+            images=images,
+            class_filter=class_filter,
+            model_id=model_id,
+        )
+        if self.stream_pipeline_depth() > 0:
+            self._pending_stream_prediction_contexts.append(stream_context)
+        request = InstanceSegmentationInferenceRequest(
+            api_key=self._api_key,
+            model_id=model_id,
+            image=inference_images,
+            disable_active_learning=disable_active_learning,
+            active_learning_target_dataset=active_learning_target_dataset,
+            class_agnostic_nms=class_agnostic_nms,
+            class_filter=class_filter,
+            confidence=confidence,
+            iou_threshold=iou_threshold,
+            max_detections=max_detections,
+            max_candidates=max_candidates,
+            mask_decode_mode=mask_decode_mode,
+            tradeoff_factor=tradeoff_factor,
+            source="workflow-execution",
+            enforce_dense_masks_in_inference_models=enforce_dense_masks_in_inference_models,
+        )
+        predictions = self._model_manager.infer_from_request_sync(
+            model_id=model_id, request=request
+        )
         if not isinstance(predictions, list):
             predictions = [predictions]
         async_response_future = self._extract_async_response_future(
             predictions=predictions
         )
         if async_response_future is not None:
+            stream_context = self._pop_stream_prediction_context(default=stream_context)
             return self._submit_async_post_process_result(
                 predictions_future=async_response_future,
-                images=images,
-                class_filter=class_filter,
-                model_id=model_id,
+                stream_context=stream_context,
             )
-        # The adapter returns dataclass responses when source="workflow-execution"
-        # (cheaper construct + dict-walk than pydantic). Any other response type
-        # (e.g. if a non-rfdetr backend is bound to the same block) falls back
-        # to `model_dump`.
-        predictions = [
-            (
-                _is_response_dc_to_dict(e)
-                if isinstance(e, InstanceSegmentationInferenceResponseDC)
-                else e.model_dump(by_alias=True, exclude_none=True)
-            )
-            for e in predictions
-        ]
-        return self._post_process_result(
-            images=images,
+        return self._finalize_prediction_responses(
             predictions=predictions,
-            class_filter=class_filter,
-            model_id=model_id,
+            stream_context=stream_context,
         )
 
     def _extract_async_response_future(
@@ -402,16 +400,12 @@ def _get_stream_response_executor(self) -> ThreadPoolExecutor:
     def _submit_async_post_process_result(
         self,
         predictions_future: Future,
-        images: Batch[WorkflowImageData],
-        class_filter: Optional[List[str]],
-        model_id: str,
+        stream_context: _StreamPredictionContext,
     ) -> BlockResult:
         finalized_result_future = self._get_stream_response_executor().submit(
             self._finalize_async_prediction_value,
             predictions_future,
-            images,
-            class_filter,
-            model_id,
+            stream_context,
         )
         return [
             {
@@ -420,9 +414,9 @@ def _submit_async_post_process_result(
                     result_future=finalized_result_future,
                     image_index=image_index,
                 ),
-                "model_id": model_id,
+                "model_id": stream_context.model_id,
             }
-            for image_index in range(len(images))
+            for image_index in range(len(stream_context.images))
         ]
 
     def _submit_async_prediction_selector(
@@ -439,13 +433,25 @@ def _submit_async_prediction_selector(
     def _finalize_async_prediction_value(
         self,
         predictions_future: Future,
-        images: Batch[WorkflowImageData],
-        class_filter: Optional[List[str]],
-        model_id: str,
+        stream_context: _StreamPredictionContext,
     ) -> BlockResult:
         predictions = predictions_future.result()
         if not isinstance(predictions, list):
             predictions = [predictions]
+        return self._finalize_prediction_responses(
+            predictions=predictions,
+            stream_context=stream_context,
+        )
+
+    def _finalize_prediction_responses(
+        self,
+        predictions: List[object],
+        stream_context: _StreamPredictionContext,
+    ) -> BlockResult:
+        # The adapter returns dataclass responses when source="workflow-execution"
+        # (cheaper construct + dict-walk than pydantic). Any other response type
+        # (e.g. if a non-rfdetr backend is bound to the same block) falls back
+        # to `model_dump`.
         predictions = [
             (
                 _is_response_dc_to_dict(e)
@@ -455,12 +461,20 @@ def _finalize_async_prediction_value(
             for e in predictions
         ]
         return self._post_process_result(
-            images=images,
+            images=stream_context.images,
             predictions=predictions,
-            class_filter=class_filter,
-            model_id=model_id,
+            class_filter=stream_context.class_filter,
+            model_id=stream_context.model_id,
         )
 
+    def _pop_stream_prediction_context(
+        self,
+        default: _StreamPredictionContext,
+    ) -> _StreamPredictionContext:
+        """Pop the oldest queued context; use ``default`` when the queue is empty."""
+        queue = self._pending_stream_prediction_contexts
+        return queue.popleft() if queue else default
+
     def _select_async_prediction_value(
         self,
         result_future: Future,
@@ -491,6 +505,59 @@ def stream_pipeline_depth(self) -> int:
         model = self._model_manager[self._last_model_id]
         return max(0, int(getattr(model, "_pipeline_depth", 1)) - 1)
 
+    def flush_stream_pipeline(self) -> List[BlockResult]:
+        """Drain the model's stream pipeline and finalize every pending batch.
+
+        Raises RuntimeError before finalizing if the prediction count is unexpected.
+        """
+        # Detach pending contexts first so a failed flush leaves no stale state.
+        pending_contexts = list(self._pending_stream_prediction_contexts)
+        self._pending_stream_prediction_contexts.clear()
+        if (
+            self._last_model_id is None
+            or self._last_model_id not in self._model_manager
+        ):
+            return []
+        model = self._model_manager[self._last_model_id]
+        flush_fn = getattr(model, "flush", None)
+        if not callable(flush_fn):
+            return []
+        predictions = flush_fn()
+        if not isinstance(predictions, list):
+            predictions = [predictions]
+        expected = sum(len(context.images) for context in pending_contexts)
+        if len(predictions) != expected:
+            relation = "fewer" if len(predictions) < expected else "more"
+            raise RuntimeError(
+                f"Stream pipeline flush returned {relation} predictions than expected"
+            )
+        results = []
+        offset = 0
+        for context in pending_contexts:
+            batch_end = offset + len(context.images)
+            results.append(
+                self._finalize_prediction_responses(
+                    predictions=predictions[offset:batch_end],
+                    stream_context=context,
+                )
+            )
+            offset = batch_end
+        return results
+
+    def close_stream_pipeline(self) -> None:
+        """Shut down this step's executor, then the last model's pipeline hook."""
+        executor = self._stream_response_executor
+        if executor is not None:
+            executor.shutdown(wait=False)
+            self._stream_response_executor = None
+        last_model_id = self._last_model_id
+        if last_model_id is None or last_model_id not in self._model_manager:
+            return None
+        model = self._model_manager[last_model_id]
+        model_shutdown = getattr(model, "shutdown_pipeline", None)
+        if callable(model_shutdown):
+            model_shutdown()
+
     def run_remotely(
         self,
         images: Batch[WorkflowImageData],
diff --git a/tests/inference/unit_tests/core/interfaces/stream/test_interface_pipeline.py b/tests/inference/unit_tests/core/interfaces/stream/test_interface_pipeline.py
index d7b7b5068e..18516ba080 100644
--- a/tests/inference/unit_tests/core/interfaces/stream/test_interface_pipeline.py
+++ b/tests/inference/unit_tests/core/interfaces/stream/test_interface_pipeline.py
@@ -1,4 +1,5 @@
 from collections import defaultdict
+from concurrent.futures import Future
 from datetime import datetime
 from functools import partial
 from queue import Queue
@@ -28,8 +29,14 @@
     VideoSource,
     lock_state_transition,
 )
-from inference.core.interfaces.stream.entities import ModelConfig
-from inference.core.interfaces.stream.inference_pipeline import InferencePipeline
+from inference.core.interfaces.stream.entities import (
+    InferenceHandlerResult,
+    ModelConfig,
+)
+from inference.core.interfaces.stream.inference_pipeline import (
+    InferencePipeline,
+    _resolve_prediction_futures,
+)
 from inference.core.interfaces.stream.model_handlers.roboflow_models import (
     default_process_frame,
 )
@@ -140,6 +147,84 @@ def infer(self, image: Any, **kwargs) -> List[ObjectDetectionInferenceResponse]:
         ] * len(image)
 
 
+class _PredictionReadyWatchdog:
+    def __init__(self) -> None:
+        self.ready_frames = []
+
+    def on_model_prediction_ready(self, frames):
+        self.ready_frames.append(frames)
+
+
+class _FlushableInferenceHandler:
+    def __init__(self, results):
+        self.results = results
+        self.flush_calls = 0
+        self.close_calls = 0
+
+    def flush(self):
+        self.flush_calls += 1
+        return self.results
+
+    def close(self) -> None:
+        self.close_calls += 1
+
+
+def test_inference_pipeline_drain_enqueues_flush_results_with_bound_frames() -> None:
+    frame_1 = VideoFrame(
+        image=np.zeros((8, 8, 3), dtype=np.uint8),
+        frame_id=1,
+        frame_timestamp=datetime.now(),
+        source_id=0,
+    )
+    frame_2 = VideoFrame(
+        image=np.zeros((8, 8, 3), dtype=np.uint8),
+        frame_id=2,
+        frame_timestamp=datetime.now(),
+        source_id=0,
+    )
+    handler = _FlushableInferenceHandler(
+        results=[
+            InferenceHandlerResult(predictions=["p1"], video_frames=[frame_1]),
+            InferenceHandlerResult(predictions=["p2"], video_frames=[frame_2]),
+        ]
+    )
+    watchdog = _PredictionReadyWatchdog()
+    pipeline = object.__new__(InferencePipeline)
+    pipeline._on_video_frame = handler
+    pipeline._watchdog = watchdog
+    pipeline._predictions_queue = Queue()
+    pipeline._status_update_handlers = []
+
+    pipeline._drain_inference_handler()
+
+    assert handler.flush_calls == 1
+    assert pipeline._predictions_queue.get_nowait() == (["p1"], [frame_1])
+    assert pipeline._predictions_queue.get_nowait() == (["p2"], [frame_2])
+    assert watchdog.ready_frames == [[frame_1], [frame_2]]
+
+
+def test_resolve_prediction_futures_recursively_resolves_nested_values() -> None:
+    inner = Future()
+    inner.set_result("resolved")
+    outer = Future()
+    outer.set_result({"detections": [inner]})
+
+    assert _resolve_prediction_futures((outer, {"raw": inner})) == (
+        {"detections": ["resolved"]},
+        {"raw": "resolved"},
+    )
+
+
+def test_inference_pipeline_close_calls_handler_close_hook() -> None:
+    handler = _FlushableInferenceHandler(results=[])
+    pipeline = object.__new__(InferencePipeline)
+    pipeline._on_video_frame = handler
+
+    pipeline._close_inference_handler()
+
+    assert handler.close_calls == 1
+
+
 @pytest.mark.timeout(90)
 @pytest.mark.slow
 def test_inference_pipeline_works_correctly_against_video_file(
diff --git a/tests/inference/unit_tests/core/interfaces/stream/test_workflows.py b/tests/inference/unit_tests/core/interfaces/stream/test_workflows.py
index 0ff4936eeb..1820ca124f 100644
--- a/tests/inference/unit_tests/core/interfaces/stream/test_workflows.py
+++ b/tests/inference/unit_tests/core/interfaces/stream/test_workflows.py
@@ -1,3 +1,4 @@
+from concurrent.futures import Future
 from datetime import datetime
 from types import SimpleNamespace
 
@@ -5,21 +6,40 @@
 
 from inference.core.interfaces.camera.entities import VideoFrame
 from inference.core.interfaces.stream.model_handlers.workflows import WorkflowRunner
-from inference.core.interfaces.stream.model_handlers.workflows_context import (
-    is_workflow_stream_flush_active,
+from inference.core.workflows.core_steps.common.entities import StepExecutionMode
+from inference.core.workflows.core_steps.models.roboflow.instance_segmentation.v3 import (
+    RoboflowInstanceSegmentationModelBlockV3,
 )
+from inference_models.models.base.async_handoff import attach_async_response_future
+
+
+class _FakePipelinedStep:
+    def __init__(self, stream_buffer_depth: int) -> None:
+        self._stream_buffer_depth = stream_buffer_depth
+        self.flush_calls = 0
+        self.close_calls = 0
+
+    def is_stream_pipelined(self) -> bool:
+        return self._stream_buffer_depth > 0
+
+    def stream_pipeline_depth(self) -> int:
+        return self._stream_buffer_depth
+
+    def flush_stream_pipeline(self):
+        self.flush_calls += 1
+        return [[{"predictions": "frame-2"}]]
+
+    def close_stream_pipeline(self) -> None:
+        self.close_calls += 1
 
 
 class _FakeExecutionEngine:
     def __init__(self, stream_buffer_depth: int) -> None:
         self._stream_buffer_depth = stream_buffer_depth
-        step = SimpleNamespace(
-            is_stream_pipelined=lambda: stream_buffer_depth > 0,
-            stream_pipeline_depth=lambda: stream_buffer_depth,
-        )
+        self.step = _FakePipelinedStep(stream_buffer_depth=stream_buffer_depth)
         self._engine = SimpleNamespace(
             _compiled_workflow=SimpleNamespace(
-                steps={"segmentation": SimpleNamespace(step=step)}
+                steps={"segmentation": SimpleNamespace(step=self.step)}
             )
         )
         self.calls = []
@@ -32,23 +52,95 @@ def run(
         _is_preview,
     ):
         frame_number = runtime_parameters["image"][0]["video_metadata"].frame_number
-        flush_active = is_workflow_stream_flush_active()
         self.calls.append(
             {
                 "frame_number": frame_number,
-                "flush_active": flush_active,
                 "fps": fps,
                 "serialize_results": serialize_results,
                 "is_preview": _is_preview,
             }
         )
-        if flush_active:
-            prediction_frame = frame_number
-        else:
-            prediction_frame = frame_number - self._stream_buffer_depth
+        prediction_frame = frame_number - self._stream_buffer_depth
         return [{"predictions": f"frame-{prediction_frame}"}]
 
 
+class _ImmediateExecutor:
+    def submit(self, fn, *args, **kwargs) -> Future:
+        future = Future()
+        try:
+            future.set_result(fn(*args, **kwargs))
+        except BaseException as error:  # pragma: no cover - defensive
+            future.set_exception(error)
+        return future
+
+
+class _FakeWorkflowImage:
+    def __init__(self, tag: str) -> None:
+        self.tag = tag
+
+    def to_inference_format(self, numpy_preferred: bool):
+        assert numpy_preferred is True
+        return {
+            "type": "numpy_object",
+            "value": np.zeros((8, 8, 3), dtype=np.uint8),
+        }
+
+
+class _FakeResponse:
+    def __init__(self, tag: str) -> None:
+        self.tag = tag
+
+    def model_dump(self, by_alias: bool, exclude_none: bool):
+        assert by_alias is True
+        assert exclude_none is True
+        return {"tag": self.tag}
+
+
+class _FakeStreamModel:
+    _pipeline_depth = 2
+
+    def __init__(self) -> None:
+        self.flush_calls = 0
+        self.shutdown_calls = 0
+
+    def flush(self):
+        self.flush_calls += 1
+        return [_FakeResponse("tail-final")]
+
+    def shutdown_pipeline(self) -> None:
+        self.shutdown_calls += 1
+
+
+class _FakeModelManager:
+    def __init__(self, inference_results) -> None:
+        self._inference_results = list(inference_results)
+        self.model = _FakeStreamModel()
+        self.add_model_calls = []
+        self.infer_calls = 0
+
+    def add_model(self, model_id: str, api_key: str) -> None:
+        self.add_model_calls.append((model_id, api_key))
+
+    def infer_from_request_sync(self, model_id: str, request):
+        self.infer_calls += 1
+        return self._inference_results.pop(0)
+
+    def __contains__(self, model_id: str) -> bool:
+        return model_id == "model"
+
+    def __getitem__(self, model_id: str):
+        assert model_id == "model"
+        return self.model
+
+
+def _make_async_placeholder(response_tag: str) -> _FakeResponse:
+    future = Future()
+    future.set_result([_FakeResponse(response_tag)])
+    response = _FakeResponse("placeholder")
+    attach_async_response_future(response=response, response_future=future)
+    return response
+
+
 def _make_frame(frame_id: int) -> VideoFrame:
     return VideoFrame(
         image=np.zeros((8, 8, 3), dtype=np.uint8),
@@ -81,7 +173,6 @@ def test_workflow_runner_without_stream_buffering_returns_current_frame() -> Non
     assert engine.calls == [
         {
             "frame_number": 1,
-            "flush_active": False,
             "fps": 30.0,
             "serialize_results": True,
             "is_preview": True,
@@ -112,26 +203,98 @@ def test_workflow_runner_buffers_frames_until_delayed_prediction_arrives() -> No
     assert len(flushed_results) == 1
     assert flushed_results[0].predictions == [{"predictions": "frame-2"}]
     assert flushed_results[0].video_frames == [frame_2]
+    assert engine.step.flush_calls == 1
+    runner.close()
+    assert engine.step.close_calls == 1
     assert engine.calls == [
         {
             "frame_number": 1,
-            "flush_active": False,
             "fps": 30.0,
             "serialize_results": False,
             "is_preview": False,
         },
         {
             "frame_number": 2,
-            "flush_active": False,
             "fps": 30.0,
             "serialize_results": False,
             "is_preview": False,
         },
+    ]
+
+
+def test_instance_segmentation_stream_flush_drains_model_without_rerunning_workflow() -> (
+    None
+):
+    manager = _FakeModelManager(
+        inference_results=[
+            [_FakeResponse("priming")],
+            [_make_async_placeholder("first-final")],
+        ]
+    )
+    block = RoboflowInstanceSegmentationModelBlockV3(
+        model_manager=manager,
+        api_key="api-key",
+        step_execution_mode=StepExecutionMode.LOCAL,
+    )
+    block._get_stream_response_executor = lambda: _ImmediateExecutor()
+    block._post_process_result = lambda images, predictions, class_filter, model_id: [
         {
-            "frame_number": 2,
-            "flush_active": True,
-            "fps": 30.0,
-            "serialize_results": False,
-            "is_preview": False,
-        },
+            "predictions": f"{images[0].tag}:{predictions[0]['tag']}",
+            "class_filter": class_filter,
+            "model_id": model_id,
+        }
+    ]
+
+    first_result = block.run_locally(
+        images=[_FakeWorkflowImage("frame-1")],
+        model_id="model",
+        class_agnostic_nms=None,
+        class_filter=["car"],
+        confidence=0.4,
+        iou_threshold=None,
+        max_detections=None,
+        max_candidates=None,
+        mask_decode_mode="accurate",
+        tradeoff_factor=None,
+        disable_active_learning=None,
+        active_learning_target_dataset=None,
+        enforce_dense_masks_in_inference_models=False,
+    )
+    second_result = block.run_locally(
+        images=[_FakeWorkflowImage("frame-2")],
+        model_id="model",
+        class_agnostic_nms=None,
+        class_filter=["car"],
+        confidence=0.4,
+        iou_threshold=None,
+        max_detections=None,
+        max_candidates=None,
+        mask_decode_mode="accurate",
+        tradeoff_factor=None,
+        disable_active_learning=None,
+        active_learning_target_dataset=None,
+        enforce_dense_masks_in_inference_models=False,
+    )
+    flushed_results = block.flush_stream_pipeline()
+
+    assert first_result == [
+        {
+            "predictions": "frame-1:priming",
+            "class_filter": ["car"],
+            "model_id": "model",
+        }
+    ]
+    assert second_result[0]["predictions"].result() == "frame-1:first-final"
+    assert flushed_results == [
+        [
+            {
+                "predictions": "frame-2:tail-final",
+                "class_filter": ["car"],
+                "model_id": "model",
+            }
+        ]
     ]
+    assert manager.infer_calls == 2
+    assert manager.model.flush_calls == 1
+    block.close_stream_pipeline()
+    assert manager.model.shutdown_calls == 1

From 33af52d32205bb2e55b43ac2921209b4c3ed0701 Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Wed, 3 Jun 2026 02:58:21 +0000
Subject: [PATCH 50/76] Gate RF-DETR stream pipeline by model capability

---
 .../core/models/inference_models_adapters.py  | 20 ++++++++++++++--
 .../models/base/instance_segmentation.py      | 24 +++++++++++++++----
 .../rfdetr_instance_segmentation_trt.py       |  4 ++++
 .../models/test_inference_models_adapters.py  | 24 +++++++++++++++++++
 4 files changed, 66 insertions(+), 6 deletions(-)

diff --git a/inference/core/models/inference_models_adapters.py b/inference/core/models/inference_models_adapters.py
index a038e88135..502d031d92 100644
--- a/inference/core/models/inference_models_adapters.py
+++ b/inference/core/models/inference_models_adapters.py
@@ -336,8 +336,10 @@ def __init__(self, model_id: str, api_key: str = None, **kwargs):
         # (preprocess→forward→postprocess on each frame, in order). depth=2
         # means two stages in parallel: while the GPU works on the current
         # frame, the CPU prepares/submits the next frame, then harvests the
-        # previous response. The response delay is therefore depth - 1 frames.
-        self._pipeline_depth = get_rfdetr_pipeline_depth()
+        # previous response. Only models that explicitly support the deferred
+        # GPU handoff contract can use this; other instance-segmentation
+        # backends keep depth=1 even if RFDETR_PIPELINE_DEPTH is set.
+        self._pipeline_depth = self._resolve_pipeline_depth()
         self._response_delay = max(1, self._pipeline_depth - 1)
         # Per-adapter in-flight futures + metadata. Not thread-safe; the
         # InferencePipeline is single-producer and the adapter is owned by a
@@ -354,6 +356,20 @@ def __init__(self, model_id: str, api_key: str = None, **kwargs):
             Future[List[InstanceSegmentationInferenceResponse]]
         ] = deque()
 
+    def _resolve_pipeline_depth(self) -> int:
+        requested_depth = get_rfdetr_pipeline_depth()
+        if requested_depth <= 1 or self._model_supports_stream_pipeline():
+            return requested_depth
+        return 1
+
+    def _model_supports_stream_pipeline(self) -> bool:
+        supports_stream_pipeline = getattr(
+            self._model, "supports_stream_pipeline", False
+        )
+        if callable(supports_stream_pipeline):
+            return bool(supports_stream_pipeline())
+        return bool(supports_stream_pipeline)
+
     def map_inference_kwargs(self, kwargs: dict) -> dict:
         kwargs["input_color_format"] = "bgr"
         pre_processing_overrides = PreProcessingOverrides(
diff --git a/inference_models/inference_models/models/base/instance_segmentation.py b/inference_models/inference_models/models/base/instance_segmentation.py
index 2ee19d23b1..2bc522d5a1 100644
--- a/inference_models/inference_models/models/base/instance_segmentation.py
+++ b/inference_models/inference_models/models/base/instance_segmentation.py
@@ -37,8 +37,10 @@ class InferenceFuture(Protocol):
 
     The returned object lets a caller start a subsequent ``infer_async`` call
     while the GPU is still executing the previous one. Calling ``result()``
-    blocks on a single GPU event, then runs CPU-side post-processing and
-    returns the decoded detections. ``done()`` is a non-blocking probe.
+    materializes the post-processing result for that request; backends may
+    either block there or return a deferred result whose own consumers perform
+    the final CUDA synchronization. ``done()`` is a non-blocking probe of the
+    forward completion event.
     """
 
     def result(self) -> List["InstanceDetections"]: ...
@@ -51,8 +53,10 @@ class _DirectInferenceFuture:
 
     Holds the raw forward output plus the preprocessing metadata needed by
     ``post_process``. The event is recorded on the stream that produced the
-    raw output; ``result()`` synchronizes on it before running CPU decode.
-    Post-process output is memoised so ``result()`` may be called repeatedly.
+    raw output. ``result()`` runs or returns the model's post-processing result;
+    optimized backends order post-processing with CUDA events and defer host
+    synchronization until CPU-visible tensors are copied. Post-process output is
+    memoised so ``result()`` may be called repeatedly.
     """
 
     # No __slots__: adapters attach per-request context through
@@ -218,6 +222,18 @@ def class_names(self) -> List[str]:
     def supported_mask_formats(self) -> Set[InstanceSegmentationMaskFormat]:
         pass
 
+    @property
+    def supports_stream_pipeline(self) -> bool:
+        """Whether this model can safely use adapter-level stream pipelining.
+
+        The default async future only defers ``post_process`` to ``result()`` and
+        does not guarantee the non-blocking deferred GPU handoff that the stream
+        adapter relies on for depth>1 scheduling. Models that implement that
+        contract, such as RF-DETR TensorRT with CUDA graph output handoff, opt in
+        by overriding this property.
+        """
+        return False
+
     def infer(
         self,
         images: Union[torch.Tensor, List[torch.Tensor], np.ndarray, List[np.ndarray]],
diff --git a/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py b/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py
index e234e050d3..1edb4dc053 100644
--- a/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py
+++ b/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py
@@ -247,6 +247,10 @@ def class_names(self) -> List[str]:
     def supported_mask_formats(self) -> Set[InstanceSegmentationMaskFormat]:
         return {"dense", "rle"}
 
+    @property
+    def supports_stream_pipeline(self) -> bool:
+        return self._trt_cuda_graph_cache is not None
+
     def pre_process(
         self,
         images: Union[torch.Tensor, List[torch.Tensor], np.ndarray, List[np.ndarray]],
diff --git a/tests/inference/unit_tests/core/models/test_inference_models_adapters.py b/tests/inference/unit_tests/core/models/test_inference_models_adapters.py
index e1b48c7240..c1e88fb958 100644
--- a/tests/inference/unit_tests/core/models/test_inference_models_adapters.py
+++ b/tests/inference/unit_tests/core/models/test_inference_models_adapters.py
@@ -96,6 +96,30 @@ def _make_pipeline_adapter(
     return adapter
 
 
+def test_pipeline_depth_falls_back_to_one_for_unsupported_models(monkeypatch) -> None:
+    monkeypatch.setattr(
+        "inference.core.models.inference_models_adapters.get_rfdetr_pipeline_depth",
+        lambda: 2,
+    )
+    adapter = object.__new__(InferenceModelsInstanceSegmentationAdapter)
+    adapter._model = SimpleNamespace(supports_stream_pipeline=False)
+
+    assert adapter._resolve_pipeline_depth() == 1
+
+
+def test_pipeline_depth_honors_requested_depth_for_supported_models(
+    monkeypatch,
+) -> None:
+    monkeypatch.setattr(
+        "inference.core.models.inference_models_adapters.get_rfdetr_pipeline_depth",
+        lambda: 3,
+    )
+    adapter = object.__new__(InferenceModelsInstanceSegmentationAdapter)
+    adapter._model = SimpleNamespace(supports_stream_pipeline=True)
+
+    assert adapter._resolve_pipeline_depth() == 3
+
+
 def test_prepare_multi_label_response_uses_class_ids_for_predicted_classes() -> None:
     """The model's `post_process` is the source of truth for which classes
     are "predicted" (it owns the priority chain user → per-class → global

From 4b1645f21d163b8a12c87a036f8a3600edb41bed Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Wed, 3 Jun 2026 03:57:17 +0000
Subject: [PATCH 51/76] Use existing Roboflow frame handler

---
 .../interfaces/stream/inference_pipeline.py     |  5 +++--
 .../stream/model_handlers/roboflow_models.py    | 17 -----------------
 2 files changed, 3 insertions(+), 19 deletions(-)

diff --git a/inference/core/interfaces/stream/inference_pipeline.py b/inference/core/interfaces/stream/inference_pipeline.py
index 712f7d3037..e30d404eb5 100644
--- a/inference/core/interfaces/stream/inference_pipeline.py
+++ b/inference/core/interfaces/stream/inference_pipeline.py
@@ -44,7 +44,7 @@
     SinkHandler,
 )
 from inference.core.interfaces.stream.model_handlers.roboflow_models import (
-    RoboflowModelHandler,
+    default_process_frame,
 )
 from inference.core.interfaces.stream.sinks import active_learning_sink, multi_sink
 from inference.core.interfaces.stream.utils import (
@@ -253,7 +253,8 @@ def init(
             tradeoff_factor=tradeoff_factor,
         )
         model = get_model(model_id=model_id, api_key=api_key)
-        on_video_frame = RoboflowModelHandler(
+        on_video_frame = partial(
+            default_process_frame,
             model=model,
             inference_config=inference_config,
         )
diff --git a/inference/core/interfaces/stream/model_handlers/roboflow_models.py b/inference/core/interfaces/stream/model_handlers/roboflow_models.py
index c60badea4e..cb2d995a9e 100644
--- a/inference/core/interfaces/stream/model_handlers/roboflow_models.py
+++ b/inference/core/interfaces/stream/model_handlers/roboflow_models.py
@@ -33,20 +33,3 @@ def default_process_frame(
         )
         for p in predictions
     ]
-
-
-class RoboflowModelHandler:
-    def __init__(
-        self,
-        model: OnnxRoboflowInferenceModel,
-        inference_config: ModelConfig,
-    ):
-        self._model = model
-        self._inference_config = inference_config
-
-    def __call__(self, video_frame: List[VideoFrame]) -> List[dict]:
-        return default_process_frame(
-            video_frame=video_frame,
-            model=self._model,
-            inference_config=self._inference_config,
-        )

From a3171636ac69c385a33c94403938c111fe3f34dc Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Wed, 3 Jun 2026 04:02:23 +0000
Subject: [PATCH 52/76] Isolate pipelined workflow runner

---
 .../interfaces/stream/inference_pipeline.py   |   7 +-
 .../stream/model_handlers/workflows.py        | 166 ++++++++++--------
 .../core/interfaces/stream/test_workflows.py  |  35 +++-
 3 files changed, 132 insertions(+), 76 deletions(-)

diff --git a/inference/core/interfaces/stream/inference_pipeline.py b/inference/core/interfaces/stream/inference_pipeline.py
index e30d404eb5..31dced0c4b 100644
--- a/inference/core/interfaces/stream/inference_pipeline.py
+++ b/inference/core/interfaces/stream/inference_pipeline.py
@@ -608,6 +608,7 @@ def init_with_workflow(
         try:
             from inference.core.interfaces.stream.model_handlers.workflows import (
                 WorkflowRunner,
+                wrap_workflow_runner_for_stream_pipeline,
             )
             from inference.core.roboflow_api import get_workflow_specification
             from inference.core.workflows.execution_engine.core import ExecutionEngine
@@ -656,7 +657,7 @@ def init_with_workflow(
                 workflow_id=workflow_id,
                 profiler=profiler,
             )
-            on_video_frame = WorkflowRunner(
+            workflow_runner = WorkflowRunner(
                 workflows_parameters=workflows_parameters,
                 execution_engine=execution_engine,
                 image_input_name=image_input_name,
@@ -664,6 +665,10 @@ def init_with_workflow(
                 serialize_results=serialize_results,
                 _is_preview=_is_preview,
             )
+            on_video_frame = wrap_workflow_runner_for_stream_pipeline(
+                workflow_runner=workflow_runner,
+                execution_engine=execution_engine,
+            )
         except ImportError as error:
             raise CannotInitialiseModelError(
                 f"Could not initialise workflow processing due to lack of dependencies required. "
diff --git a/inference/core/interfaces/stream/model_handlers/workflows.py b/inference/core/interfaces/stream/model_handlers/workflows.py
index b7f7f904d7..bba89e3b46 100644
--- a/inference/core/interfaces/stream/model_handlers/workflows.py
+++ b/inference/core/interfaces/stream/model_handlers/workflows.py
@@ -29,12 +29,69 @@ def __init__(
         self._video_metadata_input_name = video_metadata_input_name
         self._serialize_results = serialize_results
         self._is_preview = _is_preview
+
+    def __call__(self, video_frames: List[VideoFrame]) -> List[dict]:
+        return self._run_workflow(video_frames=video_frames)
+
+    def _run_workflow(self, video_frames: List[VideoFrame]) -> List[dict]:
+        workflows_parameters: Dict[str, Any] = dict(self._workflows_parameters or {})
+        # TODO: pass fps reflecting each stream to workflows_parameters
+        fps = video_frames[0].fps
+        if video_frames[0].measured_fps:
+            fps = video_frames[0].measured_fps
+        if fps is None:
+            # for FPS reporting we expect 0 when FPS cannot be determined
+            fps = 0
+        video_metadata_for_images = [
+            VideoMetadata(
+                video_identifier=(
+                    str(video_frame.source_id)
+                    if video_frame.source_id
+                    else "default_source"
+                ),
+                frame_number=video_frame.frame_id,
+                frame_timestamp=video_frame.frame_timestamp,
+                fps=video_frame.fps,
+                measured_fps=video_frame.measured_fps,
+                comes_from_video_file=video_frame.comes_from_video_file,
+            )
+            for video_frame in video_frames
+        ]
+        workflows_parameters[self._image_input_name] = [
+            {
+                "type": "numpy_object",
+                "value": video_frame.image,
+                "video_metadata": video_metadata,
+            }
+            for video_frame, video_metadata in zip(
+                video_frames, video_metadata_for_images
+            )
+        ]
+        workflows_parameters[self._video_metadata_input_name] = (
+            video_metadata_for_images
+        )
+        return self._execution_engine.run(
+            runtime_parameters=workflows_parameters,
+            fps=fps,
+            serialize_results=self._serialize_results,
+            _is_preview=self._is_preview,
+        )
+
+
+class PipelinedWorkflowRunner:
+    def __init__(
+        self,
+        workflow_runner: WorkflowRunner,
+        stream_steps: List[_StreamPipelineStep],
+    ) -> None:
+        self._workflow_runner = workflow_runner
+        self._stream_steps = stream_steps
         self._pending_video_frames: List[List[VideoFrame]] = []
 
     def __call__(
         self, video_frames: List[VideoFrame]
     ) -> Optional[InferenceHandlerResult]:
-        predictions = self._run_workflow(video_frames=video_frames)
+        predictions = self._workflow_runner(video_frames=video_frames)
         stream_buffer_depth = self._stream_buffer_depth()
         if stream_buffer_depth <= 0:
             self._pending_video_frames.clear()
@@ -52,7 +109,7 @@ def __call__(
         )
 
     def flush(self) -> Optional[List[InferenceHandlerResult]]:
-        stream_steps = self._stream_pipeline_steps()
+        stream_steps = self._stream_steps
         if not stream_steps:
             self._pending_video_frames.clear()
             return None
@@ -85,80 +142,49 @@ def flush(self) -> Optional[List[InferenceHandlerResult]]:
         return results
 
     def close(self) -> None:
-        for stream_step in self._stream_pipeline_steps():
+        for stream_step in self._stream_steps:
             close_fn = getattr(stream_step.step, "close_stream_pipeline", None)
             if callable(close_fn):
                 close_fn()
 
-    def _run_workflow(self, video_frames: List[VideoFrame]) -> List[dict]:
-        workflows_parameters: Dict[str, Any] = dict(self._workflows_parameters or {})
-        # TODO: pass fps reflecting each stream to workflows_parameters
-        fps = video_frames[0].fps
-        if video_frames[0].measured_fps:
-            fps = video_frames[0].measured_fps
-        if fps is None:
-            # for FPS reporting we expect 0 when FPS cannot be determined
-            fps = 0
-        video_metadata_for_images = [
-            VideoMetadata(
-                video_identifier=(
-                    str(video_frame.source_id)
-                    if video_frame.source_id
-                    else "default_source"
-                ),
-                frame_number=video_frame.frame_id,
-                frame_timestamp=video_frame.frame_timestamp,
-                fps=video_frame.fps,
-                measured_fps=video_frame.measured_fps,
-                comes_from_video_file=video_frame.comes_from_video_file,
-            )
-            for video_frame in video_frames
-        ]
-        workflows_parameters[self._image_input_name] = [
-            {
-                "type": "numpy_object",
-                "value": video_frame.image,
-                "video_metadata": video_metadata,
-            }
-            for video_frame, video_metadata in zip(
-                video_frames, video_metadata_for_images
-            )
-        ]
-        workflows_parameters[self._video_metadata_input_name] = (
-            video_metadata_for_images
-        )
-        return self._execution_engine.run(
-            runtime_parameters=workflows_parameters,
-            fps=fps,
-            serialize_results=self._serialize_results,
-            _is_preview=self._is_preview,
-        )
-
-    def _uses_stream_buffering(self) -> bool:
-        return self._stream_buffer_depth() > 0
-
     def _stream_buffer_depth(self) -> int:
-        stream_steps = self._stream_pipeline_steps()
+        stream_steps = self._stream_steps
         if not stream_steps:
             return 0
         return max(stream_step.depth for stream_step in stream_steps)
 
-    def _stream_pipeline_steps(self) -> List[_StreamPipelineStep]:
-        engine = getattr(self._execution_engine, "_engine", None)
-        compiled_workflow = getattr(engine, "_compiled_workflow", None)
-        steps = getattr(compiled_workflow, "steps", {})
-        stream_steps = []
-        for initialised_step in steps.values():
-            step_instance = getattr(initialised_step, "step", None)
-            is_stream_pipelined = getattr(step_instance, "is_stream_pipelined", None)
-            if callable(is_stream_pipelined) and is_stream_pipelined():
-                get_depth = getattr(step_instance, "stream_pipeline_depth", None)
-                if callable(get_depth):
-                    depth = int(get_depth())
-                else:
-                    depth = 1
-                if depth > 0:
-                    stream_steps.append(
-                        _StreamPipelineStep(step=step_instance, depth=depth)
-                    )
-        return stream_steps
+
+def wrap_workflow_runner_for_stream_pipeline(
+    workflow_runner: WorkflowRunner,
+    execution_engine: ExecutionEngine,
+):
+    stream_steps = _stream_pipeline_steps(execution_engine=execution_engine)
+    if not stream_steps:
+        return workflow_runner
+    return PipelinedWorkflowRunner(
+        workflow_runner=workflow_runner,
+        stream_steps=stream_steps,
+    )
+
+
+def _stream_pipeline_steps(
+    execution_engine: ExecutionEngine,
+) -> List[_StreamPipelineStep]:
+    engine = getattr(execution_engine, "_engine", None)
+    compiled_workflow = getattr(engine, "_compiled_workflow", None)
+    steps = getattr(compiled_workflow, "steps", {})
+    stream_steps = []
+    for initialised_step in steps.values():
+        step_instance = getattr(initialised_step, "step", None)
+        is_stream_pipelined = getattr(step_instance, "is_stream_pipelined", None)
+        if callable(is_stream_pipelined) and is_stream_pipelined():
+            get_depth = getattr(step_instance, "stream_pipeline_depth", None)
+            if callable(get_depth):
+                depth = int(get_depth())
+            else:
+                depth = 1
+            if depth > 0:
+                stream_steps.append(
+                    _StreamPipelineStep(step=step_instance, depth=depth)
+                )
+    return stream_steps
diff --git a/tests/inference/unit_tests/core/interfaces/stream/test_workflows.py b/tests/inference/unit_tests/core/interfaces/stream/test_workflows.py
index 1820ca124f..dd5f131898 100644
--- a/tests/inference/unit_tests/core/interfaces/stream/test_workflows.py
+++ b/tests/inference/unit_tests/core/interfaces/stream/test_workflows.py
@@ -5,7 +5,11 @@
 import numpy as np
 
 from inference.core.interfaces.camera.entities import VideoFrame
-from inference.core.interfaces.stream.model_handlers.workflows import WorkflowRunner
+from inference.core.interfaces.stream.model_handlers.workflows import (
+    PipelinedWorkflowRunner,
+    WorkflowRunner,
+    wrap_workflow_runner_for_stream_pipeline,
+)
 from inference.core.workflows.core_steps.common.entities import StepExecutionMode
 from inference.core.workflows.core_steps.models.roboflow.instance_segmentation.v3 import (
     RoboflowInstanceSegmentationModelBlockV3,
@@ -167,9 +171,7 @@ def test_workflow_runner_without_stream_buffering_returns_current_frame() -> Non
 
     result = runner([frame])
 
-    assert result is not None
-    assert result.predictions == [{"predictions": "frame-1"}]
-    assert result.video_frames == [frame]
+    assert result == [{"predictions": "frame-1"}]
     assert engine.calls == [
         {
             "frame_number": 1,
@@ -180,14 +182,37 @@ def test_workflow_runner_without_stream_buffering_returns_current_frame() -> Non
     ]
 
 
+def test_wrap_workflow_runner_leaves_non_pipelined_workflows_unchanged() -> None:
+    engine = _FakeExecutionEngine(stream_buffer_depth=0)
+    runner = WorkflowRunner(
+        workflows_parameters=None,
+        execution_engine=engine,
+        image_input_name="image",
+        video_metadata_input_name="video_metadata",
+    )
+
+    assert (
+        wrap_workflow_runner_for_stream_pipeline(
+            workflow_runner=runner,
+            execution_engine=engine,
+        )
+        is runner
+    )
+
+
 def test_workflow_runner_buffers_frames_until_delayed_prediction_arrives() -> None:
     engine = _FakeExecutionEngine(stream_buffer_depth=1)
-    runner = WorkflowRunner(
+    workflow_runner = WorkflowRunner(
         workflows_parameters=None,
         execution_engine=engine,
         image_input_name="image",
         video_metadata_input_name="video_metadata",
     )
+    runner = wrap_workflow_runner_for_stream_pipeline(
+        workflow_runner=workflow_runner,
+        execution_engine=engine,
+    )
+    assert isinstance(runner, PipelinedWorkflowRunner)
     frame_1 = _make_frame(1)
     frame_2 = _make_frame(2)
 

From 5d799bf2a82b2c5b9c48d7f85a78a291738ed49e Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Wed, 3 Jun 2026 05:06:34 +0000
Subject: [PATCH 53/76] Fix late stream pipeline activation buffering

---
 .../stream/model_handlers/workflows.py        | 37 +++++----
 .../roboflow/instance_segmentation/v3.py      |  3 +
 .../core/interfaces/stream/test_workflows.py  | 83 +++++++++++++++++++
 3 files changed, 107 insertions(+), 16 deletions(-)

diff --git a/inference/core/interfaces/stream/model_handlers/workflows.py b/inference/core/interfaces/stream/model_handlers/workflows.py
index bba89e3b46..9606e048d3 100644
--- a/inference/core/interfaces/stream/model_handlers/workflows.py
+++ b/inference/core/interfaces/stream/model_handlers/workflows.py
@@ -10,7 +10,6 @@
 @dataclass(frozen=True)
 class _StreamPipelineStep:
     step: Any
-    depth: int
 
 
 class WorkflowRunner:
@@ -148,10 +147,10 @@ def close(self) -> None:
                 close_fn()
 
     def _stream_buffer_depth(self) -> int:
-        stream_steps = self._stream_steps
-        if not stream_steps:
-            return 0
-        return max(stream_step.depth for stream_step in stream_steps)
+        return max(
+            (_stream_step_depth(stream_step) for stream_step in self._stream_steps),
+            default=0,
+        )
 
 
 def wrap_workflow_runner_for_stream_pipeline(
@@ -176,15 +175,21 @@ def _stream_pipeline_steps(
     stream_steps = []
     for initialised_step in steps.values():
         step_instance = getattr(initialised_step, "step", None)
-        is_stream_pipelined = getattr(step_instance, "is_stream_pipelined", None)
-        if callable(is_stream_pipelined) and is_stream_pipelined():
-            get_depth = getattr(step_instance, "stream_pipeline_depth", None)
-            if callable(get_depth):
-                depth = int(get_depth())
-            else:
-                depth = 1
-            if depth > 0:
-                stream_steps.append(
-                    _StreamPipelineStep(step=step_instance, depth=depth)
-                )
+        if _is_stream_pipeline_step(step_instance=step_instance):
+            stream_steps.append(_StreamPipelineStep(step=step_instance))
     return stream_steps
+
+
+def _is_stream_pipeline_step(step_instance: Any) -> bool:
+    is_stream_pipelined = getattr(step_instance, "is_stream_pipelined", None)
+    if callable(is_stream_pipelined) and is_stream_pipelined():
+        return True
+    can_activate_pipeline = getattr(step_instance, "can_activate_stream_pipeline", None)
+    return callable(can_activate_pipeline) and can_activate_pipeline()
+
+
+def _stream_step_depth(stream_step: _StreamPipelineStep) -> int:
+    get_depth = getattr(stream_step.step, "stream_pipeline_depth", None)
+    if not callable(get_depth):
+        return 0
+    return max(0, int(get_depth()))
diff --git a/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v3.py b/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v3.py
index f15882a784..59c28084ba 100644
--- a/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v3.py
+++ b/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v3.py
@@ -499,6 +499,9 @@ def is_stream_pipelined(self) -> bool:
             and getattr(model, "_pipeline_depth", 1) > 1
         )
 
+    def can_activate_stream_pipeline(self) -> bool:
+        return self._step_execution_mode is StepExecutionMode.LOCAL
+
     def stream_pipeline_depth(self) -> int:
         if not self.is_stream_pipelined():
             return 0
diff --git a/tests/inference/unit_tests/core/interfaces/stream/test_workflows.py b/tests/inference/unit_tests/core/interfaces/stream/test_workflows.py
index dd5f131898..806560c434 100644
--- a/tests/inference/unit_tests/core/interfaces/stream/test_workflows.py
+++ b/tests/inference/unit_tests/core/interfaces/stream/test_workflows.py
@@ -68,6 +68,55 @@ def run(
         return [{"predictions": f"frame-{prediction_frame}"}]
 
 
+class _FakeLateActivatingStep:
+    def __init__(self, active_stream_buffer_depth: int) -> None:
+        self._active_stream_buffer_depth = active_stream_buffer_depth
+        self._stream_buffer_depth = 0
+        self.flush_calls = 0
+
+    def can_activate_stream_pipeline(self) -> bool:
+        return True
+
+    def is_stream_pipelined(self) -> bool:
+        return self._stream_buffer_depth > 0
+
+    def stream_pipeline_depth(self) -> int:
+        return self._stream_buffer_depth
+
+    def activate(self) -> None:
+        self._stream_buffer_depth = self._active_stream_buffer_depth
+
+    def flush_stream_pipeline(self):
+        self.flush_calls += 1
+        return [[{"predictions": "frame-2"}]]
+
+
+class _FakeLateActivatingExecutionEngine:
+    def __init__(self, active_stream_buffer_depth: int) -> None:
+        self.step = _FakeLateActivatingStep(
+            active_stream_buffer_depth=active_stream_buffer_depth
+        )
+        self._engine = SimpleNamespace(
+            _compiled_workflow=SimpleNamespace(
+                steps={"segmentation": SimpleNamespace(step=self.step)}
+            )
+        )
+
+    def run(
+        self,
+        runtime_parameters,
+        fps,
+        serialize_results,
+        _is_preview,
+    ):
+        frame_number = runtime_parameters["image"][0]["video_metadata"].frame_number
+        # Real RF-DETR workflow steps only know whether stream pipelining is
+        # active after the first model request has loaded the concrete model.
+        self.step.activate()
+        prediction_frame = frame_number - self.step.stream_pipeline_depth()
+        return [{"predictions": f"frame-{prediction_frame}"}]
+
+
 class _ImmediateExecutor:
     def submit(self, fn, *args, **kwargs) -> Future:
         future = Future()
@@ -247,6 +296,40 @@ def test_workflow_runner_buffers_frames_until_delayed_prediction_arrives() -> No
     ]
 
 
+def test_workflow_runner_buffers_when_stream_pipeline_activates_after_first_run() -> (
+    None
+):
+    engine = _FakeLateActivatingExecutionEngine(active_stream_buffer_depth=1)
+    workflow_runner = WorkflowRunner(
+        workflows_parameters=None,
+        execution_engine=engine,
+        image_input_name="image",
+        video_metadata_input_name="video_metadata",
+    )
+    runner = wrap_workflow_runner_for_stream_pipeline(
+        workflow_runner=workflow_runner,
+        execution_engine=engine,
+    )
+    assert isinstance(runner, PipelinedWorkflowRunner)
+    assert engine.step.stream_pipeline_depth() == 0
+    frame_1 = _make_frame(1)
+    frame_2 = _make_frame(2)
+
+    first_result = runner([frame_1])
+    second_result = runner([frame_2])
+    flushed_results = runner.flush()
+
+    assert first_result is None
+    assert second_result is not None
+    assert second_result.predictions == [{"predictions": "frame-1"}]
+    assert second_result.video_frames == [frame_1]
+    assert flushed_results is not None
+    assert len(flushed_results) == 1
+    assert flushed_results[0].predictions == [{"predictions": "frame-2"}]
+    assert flushed_results[0].video_frames == [frame_2]
+    assert engine.step.flush_calls == 1
+
+
 def test_instance_segmentation_stream_flush_drains_model_without_rerunning_workflow() -> (
     None
 ):

From 30bd49fb0256fa88fa27848df37722c4e4529713 Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Wed, 3 Jun 2026 05:13:32 +0000
Subject: [PATCH 54/76] Clarify RF-DETR TRT async stream handoff

---
 .../rfdetr/rfdetr_instance_segmentation_trt.py       | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py b/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py
index 1edb4dc053..4ce004b4f7 100644
--- a/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py
+++ b/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py
@@ -300,6 +300,9 @@ def forward(
         cache = self._trt_cuda_graph_cache if not disable_cuda_graphs else None
         preproc_event = getattr(self, "_fast_preproc_event", None)
         if preproc_event is not None:
+            # The Triton preprocess fast path runs on a separate CUDA stream.
+            # TensorRT consumes that tensor on `_inference_stream`, so record an
+            # explicit stream dependency instead of synchronizing the host.
             self._inference_stream.wait_event(preproc_event)
             self._fast_preproc_event = None
         with self._lock:
@@ -349,10 +352,16 @@ def forward_async(
                 )
         graph_state = getattr(raw[0], "_trt_graph_state", None)
         if graph_state is None:
+            # Dynamic TensorRT execution does not expose graph-owned output
+            # buffers, so the future must wait for inference completion before
+            # handing outputs to postprocess.
             self._inference_stream.synchronize()
             return _DirectInferenceFuture(self, raw, pre_processing_meta, None, kwargs)
         produce_event = getattr(raw[0], "_trt_produce_event", None)
         if kwargs.get("reuse_trt_graph_outputs", False):
+            # The stream pipeline schedules postprocess before launching the
+            # next graph replay. That ordering lets postprocess read TensorRT's
+            # graph output buffers directly and avoids the DtoD clone below.
             future_kwargs = dict(kwargs)
             future_kwargs["defer_postprocess_sync"] = True
             return _DirectInferenceFuture(
@@ -383,6 +392,9 @@ def forward_async(
         torch.cuda.set_stream(stream)
         try:
             raw0, raw1, raw2 = raw
+            # Non-pipelined async callers may launch the next graph replay
+            # before this future is consumed. Clone into a small ring so the
+            # next TensorRT run can safely reuse its graph output buffers.
             clones[0].copy_(raw0, non_blocking=True)
             clones[1].copy_(raw1, non_blocking=True)
             clones[2].copy_(raw2, non_blocking=True)

From 9582947ca8933be06d65286ee059d30acdf8e6ab Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Thu, 4 Jun 2026 19:56:31 +0000
Subject: [PATCH 55/76] Gate RF-DETR opt paths behind disabled defaults

---
 .../interfaces/stream/inference_pipeline.py   | 41 ++++++++++++---
 .../core/models/inference_models_adapters.py  | 28 ++++++++--
 .../roboflow/instance_segmentation/v3.py      |  6 ++-
 .../rfdetr_instance_segmentation_trt.py       | 19 +++++--
 .../core/interfaces/stream/test_workflows.py  | 22 ++++++++
 .../models/test_inference_models_adapters.py  | 51 +++++++++++++++++++
 .../core/utils/test_rle_to_polygon.py         | 42 +++++++++++++--
 7 files changed, 190 insertions(+), 19 deletions(-)

diff --git a/inference/core/interfaces/stream/inference_pipeline.py b/inference/core/interfaces/stream/inference_pipeline.py
index 31dced0c4b..79b04def9f 100644
--- a/inference/core/interfaces/stream/inference_pipeline.py
+++ b/inference/core/interfaces/stream/inference_pipeline.py
@@ -1,3 +1,4 @@
+import os
 from concurrent.futures import Future, ThreadPoolExecutor
 from datetime import datetime
 from enum import Enum
@@ -921,12 +922,29 @@ def _execute_inference(self) -> None:
                 self._watchdog.on_model_inference_started(
                     frames=video_frames,
                 )
-                inference_result = self._on_video_frame(video_frames)
-                self._queue_inference_result(
-                    inference_result=inference_result,
-                    fallback_video_frames=video_frames,
+                predictions = self._on_video_frame(video_frames)
+                if _rfdetr_stream_pipeline_enabled():
+                    self._queue_inference_result(
+                        inference_result=predictions,
+                        fallback_video_frames=video_frames,
+                    )
+                    continue
+                self._watchdog.on_model_prediction_ready(
+                    frames=video_frames,
                 )
-            self._drain_inference_handler()
+                self._predictions_queue.put((predictions, video_frames))
+                send_inference_pipeline_status_update(
+                    severity=UpdateSeverity.DEBUG,
+                    event_type=INFERENCE_COMPLETED_EVENT,
+                    payload={
+                        "frames_ids": [f.frame_id for f in video_frames],
+                        "frames_timestamps": [f.frame_timestamp for f in video_frames],
+                        "sources_id": [f.source_id for f in video_frames],
+                    },
+                    status_update_handlers=self._status_update_handlers,
+                )
+            if _rfdetr_stream_pipeline_enabled():
+                self._drain_inference_handler()
 
         except Exception as error:
             payload = {
@@ -942,7 +960,8 @@ def _execute_inference(self) -> None:
             )
             logger.exception(f"Encountered inference error: {error}")
         finally:
-            self._close_inference_handler()
+            if _rfdetr_stream_pipeline_enabled():
+                self._close_inference_handler()
             self._predictions_queue.put(None)
             send_inference_pipeline_status_update(
                 severity=UpdateSeverity.INFO,
@@ -960,7 +979,8 @@ def _dispatch_inference_results(self) -> None:
                 self._predictions_queue.task_done()
                 break
             predictions, video_frames = inference_results
-            predictions = _resolve_prediction_futures(predictions)
+            if _rfdetr_stream_pipeline_enabled():
+                predictions = _resolve_prediction_futures(predictions)
             if self._on_prediction is not None:
                 self._handle_predictions_dispatching(
                     predictions=predictions,
@@ -1158,3 +1178,10 @@ def _resolve_prediction_futures(value: Any) -> Any:
             key: _resolve_prediction_futures(element) for key, element in value.items()
         }
     return value
+
+
+def _rfdetr_stream_pipeline_enabled() -> bool:
+    try:
+        return int(os.getenv("RFDETR_PIPELINE_DEPTH", "1").strip()) > 1
+    except ValueError:
+        return False
diff --git a/inference/core/models/inference_models_adapters.py b/inference/core/models/inference_models_adapters.py
index 502d031d92..8b385f4c07 100644
--- a/inference/core/models/inference_models_adapters.py
+++ b/inference/core/models/inference_models_adapters.py
@@ -10,6 +10,7 @@
 import numpy as np
 import torch
 from PIL import Image, ImageDraw, ImageFont
+from pycocotools import mask as mask_utils
 
 from inference.core.entities.requests import (
     ClassificationInferenceRequest,
@@ -49,7 +50,7 @@
 from inference.core.models.base import Model
 from inference.core.roboflow_api import get_extra_weights_provider_headers
 from inference.core.utils.image_utils import load_image_bgr, load_image_rgb
-from inference.core.utils.postprocess import bitpacked_masks2poly, masks2poly
+from inference.core.utils.postprocess import bitpacked_masks2poly, mask2poly, masks2poly
 from inference.core.utils.rle_to_polygon import rle_masks_to_polygons
 from inference.core.utils.visualisation import draw_detection_predictions
 from inference.models.aliases import resolve_roboflow_model_alias
@@ -68,7 +69,10 @@
     PreProcessingOverrides,
     SemanticSegmentationModel,
 )
-from inference_models.configuration import get_rfdetr_pipeline_depth
+from inference_models.configuration import (
+    INFERENCE_MODELS_RFDETR_TRITON_POSTPROC_ENABLED,
+    get_rfdetr_pipeline_depth,
+)
 from inference_models.models.base.async_handoff import (
     adapter_gpu_work_submitted,
     attach_adapter_mapped_kwargs,
@@ -631,7 +635,11 @@ def _build_responses_from_detections(
         # dataclasses avoid pydantic validation + `model_dump` overhead per
         # frame. Keep the pydantic path for RLE responses and for non-workflow
         # callers that rely on the response model type.
-        use_dc = kwargs.get("source") == "workflow-execution" and not return_in_rle
+        use_dc = (
+            kwargs.get("source") == "workflow-execution"
+            and not return_in_rle
+            and getattr(self, "_pipeline_depth", 1) > 1
+        )
 
         responses: List[InstanceSegmentationInferenceResponse] = []
         for preproc_metadata, det in zip(preprocess_return_metadata, detections_list):
@@ -911,7 +919,19 @@ def draw_predictions(
 
 
 def rle_masks2poly(masks: InstancesRLEMasks) -> List[np.ndarray]:
-    return rle_masks_to_polygons(masks=masks)
+    if INFERENCE_MODELS_RFDETR_TRITON_POSTPROC_ENABLED:
+        return rle_masks_to_polygons(masks=masks)
+
+    segments = []
+    h, w = masks.image_size
+    for counts in masks.masks:
+        rle_dict = {"size": [h, w], "counts": counts}
+        decoded_rle = np.ascontiguousarray(mask_utils.decode(rle_dict))
+        if not np.any(decoded_rle):
+            segments.append(np.zeros((0, 2), dtype=np.float32))
+            continue
+        segments.append(mask2poly(decoded_rle))
+    return segments
 
 
 class InferenceModelsKeyPointsDetectionAdapter(Model):
diff --git a/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v3.py b/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v3.py
index 59c28084ba..6732ecbb11 100644
--- a/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v3.py
+++ b/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v3.py
@@ -54,6 +54,7 @@
     WorkflowBlock,
     WorkflowBlockManifest,
 )
+from inference_models.configuration import get_rfdetr_pipeline_depth
 from inference_models.models.base.async_handoff import get_async_response_future
 from inference_sdk import InferenceConfiguration, InferenceHTTPClient
 
@@ -500,7 +501,10 @@ def is_stream_pipelined(self) -> bool:
         )
 
     def can_activate_stream_pipeline(self) -> bool:
-        return self._step_execution_mode is StepExecutionMode.LOCAL
+        return (
+            self._step_execution_mode is StepExecutionMode.LOCAL
+            and get_rfdetr_pipeline_depth() > 1
+        )
 
     def stream_pipeline_depth(self) -> int:
         if not self.is_stream_pipelined():
diff --git a/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py b/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py
index 4ce004b4f7..19d3b3ec1a 100644
--- a/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py
+++ b/inference_models/inference_models/models/rfdetr/rfdetr_instance_segmentation_trt.py
@@ -14,6 +14,7 @@
     DEFAULT_DEVICE,
     INFERENCE_MODELS_RFDETR_DEFAULT_CONFIDENCE,
     INFERENCE_MODELS_RFDETR_TRITON_PREPROC_ENABLED,
+    get_rfdetr_pipeline_depth,
 )
 from inference_models.entities import ColorFormat, Confidence
 from inference_models.errors import (
@@ -231,11 +232,13 @@ def __init__(
         self._trt_cuda_graph_cache = trt_cuda_graph_cache
         self._lock = threading.Lock()
         self._inference_stream = torch.cuda.Stream(device=self._device)
-        self._pre_process_cuda_stream = torch.cuda.Stream(device=self._device)
-        self._post_process_cuda_stream = torch.cuda.Stream(device=self._device)
         self._thread_local_storage = threading.local()
         self.recommended_parameters = recommended_parameters
+        self._stream_pipeline_enabled = get_rfdetr_pipeline_depth() > 1
         self._fast_preprocess_enabled = INFERENCE_MODELS_RFDETR_TRITON_PREPROC_ENABLED
+        if self._stream_pipeline_enabled:
+            self._pre_process_cuda_stream = torch.cuda.Stream(device=self._device)
+            self._post_process_cuda_stream = torch.cuda.Stream(device=self._device)
         if self._fast_preprocess_enabled:
             self._fast_preprocess_runtime = FastPreprocessRuntime(device=self._device)
 
@@ -283,7 +286,7 @@ def pre_process(
                 pre_processing_overrides=pre_processing_overrides,
             )
         self._pre_process_stream.synchronize()
-        if self._fast_preprocess_enabled:
+        if self._stream_pipeline_enabled:
             setattr(
                 pre_processed_images,
                 "_pre_processing_meta",
@@ -493,6 +496,8 @@ def post_process(
 
     @property
     def _pre_process_stream(self) -> torch.cuda.Stream:
+        if self._stream_pipeline_enabled:
+            return self._pre_process_cuda_stream
         if not hasattr(self._thread_local_storage, "pre_process_stream"):
             self._thread_local_storage.pre_process_stream = torch.cuda.Stream(
                 device=self._device
@@ -501,4 +506,10 @@ def _pre_process_stream(self) -> torch.cuda.Stream:
 
     @property
     def _post_process_stream(self) -> torch.cuda.Stream:
-        return self._post_process_cuda_stream
+        if self._stream_pipeline_enabled:
+            return self._post_process_cuda_stream
+        if not hasattr(self._thread_local_storage, "post_process_stream"):
+            self._thread_local_storage.post_process_stream = torch.cuda.Stream(
+                device=self._device
+            )
+        return self._thread_local_storage.post_process_stream
diff --git a/tests/inference/unit_tests/core/interfaces/stream/test_workflows.py b/tests/inference/unit_tests/core/interfaces/stream/test_workflows.py
index 806560c434..e39d759bd7 100644
--- a/tests/inference/unit_tests/core/interfaces/stream/test_workflows.py
+++ b/tests/inference/unit_tests/core/interfaces/stream/test_workflows.py
@@ -330,6 +330,28 @@ def test_workflow_runner_buffers_when_stream_pipeline_activates_after_first_run(
     assert engine.step.flush_calls == 1
 
 
+def test_instance_segmentation_stream_pipeline_activation_requires_depth_above_one(
+    monkeypatch,
+) -> None:
+    block = RoboflowInstanceSegmentationModelBlockV3(
+        model_manager=_FakeModelManager(inference_results=[]),
+        api_key="api-key",
+        step_execution_mode=StepExecutionMode.LOCAL,
+    )
+
+    monkeypatch.setattr(
+        "inference.core.workflows.core_steps.models.roboflow.instance_segmentation.v3.get_rfdetr_pipeline_depth",
+        lambda: 1,
+    )
+    assert block.can_activate_stream_pipeline() is False
+
+    monkeypatch.setattr(
+        "inference.core.workflows.core_steps.models.roboflow.instance_segmentation.v3.get_rfdetr_pipeline_depth",
+        lambda: 2,
+    )
+    assert block.can_activate_stream_pipeline() is True
+
+
 def test_instance_segmentation_stream_flush_drains_model_without_rerunning_workflow() -> (
     None
 ):
diff --git a/tests/inference/unit_tests/core/models/test_inference_models_adapters.py b/tests/inference/unit_tests/core/models/test_inference_models_adapters.py
index c1e88fb958..85691d4bc8 100644
--- a/tests/inference/unit_tests/core/models/test_inference_models_adapters.py
+++ b/tests/inference/unit_tests/core/models/test_inference_models_adapters.py
@@ -7,6 +7,10 @@
 import pytest
 import torch
 
+from inference.core.entities.responses.inference import (
+    InstanceSegmentationInferenceResponse,
+    InstanceSegmentationInferenceResponseDC,
+)
 from inference.core.exceptions import PostProcessingError
 from inference.core.models.inference_models_adapters import (
     InferenceModelsInstanceSegmentationAdapter,
@@ -15,6 +19,7 @@
 )
 from inference_models import (
     ClassificationPrediction,
+    InstanceDetections,
     MultiLabelClassificationPrediction,
 )
 
@@ -120,6 +125,52 @@ def test_pipeline_depth_honors_requested_depth_for_supported_models(
     assert adapter._resolve_pipeline_depth() == 3
 
 
+def test_workflow_response_fast_dataclass_path_is_disabled_at_depth_one() -> None:
+    adapter = object.__new__(InferenceModelsInstanceSegmentationAdapter)
+    adapter._pipeline_depth = 1
+    adapter.class_names = ["car"]
+    metadata = [SimpleNamespace(original_size=SimpleNamespace(width=4, height=4))]
+    detections = [
+        InstanceDetections(
+            xyxy=torch.tensor([[1, 1, 3, 3]], dtype=torch.int32),
+            confidence=torch.tensor([0.9], dtype=torch.float32),
+            class_id=torch.tensor([0], dtype=torch.int32),
+            mask=torch.zeros((1, 4, 4), dtype=torch.uint8),
+        )
+    ]
+
+    responses = adapter._build_responses_from_detections(
+        detections,
+        metadata,
+        source="workflow-execution",
+    )
+
+    assert isinstance(responses[0], InstanceSegmentationInferenceResponse)
+
+
+def test_workflow_response_fast_dataclass_path_is_enabled_above_depth_one() -> None:
+    adapter = object.__new__(InferenceModelsInstanceSegmentationAdapter)
+    adapter._pipeline_depth = 2
+    adapter.class_names = ["car"]
+    metadata = [SimpleNamespace(original_size=SimpleNamespace(width=4, height=4))]
+    detections = [
+        InstanceDetections(
+            xyxy=torch.tensor([[1, 1, 3, 3]], dtype=torch.int32),
+            confidence=torch.tensor([0.9], dtype=torch.float32),
+            class_id=torch.tensor([0], dtype=torch.int32),
+            mask=torch.zeros((1, 4, 4), dtype=torch.uint8),
+        )
+    ]
+
+    responses = adapter._build_responses_from_detections(
+        detections,
+        metadata,
+        source="workflow-execution",
+    )
+
+    assert isinstance(responses[0], InstanceSegmentationInferenceResponseDC)
+
+
 def test_prepare_multi_label_response_uses_class_ids_for_predicted_classes() -> None:
     """The model's `post_process` is the source of truth for which classes
     are "predicted" (it owns the priority chain user → per-class → global
diff --git a/tests/inference/unit_tests/core/utils/test_rle_to_polygon.py b/tests/inference/unit_tests/core/utils/test_rle_to_polygon.py
index 3878ddbdeb..696390a937 100644
--- a/tests/inference/unit_tests/core/utils/test_rle_to_polygon.py
+++ b/tests/inference/unit_tests/core/utils/test_rle_to_polygon.py
@@ -182,13 +182,49 @@ def test_rle_masks_to_polygons_matches_legacy_dense_path_for_lazy_uncompressed_c
         _assert_polygons_exactly_equal(actual=actual, expected=expected)
 
 
-def test_adapter_rle_masks2poly_matches_legacy_dense_path() -> None:
-    from inference.core.models.inference_models_adapters import rle_masks2poly
+def test_adapter_rle_masks2poly_matches_legacy_dense_path(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    from inference.core.models import inference_models_adapters
+
+    monkeypatch.setattr(
+        inference_models_adapters,
+        "INFERENCE_MODELS_RFDETR_TRITON_POSTPROC_ENABLED",
+        False,
+    )
 
     for mask in _deterministic_masks():
         instances = _to_instances([mask])
 
-        actual = rle_masks2poly(masks=instances)
+        actual = inference_models_adapters.rle_masks2poly(masks=instances)
         expected = _legacy_rle_masks2poly(masks=instances)
 
         _assert_polygons_exactly_equal(actual=actual, expected=expected)
+
+
+def test_adapter_rle_masks2poly_uses_sparse_path_only_with_triton_postproc(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    from inference.core.models import inference_models_adapters
+
+    instances = _to_instances([np.zeros((8, 8), dtype=np.uint8)])
+    sentinel = [np.array([[1.0, 2.0]], dtype=np.float32)]
+    calls = []
+
+    def fake_sparse_converter(masks):
+        calls.append(masks)
+        return sentinel
+
+    monkeypatch.setattr(
+        inference_models_adapters,
+        "INFERENCE_MODELS_RFDETR_TRITON_POSTPROC_ENABLED",
+        True,
+    )
+    monkeypatch.setattr(
+        inference_models_adapters,
+        "rle_masks_to_polygons",
+        fake_sparse_converter,
+    )
+
+    assert inference_models_adapters.rle_masks2poly(masks=instances) is sentinel
+    assert calls == [instances]

From da00f9a934a6cdcc6c879f51792b8dbc46915f1f Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Thu, 4 Jun 2026 21:58:11 +0000
Subject: [PATCH 56/76] Support larger RF-DETR Triton postprocess masks

---
 .../rfdetr_seg_trt_1080_benchmark.md          |  70 ++++++
 .../models/rfdetr/triton_postprocess.py       | 204 +++++++++++++++---
 .../models/rfdetr/test_triton_postprocess.py  |  87 +++++++-
 3 files changed, 334 insertions(+), 27 deletions(-)
 create mode 100644 development/stream_interface/rfdetr_seg_trt_1080_benchmark.md

diff --git a/development/stream_interface/rfdetr_seg_trt_1080_benchmark.md b/development/stream_interface/rfdetr_seg_trt_1080_benchmark.md
new file mode 100644
index 0000000000..17dcac8efa
--- /dev/null
+++ b/development/stream_interface/rfdetr_seg_trt_1080_benchmark.md
@@ -0,0 +1,70 @@
+# RF-DETR Seg TensorRT 1080p Variant Benchmark
+
+This note records the June 4, 2026 check for the largest RF-DETR segmentation
+variant that can run the `vehicles_1080p.mp4` stream workflow at 30 FPS on the
+Jetson Orin NX 8GB target used for PR 2405.
+
+## Context
+
+The public non-nano RF-DETR segmentation TensorRT packages are built for L4/T4,
+so they are not directly loadable on Jetson Orin. For this benchmark, local Orin
+FP16 TensorRT packages were compiled from the public ONNX packages and wired into
+the workflow as untracked local directories.
+
+The Triton sparse RLE postprocess path previously rejected non-nano mask sizes
+because it scanned the source mask with one Triton vector and capped source mask
+area below the `small` model's 96x96 mask. The current patch adds a tiled source
+mask bounds pass and raises the supported sparse path shape limit to RF-DETR Seg
+2XLarge's 192x192 mask with 300 queries and COCO class logits.
+
+## Benchmark Command
+
+Use the stream workflow with the optimization flags enabled:
+
+```bash
+env \
+  PYTHONPATH=/app/helloworld/inference/inference_models:/app/helloworld/inference \
+  USE_INFERENCE_MODELS=True \
+  ALLOW_INFERENCE_MODELS_UNTRUSTED_PACKAGES=True \
+  ALLOW_INFERENCE_MODELS_DIRECTLY_ACCESS_LOCAL_PACKAGES=True \
+  INFERENCE_MODELS_RFDETR_TRITON_POSTPROC_ENABLED=true \
+  INFERENCE_MODELS_RFDETR_TRITON_PREPROC_ENABLED=true \
+  RFDETR_PIPELINE_DEPTH=2 \
+  ENABLE_AUTO_CUDA_GRAPHS_FOR_TRT_BACKEND=true \
+  python development/stream_interface/rfdetr_nano_seg_trt_workflow.py \
+    --video_reference vehicles_1080p.mp4 \
+    --model_id rfdetr-seg-large/1 \
+    --backend trt
+```
+
+Change `--model_id` to the local package alias for each variant. A depth-3
+sanity run was also performed for `xlarge`.
+
+## Results
+
+| Variant | Input size | Pipeline depth | FPS |
+| --- | ---: | ---: | ---: |
+| `rfdetr-seg-small/1` | 384 | 2 | 63.85 |
+| `rfdetr-seg-large/1` | 504 | 2 | 35.49 |
+| `rfdetr-seg-xlarge/1` | 624 | 2 | 20.94 |
+| `rfdetr-seg-xlarge/1` | 624 | 3 | 20.91 |
+| `rfdetr-seg-2xlarge/1` | 768 | 2 | 12.90 |
+
+`large` is the largest tested non-nano RF-DETR Seg variant that clears 30 FPS on
+this 1080p workload with all optimization flags enabled. `xlarge` remains below
+30 FPS even when increasing pipeline depth from 2 to 3.
+
+## Verification
+
+The focused postprocess test suite passed after the 2XLarge shape-limit patch:
+
+```bash
+PYTHONPATH=/app/helloworld/inference/inference_models:/app/helloworld/inference \
+  python -m pytest tests/unit_tests/models/rfdetr/test_triton_postprocess.py
+```
+
+Result:
+
+```text
+24 passed, 23 warnings
+```
diff --git a/inference_models/inference_models/models/rfdetr/triton_postprocess.py b/inference_models/inference_models/models/rfdetr/triton_postprocess.py
index f229ccdd0b..8a81c79db7 100644
--- a/inference_models/inference_models/models/rfdetr/triton_postprocess.py
+++ b/inference_models/inference_models/models/rfdetr/triton_postprocess.py
@@ -61,6 +61,12 @@
 _SPARSE_MAX_TOTAL_RUNS = 8192
 _SPARSE_MAX_CLASSES_PER_QUERY = 4
 _SPARSE_TOPK_MAX_TOTAL_RUNS = _SPARSE_MAX_TOTAL_RUNS * _SPARSE_MAX_CLASSES_PER_QUERY
+# RF-DETR Seg 2XLarge emits 192x192 masks with 300 queries and COCO class
+# logits. The sparse path supports that shape by scanning source-mask support
+# in fixed tiles instead of one giant Triton vector.
+_SPARSE_SOURCE_BOUNDS_BLOCK_PIXELS = 1024
+_MAX_RFDETR_SEG_2XLARGE_MASK_PIXELS = 192 * 192
+_MAX_QUERY_CLASS_PRODUCTS = 65536
 _MAX_INTERPOLATION_WEIGHT_CACHE_ENTRIES = 16
 _INTERPOLATION_WEIGHT_CACHE = OrderedDict()
 _INTERPOLATION_WEIGHT_CACHE_LOCK = Lock()
@@ -191,6 +197,21 @@ def _release_pinned_host_buffer(buffer: torch.Tensor) -> None:
         _PINNED_HOST_POOL_SIZE += 1
 
 
+def _allocate_source_bounds(
+    rows: int,
+    mask_height: int,
+    mask_width: int,
+    device: torch.device,
+) -> torch.Tensor:
+    """Allocate source-mask positive-support bounds for sparse RLE kernels."""
+    bounds = torch.empty((rows, 4), dtype=torch.int32, device=device)
+    bounds[:, 0].fill_(mask_height)  # y_min: start above any row for atomic_min
+    bounds[:, 1].fill_(-1)  # y_max: -1 is read later as "no positive pixel"
+    bounds[:, 2].fill_(mask_width)  # x_min: start above any column for atomic_min
+    bounds[:, 3].fill_(-1)  # x_max: start below any column for atomic_max
+    return bounds
+
+
 def post_process_single_instance_segmentation_result_to_rle_masks_triton(
     image_bboxes: torch.Tensor,
     image_scores: torch.Tensor,
@@ -277,6 +298,12 @@ def post_process_single_instance_segmentation_result_to_rle_masks_triton(
             dtype=torch.int32,
             device=image_scores.device,
         )
+        source_bounds = _allocate_source_bounds(
+            rows=num_queries,
+            mask_height=mask_height,
+            mask_width=mask_width,
+            device=image_scores.device,
+        )
         _select_topk_query_class_metadata_kernel[(num_queries,)](
             image_scores,
             image_bboxes,
@@ -296,10 +323,31 @@ def post_process_single_instance_segmentation_result_to_rle_masks_triton(
             FLAG_WRITE_QUERY_METADATA=True,
             FLAG_OVERFLOW_CLASSES=False,
         )
+        _positive_source_bounds_kernel[
+            (
+                num_queries,
+                triton.cdiv(
+                    mask_height * mask_width,
+                    _SPARSE_SOURCE_BOUNDS_BLOCK_PIXELS,
+                ),
+            )
+        ](
+            image_masks,
+            query_metadata,
+            source_bounds,
+            mask_height,
+            mask_width,
+            image_masks.stride(0),
+            image_masks.stride(1),
+            image_masks.stride(2),
+            BLOCK_PIXELS=_SPARSE_SOURCE_BOUNDS_BLOCK_PIXELS,
+            METADATA_STRIDE=_HEADER_SIZE,
+        )
         _sparse_atomic_rle_from_metadata_kernel[
             (num_queries, triton.cdiv(_SPARSE_MAX_ROI_WIDTH, _SPARSE_BLOCK_COLS))
         ](
             image_masks,
+            source_bounds,
             y_idx,
             y_weight,
             x_idx,
@@ -314,7 +362,6 @@ def post_process_single_instance_segmentation_result_to_rle_masks_triton(
             image_masks.stride(0),
             image_masks.stride(1),
             image_masks.stride(2),
-            BLOCK_MASK=triton.next_power_of_2(mask_height * mask_width),
             BLOCK_OUT_H=triton.next_power_of_2(output_height),
             BLOCK_OUT_W=triton.next_power_of_2(output_width),
             BLOCK_ROI_H=_BLOCK_ROI_H,
@@ -336,7 +383,7 @@ def post_process_single_instance_segmentation_result_to_rle_masks_triton(
         return _deferred_instance_detections_from_sparse_query_records(
             class_metadata_host=class_metadata_host,
             records_host=records_host,
-            keepalive_tensors=(query_metadata, class_metadata, records),
+            keepalive_tensors=(query_metadata, class_metadata, source_bounds, records),
             done_event=done_event,
             outputs_consumed_event=outputs_consumed_event,
             max_total_runs=_SPARSE_MAX_TOTAL_RUNS,
@@ -357,6 +404,12 @@ def post_process_single_instance_segmentation_result_to_rle_masks_triton(
         dtype=torch.int32,
         device=image_scores.device,
     )
+    source_bounds = _allocate_source_bounds(
+        rows=num_queries,
+        mask_height=mask_height,
+        mask_width=mask_width,
+        device=image_scores.device,
+    )
     _select_best_query_metadata_kernel[(num_queries,)](
         image_scores,
         image_bboxes,
@@ -373,10 +426,28 @@ def post_process_single_instance_segmentation_result_to_rle_masks_triton(
         METADATA_STRIDE=_HEADER_SIZE,
         FLAG_MULTICLASS=True,
     )
+    _positive_source_bounds_kernel[
+        (
+            num_queries,
+            triton.cdiv(mask_height * mask_width, _SPARSE_SOURCE_BOUNDS_BLOCK_PIXELS),
+        )
+    ](
+        image_masks,
+        metadata,
+        source_bounds,
+        mask_height,
+        mask_width,
+        image_masks.stride(0),
+        image_masks.stride(1),
+        image_masks.stride(2),
+        BLOCK_PIXELS=_SPARSE_SOURCE_BOUNDS_BLOCK_PIXELS,
+        METADATA_STRIDE=_HEADER_SIZE,
+    )
     _sparse_atomic_rle_from_metadata_kernel[
         (num_queries, triton.cdiv(_SPARSE_MAX_ROI_WIDTH, _SPARSE_BLOCK_COLS))
     ](
         image_masks,
+        source_bounds,
         y_idx,
         y_weight,
         x_idx,
@@ -391,7 +462,6 @@ def post_process_single_instance_segmentation_result_to_rle_masks_triton(
         image_masks.stride(0),
         image_masks.stride(1),
         image_masks.stride(2),
-        BLOCK_MASK=triton.next_power_of_2(mask_height * mask_width),
         BLOCK_OUT_H=triton.next_power_of_2(output_height),
         BLOCK_OUT_W=triton.next_power_of_2(output_width),
         BLOCK_ROI_H=_BLOCK_ROI_H,
@@ -433,6 +503,12 @@ def post_process_single_instance_segmentation_result_to_rle_masks_triton(
         dtype=torch.int32,
         device=image_scores.device,
     )
+    source_bounds = _allocate_source_bounds(
+        rows=topk_metadata_rows,
+        mask_height=mask_height,
+        mask_width=mask_width,
+        device=image_scores.device,
+    )
     _select_topk_query_class_metadata_kernel[(num_queries,)](
         image_scores,
         image_bboxes,
@@ -452,6 +528,23 @@ def post_process_single_instance_segmentation_result_to_rle_masks_triton(
         FLAG_WRITE_QUERY_METADATA=False,
         FLAG_OVERFLOW_CLASSES=True,
     )
+    _positive_source_bounds_kernel[
+        (
+            topk_metadata_rows,
+            triton.cdiv(mask_height * mask_width, _SPARSE_SOURCE_BOUNDS_BLOCK_PIXELS),
+        )
+    ](
+        image_masks,
+        metadata,
+        source_bounds,
+        mask_height,
+        mask_width,
+        image_masks.stride(0),
+        image_masks.stride(1),
+        image_masks.stride(2),
+        BLOCK_PIXELS=_SPARSE_SOURCE_BOUNDS_BLOCK_PIXELS,
+        METADATA_STRIDE=_HEADER_SIZE,
+    )
     _sparse_atomic_rle_from_metadata_kernel[
         (
             topk_metadata_rows,
@@ -459,6 +552,7 @@ def post_process_single_instance_segmentation_result_to_rle_masks_triton(
         )
     ](
         image_masks,
+        source_bounds,
         y_idx,
         y_weight,
         x_idx,
@@ -473,7 +567,6 @@ def post_process_single_instance_segmentation_result_to_rle_masks_triton(
         image_masks.stride(0),
         image_masks.stride(1),
         image_masks.stride(2),
-        BLOCK_MASK=triton.next_power_of_2(mask_height * mask_width),
         BLOCK_OUT_H=triton.next_power_of_2(output_height),
         BLOCK_OUT_W=triton.next_power_of_2(output_width),
         BLOCK_ROI_H=_BLOCK_ROI_H,
@@ -814,8 +907,8 @@ def _unsupported_triton_postprocess_reason(
     output_height = image_meta.original_size.height
     output_width = image_meta.original_size.width
     if (
-        num_queries * num_classes > 16384
-        or mask_height * mask_width > 8192
+        num_queries * num_classes > _MAX_QUERY_CLASS_PRODUCTS
+        or mask_height * mask_width > _MAX_RFDETR_SEG_2XLARGE_MASK_PIXELS
         or output_height <= 0
         or output_width <= 0
         or output_height > 4096
@@ -1274,9 +1367,80 @@ def _select_topk_query_class_metadata_kernel(
                 tl.store(query_metadata + query_meta_base + 15, 0.0)
             work_scores = tl.where(class_offsets == selected_class, -1.0, work_scores)
 
+    @triton.jit
+    def _positive_source_bounds_kernel(
+        masks,
+        metadata,
+        source_bounds,
+        mask_height: tl.constexpr,
+        mask_width: tl.constexpr,
+        mask_stride_q: tl.constexpr,
+        mask_stride_h: tl.constexpr,
+        mask_stride_w: tl.constexpr,
+        BLOCK_PIXELS: tl.constexpr,
+        METADATA_STRIDE: tl.constexpr,
+    ):
+        """Compute positive source-mask support bounds for one metadata row.
+
+        The older sparse RLE kernel scanned the whole source mask in a single
+        Triton vector to derive this support. RF-DETR Seg 2XLarge has 192x192
+        masks, so this helper scans the source mask in fixed-size tiles and
+        atomically reduces into ``source_bounds``.
+        """
+        rank = tl.program_id(0)  # grid axis 0: metadata row
+        tile = tl.program_id(1)  # grid axis 1: BLOCK_PIXELS-wide slice of the mask
+        meta_base = rank * METADATA_STRIDE
+        is_valid_detection = tl.load(metadata + meta_base + 0) > 0.5
+        query_index = tl.load(metadata + meta_base + 9).to(tl.int32)
+
+        pixel_offsets = tile * BLOCK_PIXELS + tl.arange(0, BLOCK_PIXELS)
+        mask_active = pixel_offsets < (mask_height * mask_width)  # drop tail lanes
+        source_y = pixel_offsets // mask_width  # unflatten: row index
+        source_x = pixel_offsets - source_y * mask_width  # unflatten: column index
+        mask_values = tl.load(
+            masks
+            + query_index * mask_stride_q
+            + source_y * mask_stride_h
+            + source_x * mask_stride_w,
+            mask=is_valid_detection & mask_active,
+            other=-1.0,  # lanes that are not loaded read as non-positive
+        )
+        positive_source = is_valid_detection & mask_active & (mask_values > 0.0)
+        local_y_min = tl.min(tl.where(positive_source, source_y, mask_height), axis=0)
+        local_y_max = tl.max(tl.where(positive_source, source_y, -1), axis=0)
+        local_x_min = tl.min(tl.where(positive_source, source_x, mask_width), axis=0)
+        local_x_max = tl.max(tl.where(positive_source, source_x, -1), axis=0)
+        has_positive_source = local_y_max >= 0  # -1 left: no positive lane in tile
+        bounds_base = rank * 4  # row layout: y_min, y_max, x_min, x_max
+        tl.atomic_min(
+            source_bounds + bounds_base + 0,
+            local_y_min,
+            sem="relaxed",
+            mask=has_positive_source,
+        )
+        tl.atomic_max(
+            source_bounds + bounds_base + 1,
+            local_y_max,
+            sem="relaxed",
+            mask=has_positive_source,
+        )
+        tl.atomic_min(
+            source_bounds + bounds_base + 2,
+            local_x_min,
+            sem="relaxed",
+            mask=has_positive_source,
+        )
+        tl.atomic_max(
+            source_bounds + bounds_base + 3,
+            local_x_max,
+            sem="relaxed",
+            mask=has_positive_source,
+        )
+
     @triton.jit
     def _sparse_atomic_rle_from_metadata_kernel(
         masks,
+        source_bounds,
         y_idx,
         y_weight,
         x_idx,
@@ -1291,7 +1455,6 @@ def _sparse_atomic_rle_from_metadata_kernel(
         mask_stride_q: tl.constexpr,
         mask_stride_h: tl.constexpr,
         mask_stride_w: tl.constexpr,
-        BLOCK_MASK: tl.constexpr,
         BLOCK_OUT_H: tl.constexpr,
         BLOCK_OUT_W: tl.constexpr,
         BLOCK_ROI_H: tl.constexpr,
@@ -1331,6 +1494,9 @@ def _sparse_atomic_rle_from_metadata_kernel(
                 ``0`` active flag and column ``9`` source query id. It writes
                 columns ``11:15`` with ``roi_y_start, roi_y_end, roi_x_start,
                 roi_x_end`` for diagnostics.
+            source_bounds: CUDA int32 tensor with shape ``[metadata_rows, 4]``
+                containing ``source_y_min, source_y_max, source_x_min,
+                source_x_max`` for positive source logits in the selected mask.
             records: CUDA int32 tensor with shape ``[MAX_TOTAL_RUNS + 1, 3]``.
                 ``records[0, 0]`` is atomically incremented for every emitted
                 run and ``records[0, 1]`` is set when capacity is exceeded.
@@ -1344,8 +1510,6 @@ def _sparse_atomic_rle_from_metadata_kernel(
             mask_stride_q: Stride between query masks in ``masks``.
             mask_stride_h: Row stride for ``masks``.
             mask_stride_w: Column stride for ``masks``.
-            BLOCK_MASK: Power-of-two tile covering ``mask_height * mask_width``
-                so the kernel can find positive source support in one vector.
             BLOCK_OUT_H: Power-of-two tile covering all output rows.
             BLOCK_OUT_W: Power-of-two tile covering all output columns.
             BLOCK_ROI_H: Number of output rows scanned per inner row tile.
@@ -1366,26 +1530,14 @@ def _sparse_atomic_rle_from_metadata_kernel(
         if not is_valid_detection:
             return
 
-        mask_offsets = tl.arange(0, BLOCK_MASK)
-        mask_active = mask_offsets < (mask_height * mask_width)
-        source_y = mask_offsets // mask_width
-        source_x = mask_offsets - source_y * mask_width
-        mask_values = tl.load(
-            masks
-            + query_index * mask_stride_q
-            + source_y * mask_stride_h
-            + source_x * mask_stride_w,
-            mask=mask_active,
-            other=-1.0,
-        )
-        positive_source = mask_active & (mask_values > 0.0)
         # Any output pixel depending only on non-positive source pixels cannot
         # cross the >0 threshold, so derive the minimal candidate ROI from the
         # positive source support plus a one-pixel interpolation halo.
-        source_y_min = tl.min(tl.where(positive_source, source_y, mask_height), axis=0)
-        source_y_max = tl.max(tl.where(positive_source, source_y, -1), axis=0)
-        source_x_min = tl.min(tl.where(positive_source, source_x, mask_width), axis=0)
-        source_x_max = tl.max(tl.where(positive_source, source_x, -1), axis=0)
+        bounds_base = rank * 4
+        source_y_min = tl.load(source_bounds + bounds_base + 0)
+        source_y_max = tl.load(source_bounds + bounds_base + 1)
+        source_x_min = tl.load(source_bounds + bounds_base + 2)
+        source_x_max = tl.load(source_bounds + bounds_base + 3)
         has_positive_source = source_y_max >= 0
         if not has_positive_source:
             return
diff --git a/inference_models/tests/unit_tests/models/rfdetr/test_triton_postprocess.py b/inference_models/tests/unit_tests/models/rfdetr/test_triton_postprocess.py
index 282a5c0ae7..c64cd686e6 100644
--- a/inference_models/tests/unit_tests/models/rfdetr/test_triton_postprocess.py
+++ b/inference_models/tests/unit_tests/models/rfdetr/test_triton_postprocess.py
@@ -343,7 +343,7 @@ def test_rfdetr_triton_postproc_unsupported_reason_matrix(
     elif case == "class_mapping_too_small":
         kwargs["classes_re_mapping"] = _class_mapping(torch.device("cpu"), 1)
     elif case == "input_size_exceeds_limits":
-        kwargs = _support_kwargs(num_queries=129, num_classes=128)
+        kwargs = _support_kwargs(mask_size=(193, 193))
     elif case == "padding":
         kwargs["image_meta"] = _metadata(padding=(1, 0, 0, 0))
     elif case == "static_crop":
@@ -357,6 +357,37 @@ def test_rfdetr_triton_postproc_unsupported_reason_matrix(
     assert not _supports_triton_postprocess_path(**kwargs)
 
 
+def test_rfdetr_triton_postproc_accepts_2xlarge_shape_limits(monkeypatch) -> None:
+    monkeypatch.setattr(triton_postprocess, "triton", object())  # stand-in module
+    device = torch.device("cpu")
+    num_queries = 300
+    num_classes = 91  # 300 * 91 = 27300 query-class pairs
+    kwargs = {
+        "image_bboxes": torch.empty(
+            (num_queries, 4),
+            dtype=torch.float32,
+            device=device,
+        ),
+        "image_scores": torch.empty(
+            (num_queries, num_classes),
+            dtype=torch.float32,
+            device=device,
+        ),
+        "image_masks": torch.empty(
+            (num_queries, 192, 192),  # 36864 mask pixels, exactly at the new cap
+            dtype=torch.float32,
+            device=device,
+        ),
+        "image_meta": _metadata(height=1080, width=1920),
+        "threshold": 0.4,
+        "classes_re_mapping": _class_mapping(device, num_classes=num_classes),
+    }
+
+    reason = _unsupported_triton_postprocess_reason(**kwargs)
+
+    assert reason == "cuda_device_required"  # rejected for CPU, not for size
+
+
 @pytest.mark.parametrize("case", ["no_class_mapping", "tensor_threshold", "padding"])
 def test_rfdetr_triton_postproc_unsupported_cases_use_reference_path(
     monkeypatch,
@@ -546,6 +577,60 @@ def test_rfdetr_triton_postproc_matches_reference_rle_path() -> None:
     _assert_detections_equal(actual, expected)
 
 
+@pytest.mark.skipif(
+    not torch.cuda.is_available() or triton_postprocess.triton is None,
+    reason="CUDA and Triton are required",
+)
+def test_rfdetr_triton_postproc_matches_reference_with_large_source_mask() -> None:
+    cpu = torch.device("cpu")
+    cuda = torch.device("cuda")
+    bboxes_cpu = torch.tensor(
+        [
+            [0.50, 0.50, 0.50, 0.50],
+            [0.25, 0.25, 0.20, 0.20],
+        ],
+        dtype=torch.float32,
+        device=cpu,
+    )
+    logits_cpu = torch.tensor(
+        [
+            [4.0, -4.0],  # sigmoid(4) ~= 0.982
+            [-4.0, -4.0],  # sigmoid(-4) ~= 0.018, under the 0.4 threshold below
+        ],
+        dtype=torch.float32,
+        device=cpu,
+    )
+    masks_cpu = torch.full((2, 96, 96), -2.0, dtype=torch.float32, device=cpu)
+    masks_cpu[0, 24:72, 24:72] = 2.0  # 96 * 96 = 9216 px, i.e. 9 tiles of 1024
+    scores_cpu = torch.sigmoid(logits_cpu)
+    metadata = _metadata(height=128, width=128)
+    expected = _post_process_single_instance_segmentation_result_to_rle_masks(
+        image_bboxes=bboxes_cpu,
+        image_logits=scores_cpu,  # NOTE(review): probabilities, not logits - confirm
+        image_masks=masks_cpu,
+        image_meta=metadata,
+        threshold=0.4,
+        num_classes=2,
+        classes_re_mapping=_class_mapping(cpu),
+    )
+    cuda_kwargs = {
+        "image_bboxes": bboxes_cpu.to(cuda),
+        "image_scores": scores_cpu.to(cuda),
+        "image_masks": masks_cpu.to(cuda),
+        "image_meta": metadata,
+        "threshold": 0.4,
+        "classes_re_mapping": _class_mapping(cuda),
+    }
+
+    assert _unsupported_triton_postprocess_reason(**cuda_kwargs) is None
+    actual = post_process_single_instance_segmentation_result_to_rle_masks_triton(
+        **cuda_kwargs
+    )
+
+    assert actual is not None  # None would mean the Triton path returned nothing
+    _assert_detections_equal(actual, expected)
+
+
 @pytest.mark.skipif(
     not torch.cuda.is_available() or triton_postprocess.triton is None,
     reason="CUDA and Triton are required",

From 16e26e1491809c7bb4a01720b9ad05c490e34d3e Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Thu, 4 Jun 2026 22:55:04 +0000
Subject: [PATCH 57/76] Fix RF-DETR preprocess buffer handoff

---
 .../inference_models/models/common/trt.py     | 14 +++++
 .../rfdetr/triton_preprocess_runtime.py       | 53 ++++++++++++-------
 2 files changed, 49 insertions(+), 18 deletions(-)

diff --git a/inference_models/inference_models/models/common/trt.py b/inference_models/inference_models/models/common/trt.py
index 0c2f730b1c..d01d33690a 100644
--- a/inference_models/inference_models/models/common/trt.py
+++ b/inference_models/inference_models/models/common/trt.py
@@ -732,7 +732,21 @@ def _execute_trt_engine(
                     != pre_processed_images.data_ptr()
                 ):
                     trt_cuda_graph_state.input_buffer.copy_(pre_processed_images)
+                    input_consumed_event = torch.cuda.Event()
+                    input_consumed_event.record(stream)
+                    pre_processed_images._trt_consumed_event = (  # type: ignore[attr-defined]
+                        input_consumed_event
+                    )
                 trt_cuda_graph_state.cuda_graph.replay()
+                if (
+                    trt_cuda_graph_state.input_buffer.data_ptr()
+                    == pre_processed_images.data_ptr()
+                ):
+                    input_consumed_event = torch.cuda.Event()
+                    input_consumed_event.record(stream)
+                    pre_processed_images._trt_consumed_event = (  # type: ignore[attr-defined]
+                        input_consumed_event
+                    )
                 if synchronize:
                     results = [
                         buf.clone() for buf in trt_cuda_graph_state.output_buffers
diff --git a/inference_models/inference_models/models/rfdetr/triton_preprocess_runtime.py b/inference_models/inference_models/models/rfdetr/triton_preprocess_runtime.py
index 7e72c28cad..baf4c7de12 100644
--- a/inference_models/inference_models/models/rfdetr/triton_preprocess_runtime.py
+++ b/inference_models/inference_models/models/rfdetr/triton_preprocess_runtime.py
@@ -76,10 +76,10 @@ class FastPreprocessState:
     size used by the Triton kernels.
 
     Attributes:
-        pinned_host: Pinned CPU HWC uint8 staging tensor. The incoming numpy
+        pinned_hosts: Ring of pinned CPU HWC uint8 staging tensors. The incoming numpy
             image is copied here first so the following host-to-device copy can
             be submitted as ``non_blocking=True`` on the preprocessing stream.
-        src_gpu: CUDA HWC uint8 tensor consumed by the horizontal Triton kernel.
+        src_gpus: Ring of CUDA HWC uint8 tensors consumed by the horizontal Triton kernel.
         out_buffers: Ring of CUDA fp32 ``(1, 3, target_h, target_w)`` outputs.
             The returned tensor can still be owned by TensorRT or response
             finalization while Python prepares the next frame, so the ring avoids
@@ -99,8 +99,8 @@ class FastPreprocessState:
         "src_w",
         "target_h",
         "target_w",
-        "pinned_host",
-        "src_gpu",
+        "pinned_hosts",
+        "src_gpus",
         "out_buffers",
         "tmp_buffers",
         "out_buffer_index",
@@ -114,8 +114,8 @@ def __init__(
         src_w: int,
         target_h: int,
         target_w: int,
-        pinned_host: torch.Tensor,
-        src_gpu: torch.Tensor,
+        pinned_hosts: List[torch.Tensor],
+        src_gpus: List[torch.Tensor],
         out_buffers: List[torch.Tensor],
         tmp_buffers: List[torch.Tensor],
         tables: ResampleTables,
@@ -125,8 +125,8 @@ def __init__(
         self.src_w = src_w
         self.target_h = target_h
         self.target_w = target_w
-        self.pinned_host = pinned_host
-        self.src_gpu = src_gpu
+        self.pinned_hosts = pinned_hosts
+        self.src_gpus = src_gpus
         self.out_buffers = out_buffers
         self.tmp_buffers = tmp_buffers
         self.out_buffer_index = 0
@@ -143,8 +143,14 @@ def build(
         device: torch.device,
     ) -> "FastPreprocessState":
         """Allocate shape-specific buffers and build GPU resample tables."""
-        pinned_host = torch.empty((src_h, src_w, 3), dtype=torch.uint8, pin_memory=True)
-        src_gpu = torch.empty((src_h, src_w, 3), dtype=torch.uint8, device=device)
+        pinned_hosts = [
+            torch.empty((src_h, src_w, 3), dtype=torch.uint8, pin_memory=True)
+            for _ in range(_BUFFER_RING_SIZE)
+        ]
+        src_gpus = [
+            torch.empty((src_h, src_w, 3), dtype=torch.uint8, device=device)
+            for _ in range(_BUFFER_RING_SIZE)
+        ]
         out_buffers = [
             torch.empty((1, 3, target_h, target_w), dtype=torch.float32, device=device)
             for _ in range(_BUFFER_RING_SIZE)
@@ -165,8 +171,8 @@ def build(
             src_w=src_w,
             target_h=target_h,
             target_w=target_w,
-            pinned_host=pinned_host,
-            src_gpu=src_gpu,
+            pinned_hosts=pinned_hosts,
+            src_gpus=src_gpus,
             out_buffers=out_buffers,
             tmp_buffers=tmp_buffers,
             tables=tables,
@@ -182,13 +188,17 @@ def is_stale(self, src_h: int, src_w: int, target_h: int, target_w: int) -> bool
             or self.target_w != target_w
         )
 
-    def next_buffers(self) -> Tuple[torch.Tensor, torch.Tensor]:
+    def next_buffers(
+        self,
+    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
-        """Return the next output/scratch pair from the ring."""
+        """Return the next pinned/source/output/scratch buffers from the ring."""
         idx = self.out_buffer_index
+        pinned_host = self.pinned_hosts[idx]
+        src_gpu = self.src_gpus[idx]
         out = self.out_buffers[idx]
         tmp = self.tmp_buffers[idx]
         self.out_buffer_index = (idx + 1) % len(self.out_buffers)
-        return out, tmp
+        return pinned_host, src_gpu, out, tmp
 
 
 class FastPreprocessRuntime:
@@ -288,13 +298,19 @@ def try_preprocess(
             )
             self._state = state
 
-        np.copyto(state.pinned_host.numpy(), candidate, casting="no")
-        out_buffer, tmp_buffer = state.next_buffers()
+        pinned_host, src_gpu, out_buffer, tmp_buffer = state.next_buffers()
+        preproc_ready_event = getattr(pinned_host, "_preproc_ready_event", None)
+        if preproc_ready_event is not None:
+            preproc_ready_event.synchronize()
+        np.copyto(pinned_host.numpy(), candidate, casting="no")
 
         with torch.cuda.stream(stream):
-            state.src_gpu.copy_(state.pinned_host, non_blocking=True)
+            trt_consumed_event = getattr(out_buffer, "_trt_consumed_event", None)
+            if trt_consumed_event is not None:
+                stream.wait_event(trt_consumed_event)
+            src_gpu.copy_(pinned_host, non_blocking=True)
             triton_preprocess_rfdetr_stretch_two_pass_preallocated(
-                src=state.src_gpu,
+                src=src_gpu,
                 out=out_buffer,
                 tmp=tmp_buffer,
                 tables=state.tables,
@@ -308,6 +324,7 @@ def try_preprocess(
             ready_event = torch.cuda.Event()
             ready_event.record(stream)
             out_buffer._trt_ready_event = ready_event  # type: ignore[attr-defined]
+            pinned_host._preproc_ready_event = ready_event  # type: ignore[attr-defined]
             out_buffer.record_stream(stream)
 
         metadata = [

From 61060cf3886fa4e670adcef20bdc630ae5cb6009 Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Thu, 4 Jun 2026 22:57:20 +0000
Subject: [PATCH 58/76] Remove orphan RF-DETR postprocess block

---
 .../inference_models/models/rfdetr/common.py  | 27 ++-----------------
 1 file changed, 2 insertions(+), 25 deletions(-)

diff --git a/inference_models/inference_models/models/rfdetr/common.py b/inference_models/inference_models/models/rfdetr/common.py
index 8b0b3a2e9c..774aee5ce2 100644
--- a/inference_models/inference_models/models/rfdetr/common.py
+++ b/inference_models/inference_models/models/rfdetr/common.py
@@ -1,13 +1,12 @@
-from typing import List, Optional, Union
+from typing import List, Optional, Tuple, Union
 
 import torch
 from torchvision.transforms import functional
 
-from inference_models import Detections, InstanceDetections, InstancesRLEMasks
+from inference_models import Detections, InstanceDetections, InstancesRLEMasks, KeyPoints
 from inference_models.configuration import (
     INFERENCE_MODELS_RFDETR_TRITON_POSTPROC_ENABLED,
 )
-from inference_models import Detections, InstanceDetections, InstancesRLEMasks, KeyPoints
 from inference_models.entities import ImageDimensions
 from inference_models.errors import CorruptedModelPackageError
 from inference_models.models.common.roboflow.model_packages import (
@@ -410,28 +409,6 @@ def post_process_instance_segmentation_results_to_rle_masks(
             pre_processing_meta,
         )
     ]
-        if len(aligned_boxes) > 0:
-            aligned_boxes_tensor = torch.stack(aligned_boxes, dim=0)
-            final_results.append(
-                InstanceDetections(
-                    xyxy=aligned_boxes_tensor.round().int(),
-                    confidence=confidence,
-                    class_id=top_classes.int(),
-                    mask=instances_masks,
-                )
-            )
-        else:
-            final_results.append(
-                InstanceDetections(
-                    xyxy=torch.empty(
-                        (0, 4), dtype=torch.int32, device=image_bboxes.device
-                    ),
-                    class_id=top_classes.int(),
-                    confidence=confidence,
-                    mask=instances_masks,
-                )
-            )
-    return final_results
 
 
 def cxcywh_to_xyxy(boxes):

From 1b0233e377caff5264b0197d96f439a42a096b28 Mon Sep 17 00:00:00 2001
From: Sergii Bondariev <sergii@roboflow.com>
Date: Thu, 4 Jun 2026 23:43:45 -0700
Subject: [PATCH 59/76] Add integration test for RF-DETR keypoints preview onnx
 (#2416)

* add changelog for 0.29.0

* add integration test for rfdetr keypoints preview onnx
---
 inference_models/docs/changelog.md            |   4 +
 .../tests/integration_tests/conftest.py       |  22 ++
 .../integration_tests/models/conftest.py      |   9 +
 ...tr_keypoints_detection_predictions_onnx.py | 199 ++++++++++++++++++
 4 files changed, 234 insertions(+)
 create mode 100644 inference_models/tests/integration_tests/models/test_rfdetr_keypoints_detection_predictions_onnx.py

diff --git a/inference_models/docs/changelog.md b/inference_models/docs/changelog.md
index 19905bd793..4487819615 100644
--- a/inference_models/docs/changelog.md
+++ b/inference_models/docs/changelog.md
@@ -1,5 +1,9 @@
 # Changelog
 
+## `0.29.0`
+
+- Added RF-DETR preview keypoint support (ONNX backend).
+
 ## `0.28.7`
 
 - Added YOLO26 semantic segmentation support (ONNX, TorchScript, and TensorRT backends).
diff --git a/inference_models/tests/integration_tests/conftest.py b/inference_models/tests/integration_tests/conftest.py
index 8411a89d04..451b6dbc4a 100644
--- a/inference_models/tests/integration_tests/conftest.py
+++ b/inference_models/tests/integration_tests/conftest.py
@@ -72,6 +72,10 @@
     "https://storage.googleapis.com/roboflow-tests-assets/test-images/basketball.jpg"
 )
 BASKETBALL_IMAGE_PATH = os.path.join(ASSETS_DIR, "basketball.jpg")
+GLUE_STICKS_IMAGE_URL = (
+    "https://storage.googleapis.com/roboflow-tests-assets/test-images/glue-sticks.jpg"
+)
+GLUE_STICKS_IMAGE_PATH = os.path.join(ASSETS_DIR, "glue-sticks.jpg")
 
 
 def _download_if_not_exists(file_path: str, url: str, lock_timeout: int = 180) -> None:
@@ -284,6 +288,24 @@ def people_walking_image_torch() -> torch.Tensor:
     return torchvision.io.read_image(PEOPLE_WALKING_IMAGE_PATH)
 
 
+@pytest.fixture(scope="function")
+def glue_sticks_image_numpy() -> np.ndarray:
+    _download_if_not_exists(
+        file_path=GLUE_STICKS_IMAGE_PATH, url=GLUE_STICKS_IMAGE_URL
+    )
+    image = cv2.imread(GLUE_STICKS_IMAGE_PATH)
+    assert image is not None, "Could not load test image"
+    return image
+
+
+@pytest.fixture(scope="function")
+def glue_sticks_image_torch() -> torch.Tensor:
+    _download_if_not_exists(
+        file_path=GLUE_STICKS_IMAGE_PATH, url=GLUE_STICKS_IMAGE_URL
+    )
+    return torchvision.io.read_image(GLUE_STICKS_IMAGE_PATH)
+
+
 @pytest.fixture(scope="function")
 def snake_image_numpy() -> np.ndarray:
     _download_if_not_exists(file_path=SNAKE_IMAGE_PATH, url=SNAKE_IMAGE_URL)
diff --git a/inference_models/tests/integration_tests/models/conftest.py b/inference_models/tests/integration_tests/models/conftest.py
index 747810281b..f782ac7bec 100644
--- a/inference_models/tests/integration_tests/models/conftest.py
+++ b/inference_models/tests/integration_tests/models/conftest.py
@@ -41,6 +41,7 @@
 COIN_COUNTING_RFDETR_NANO_TORCH_STATIC_CROP_CENTER_CROP_URL = "https://storage.googleapis.com/roboflow-tests-assets/rf-platform-models/rfdetr-nano-torch-static-crop-center-crop-640.zip"
 COIN_COUNTING_RFDETR_NANO_ONNX_STATIC_BS_NONSQUARE_LETTERBOX_URL = "https://storage.googleapis.com/roboflow-tests-assets/rf-platform-models/rfdetr-nano-onnx-static-bs-nonsquare-letterbox.zip"
 COIN_COUNTING_RFDETR_NANO_TORCH_STATIC_BS_NONSQUARE_LETTERBOX_URL = "https://storage.googleapis.com/roboflow-tests-assets/rf-platform-models/rfdetr-nano-torch-static-bs-nonsquare-letterbox.zip"
+RFDETR_KP_PREVIEW_ONNX_GLUE_STICKS_URL = "https://storage.googleapis.com/roboflow-tests-assets/rf-platform-models/rfdetr-kp-preview-onnx-glue-sticks.zip"
 
 OG_RFDETR_WEIGHTS_URL = "https://storage.googleapis.com/rfdetr/rf-detr-base-coco.pth"
 
@@ -428,6 +429,14 @@ def coin_counting_rfdetr_nano_torch_cs_stretch_package() -> str:
     )
 
 
+@pytest.fixture(scope="module")
+def rfdetr_kp_preview_onnx_glue_sticks_package() -> str:
+    return download_model_package(
+        model_package_zip_url=RFDETR_KP_PREVIEW_ONNX_GLUE_STICKS_URL,
+        package_name="rfdetr-kp-preview-onnx-glue-sticks",
+    )
+
+
 @pytest.fixture(scope="module")
 def coin_counting_rfdetr_nano_onnx_cs_stretch_package() -> str:
     return download_model_package(
diff --git a/inference_models/tests/integration_tests/models/test_rfdetr_keypoints_detection_predictions_onnx.py b/inference_models/tests/integration_tests/models/test_rfdetr_keypoints_detection_predictions_onnx.py
new file mode 100644
index 0000000000..7761026a51
--- /dev/null
+++ b/inference_models/tests/integration_tests/models/test_rfdetr_keypoints_detection_predictions_onnx.py
@@ -0,0 +1,199 @@
+import numpy as np
+import pytest
+import torch
+
+
+@pytest.mark.slow
+@pytest.mark.onnx_extras
+def test_rfdetr_keypoints_onnx_glue_sticks_numpy(
+    rfdetr_kp_preview_onnx_glue_sticks_package: str,
+    glue_sticks_image_numpy: np.ndarray,
+) -> None:
+    # given
+    from inference_models.models.rfdetr.rfdetr_key_points_detection_onnx import (
+        RFDetrForKeyPointsONNX,
+    )
+
+    model = RFDetrForKeyPointsONNX.from_pretrained(
+        model_name_or_path=rfdetr_kp_preview_onnx_glue_sticks_package,
+        onnx_execution_providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
+    )
+
+    # when
+    key_points_list, detections_list = model(glue_sticks_image_numpy, confidence=0.5)
+
+    # then
+    assert len(key_points_list) == 1
+    key_points = key_points_list[0]
+    assert torch.allclose(
+        key_points.xy.cpu().to(torch.int32),
+        torch.tensor(
+            [
+                [[860, 166], [671, 420]],
+                [[1278, 657], [895, 532]],
+            ],
+            dtype=torch.int32,
+        ),
+        atol=2,
+    )
+    assert torch.allclose(
+        key_points.confidence.cpu(),
+        torch.tensor(
+            [
+                [0.9899, 0.9807],
+                [0.9873, 0.9876],
+            ]
+        ),
+        atol=0.01,
+    )
+    assert key_points.class_id.cpu().tolist() == [0, 0]
+
+    assert detections_list is not None
+    detections = detections_list[0]
+    assert torch.allclose(
+        detections.xyxy.cpu().to(torch.int32),
+        torch.tensor(
+            [
+                [625, 124, 909, 441],
+                [875, 497, 1286, 680],
+            ],
+            dtype=torch.int32,
+        ),
+        atol=2,
+    )
+    assert torch.allclose(
+        detections.confidence.cpu(),
+        torch.tensor([0.6288, 0.6260]),
+        atol=0.01,
+    )
+    assert detections.class_id.cpu().tolist() == [0, 0]
+
+
+@pytest.mark.slow
+@pytest.mark.onnx_extras
+def test_rfdetr_keypoints_onnx_glue_sticks_batch_numpy(
+    rfdetr_kp_preview_onnx_glue_sticks_package: str,
+    glue_sticks_image_numpy: np.ndarray,
+) -> None:
+    # given
+    from inference_models.models.rfdetr.rfdetr_key_points_detection_onnx import (
+        RFDetrForKeyPointsONNX,
+    )
+
+    model = RFDetrForKeyPointsONNX.from_pretrained(
+        model_name_or_path=rfdetr_kp_preview_onnx_glue_sticks_package,
+        onnx_execution_providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
+    )
+
+    # when
+    key_points_list, detections_list = model(
+        [glue_sticks_image_numpy, glue_sticks_image_numpy], confidence=0.5
+    )
+
+    # then
+    assert len(key_points_list) == 2
+    assert detections_list is not None
+    assert len(detections_list) == 2
+
+    expected_kp_xy = torch.tensor(
+        [
+            [[860, 166], [671, 420]],
+            [[1278, 657], [895, 532]],
+        ],
+        dtype=torch.int32,
+    )
+    expected_kp_confidence = torch.tensor(
+        [
+            [0.9899, 0.9807],
+            [0.9873, 0.9876],
+        ]
+    )
+    expected_xyxy = torch.tensor(
+        [
+            [625, 124, 909, 441],
+            [875, 497, 1286, 680],
+        ],
+        dtype=torch.int32,
+    )
+    for key_points in key_points_list:
+        assert torch.allclose(
+            key_points.xy.cpu().to(torch.int32), expected_kp_xy, atol=2
+        )
+        assert torch.allclose(
+            key_points.confidence.cpu(), expected_kp_confidence, atol=0.01
+        )
+        assert key_points.class_id.cpu().tolist() == [0, 0]
+    for detections in detections_list:
+        assert torch.allclose(
+            detections.xyxy.cpu().to(torch.int32), expected_xyxy, atol=2
+        )
+        assert torch.allclose(
+            detections.confidence.cpu(), torch.tensor([0.6288, 0.6260]), atol=0.01
+        )
+        assert detections.class_id.cpu().tolist() == [0, 0]
+
+
+@pytest.mark.slow
+@pytest.mark.onnx_extras
+def test_rfdetr_keypoints_onnx_glue_sticks_torch(
+    rfdetr_kp_preview_onnx_glue_sticks_package: str,
+    glue_sticks_image_torch: torch.Tensor,
+) -> None:
+    # given
+    from inference_models.models.rfdetr.rfdetr_key_points_detection_onnx import (
+        RFDetrForKeyPointsONNX,
+    )
+
+    model = RFDetrForKeyPointsONNX.from_pretrained(
+        model_name_or_path=rfdetr_kp_preview_onnx_glue_sticks_package,
+        onnx_execution_providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
+    )
+
+    # when
+    key_points_list, detections_list = model(glue_sticks_image_torch, confidence=0.5)
+
+    # then
+    assert len(key_points_list) == 1
+    key_points = key_points_list[0]
+    assert torch.allclose(
+        key_points.xy.cpu().to(torch.int32),
+        torch.tensor(
+            [
+                [[857, 170], [673, 417]],
+                [[1285, 656], [906, 536]],
+            ],
+            dtype=torch.int32,
+        ),
+        atol=2,
+    )
+    assert torch.allclose(
+        key_points.confidence.cpu(),
+        torch.tensor(
+            [
+                [0.9896, 0.9812],
+                [0.9868, 0.9867],
+            ]
+        ),
+        atol=0.01,
+    )
+    assert key_points.class_id.cpu().tolist() == [0, 0]
+
+    assert detections_list is not None
+    detections = detections_list[0]
+    assert torch.allclose(
+        detections.xyxy.cpu().to(torch.int32),
+        torch.tensor(
+            [
+                [625, 124, 908, 442],
+                [876, 499, 1285, 680],
+            ],
+            dtype=torch.int32,
+        ),
+        atol=2,
+    )
+    assert torch.allclose(
+        detections.confidence.cpu(),
+        torch.tensor([0.6338, 0.6074]),
+        atol=0.01,
+    )
+    assert detections.class_id.cpu().tolist() == [0, 0]
\ No newline at end of file

From 72fa276ef888cd43e7da388702baba201bb6ac6a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pawe=C5=82=20P=C4=99czek?=
 <146137186+PawelPeczek-Roboflow@users.noreply.github.com>
Date: Fri, 5 Jun 2026 09:10:29 +0000
Subject: [PATCH 60/76] Add security enhancements for local deployments (#2417)

* Add draft of change in auth middleware

* Add support for stripping env variables

* Add tests

* Make linters happy

* Add changes to docs

* Add warning in code

* Apply CR comments
---
 docs/install/index.md                         |   7 +
 docs/install/security.md                      | 140 ++++++++
 .../environmental_variables.md                |   1 +
 inference/core/env.py                         |   6 +-
 inference/core/interfaces/http/http_api.py    |  30 +-
 inference/core/utils/environment.py           |  12 +-
 mkdocs.yml                                    |   1 +
 .../core/interfaces/http/test_http_api.py     | 333 +++++++++++++++++-
 .../unit_tests/core/utils/test_environment.py |  16 +
 .../test_workflow_with_current_time.py        |  10 +-
 ...st_workflow_with_overlap_analysis_block.py |  12 +-
 .../core_steps/common/test_openrouter.py      |  13 +-
 .../formatters/test_current_time.py           |   2 +-
 .../fusion/test_overlap_analysis.py           | 155 +++++---
 .../models/foundation/test_google_gemma_v2.py |   6 +-
 .../foundation/test_kimi_openrouter_v2.py     |   8 +-
 .../models/foundation/test_llama_vision_v2.py |   6 +-
 .../models/foundation/test_qwen_vlm.py        |  21 +-
 .../models/roboflow/_hosted_api_resolution.py |   4 +-
 .../roboflow/instance_segmentation/test_v4.py |   4 +-
 .../asset_library_attributes/test_v1.py       |   4 +-
 .../test_dynamic_blocks_collection.py         |   6 +-
 22 files changed, 688 insertions(+), 109 deletions(-)
 create mode 100644 docs/install/security.md

diff --git a/docs/install/index.md b/docs/install/index.md
index 027b89662b..43ddd95371 100644
--- a/docs/install/index.md
+++ b/docs/install/index.md
@@ -82,6 +82,13 @@ Browse the navigation on the left for detailed install guides:
 - :simple-nvidia: [Jetson](jetson.md)
 - :fontawesome-brands-raspberry-pi: [Raspberry Pi](raspberry-pi.md)
 
+## Securing your server
+
+A self-hosted server does not enforce authentication, encryption, or network
+restrictions by default — securing it is your responsibility. Before exposing
+it beyond local development traffic, review
+[Securing a Self-Hosted Server](security.md).
+
 ## Using Your New Server
 
 Once you have [Inference server](../quickstart/docker.md) running,
diff --git a/docs/install/security.md b/docs/install/security.md
new file mode 100644
index 0000000000..162a80d7c4
--- /dev/null
+++ b/docs/install/security.md
@@ -0,0 +1,140 @@
+---
+description: Securing a self-hosted Roboflow Inference Server — network isolation, authentication, TLS, and disabling custom Python execution. Security of local deployments is your responsibility.
+---
+
+# Securing a Self-Hosted Server
+
+When you run Inference on your own hardware, **you own its security posture**.
+A locally deployed server does not enforce authentication, encryption, or
+network restrictions by default — it is built to be easy to start, not to be
+safe to expose. Out of the box it will answer any request that reaches it,
+including requests to run models and execute Workflows.
+
+This page covers the four controls every self-hosted deployment should
+review before it handles anything beyond local development traffic. They are
+complementary — apply as many as your environment allows.
+
+!!! warning "This is your responsibility"
+
+    Roboflow secures the managed Cloud, Serverless, and Dedicated Deployment
+    offerings. For a server you run yourself, securing the host, the network
+    around it, and the credentials it accepts is **your responsibility**. If
+    your server is reachable from an untrusted network without the controls
+    below, treat it as open to the world.
+
+## 1. Restrict network access
+
+The single most effective control is to not expose the server in the first
+place. Inference listens on port `9001` by default and has no concept of a
+"trusted" network — anything that can reach the port can use it.
+
+- **Bind to localhost** when only processes on the same host need it
+  (e.g. publish the container port as `127.0.0.1:9001:9001` instead of
+  `9001:9001`).
+- **Keep it on a private network / VPC** and reach it through a VPN, SSH
+  tunnel, or service mesh rather than a public IP.
+- **Use host and cloud firewalls / security groups** to allow `9001` only
+  from the specific clients that need it.
+- **Put a reverse proxy in front of it** (nginx, Traefik, Caddy, a cloud
+  load balancer) if you need to expose it more broadly — that gives you a
+  single place to add TLS, rate limiting, and access logging.
+
+Never publish the inference port directly to the public internet without
+authentication and TLS in place.
+
+## 2. Enforce authentication
+
+By default a self-hosted server does **not** require an API key to make requests — as a result,
+beyond the auth that happens at the Roboflow API level when fetching data from the platform, there is
+no additional security on the server itself. To turn on authentication, set
+`WORKSPACES_WHITELISTED_FOR_LOCAL_DEPLOYMENT` to a comma-separated list of the Roboflow workspace slugs
+allowed to use the server:
+
+```bash
+docker run --rm -p 9001:9001 \
+  -e WORKSPACES_WHITELISTED_FOR_LOCAL_DEPLOYMENT=your-workspace-url-slug,another-workspace-url-slug \
+  roboflow/roboflow-inference-server-cpu:latest
+```
+
+With this set, the server installs an authorization middleware. Every
+inference and Workflow request must carry an `api_key` (as a query parameter
+or in the JSON body) that resolves — via Roboflow — to one of the whitelisted
+workspaces. Requests with a missing, invalid, or non-whitelisted key are
+rejected with `401 Unauthorized`.
+
+!!! note "What is *not* covered by the API-key check"
+
+    A small set of unauthenticated endpoints stay open so the server remains
+    usable and observable: `/`, `/docs`, `/redoc`, `/info`, `/healthz`,
+    `/readiness`, `/metrics`, `/openapi.json`, and static assets
+    (`/static/...`, `/_next/...`). Treat `/info` and `/metrics` as
+    information that anyone who can reach the server can read, and rely on
+    network restrictions (control #1) to limit who that is.
+
+**Bring your own auth.** The built-in check ties authorization to Roboflow
+workspaces. If you have your own identity model, you can instead place a
+reverse proxy or authentication middleware in front of the server — enforcing
+OAuth/OIDC, mTLS, signed headers, an API gateway, or whatever your
+organization already uses — and let only authenticated traffic through to
+port `9001`. The two approaches can be combined.
+
+## 3. Enable TLS when the network requires it
+
+The built-in API-key check relies on credentials sent with each request. If those
+requests travel over any network you do not fully control, the connection
+must be encrypted, otherwise keys and payloads are exposed in plaintext.
+
+You have two options:
+
+- **Terminate TLS at a reverse proxy / load balancer** in front of the
+  server. This is the usual choice when you already run one.
+- **Serve HTTPS directly from the server** by mounting a certificate and
+  key and setting `ENABLE_HTTPS=true`. See
+  [Serving inference over HTTPS](/server_configuration/https.md) for the
+  full guide, including mutual TLS (client certificates) via `SSL_CA_CERTS`.
+
+For purely local, loopback-only traffic (control #1, bound to `127.0.0.1`)
+TLS is optional. Any time requests leave the host over an untrusted network,
+TLS is required.
+
+## 4. Disable custom Python execution in Workflows
+
+Workflows can contain **Custom Python blocks** — arbitrary Python that runs
+inside the server process. This is a powerful feature, but it means that
+anyone who can submit a Workflow to the server can run arbitrary code on your
+host. On a server reachable by untrusted clients, that is remote code
+execution.
+
+This is controlled by `ALLOW_CUSTOM_PYTHON_EXECUTION_IN_WORKFLOWS`.
+
+| Setting | Effect |
+| --- | --- |
+| `True` (current default) | Workflows may define and run custom Python blocks. |
+| `False` | Custom Python blocks are rejected; all other Workflow features still work. |
+
+**If your Workflows do not rely on custom Python, set it to `False`:**
+
+```bash
+docker run --rm -p 9001:9001 \
+  -e ALLOW_CUSTOM_PYTHON_EXECUTION_IN_WORKFLOWS=false \
+  roboflow/roboflow-inference-server-cpu:latest
+```
+
+!!! warning "The default is changing on 2026-06-19"
+
+    Today this flag defaults to `True` for backward compatibility. On
+    **2026-06-19** the default will change to `False`. If your Workflows
+    depend on custom Python blocks, set
+    `ALLOW_CUSTOM_PYTHON_EXECUTION_IN_WORKFLOWS=true` explicitly so they
+    keep working after that date. Otherwise, set it to `false` now — and prefer
+    enabling it only on deployments where the network and authentication
+    controls above are already in place.
+
+## Recommended baseline
+
+For any self-hosted server that is reachable beyond `localhost`:
+
+- [ ] Network access restricted to known clients (firewall / private network / proxy).
+- [ ] `WORKSPACES_WHITELISTED_FOR_LOCAL_DEPLOYMENT` set, or your own auth in front.
+- [ ] TLS terminated at the server or an upstream proxy.
+- [ ] `ALLOW_CUSTOM_PYTHON_EXECUTION_IN_WORKFLOWS=false` unless you genuinely need it.
diff --git a/docs/server_configuration/environmental_variables.md b/docs/server_configuration/environmental_variables.md
index c95d5cbd58..9efc110467 100644
--- a/docs/server_configuration/environmental_variables.md
+++ b/docs/server_configuration/environmental_variables.md
@@ -36,3 +36,4 @@ Environmental variable                         | Description
 `SSL_KEYFILE`                                  | Path to the PEM-encoded TLS private key paired with `SSL_CERTFILE`. Defaults to `/etc/inference/certs/server.key` so a single bind mount is enough to enable HTTPS.                                                                                                                                                    | `/etc/inference/certs/server.key`
 `SSL_KEYFILE_PASSWORD`                         | Optional passphrase used to decrypt `SSL_KEYFILE` when the private key is encrypted.                                                                                                                                                                                                                                    | Not set
 `SSL_CA_CERTS`                                 | Optional path to a CA bundle used when client certificate verification (mTLS) is required.                                                                                                                                                                                                                              | Not set
+`WORKSPACES_WHITELISTED_FOR_LOCAL_DEPLOYMENT`  | Comma-separated list of workspace URL slugs allowed to execute requests against the local server. When set, each request (apart from docs, landing page and health / readiness / metrics endpoints) must be authorised with a Roboflow API key that resolves to one of the listed workspaces. | Not set
\ No newline at end of file
diff --git a/inference/core/env.py b/inference/core/env.py
index 8c941ef213..3ef0244f64 100644
--- a/inference/core/env.py
+++ b/inference/core/env.py
@@ -740,7 +740,11 @@
 DEDICATED_DEPLOYMENT_WORKSPACE_URL = os.environ.get(
     "DEDICATED_DEPLOYMENT_WORKSPACE_URL", None
 )
-
+WORKSPACES_WHITELISTED_FOR_LOCAL_DEPLOYMENT = safe_split_value(
+    value=os.getenv("WORKSPACES_WHITELISTED_FOR_LOCAL_DEPLOYMENT"),
+    delimiter=",",
+    strip=True,
+)
 ENABLE_STREAM_API = str2bool(os.getenv("ENABLE_STREAM_API", "False"))
 STREAM_API_PRELOADED_PROCESSES = int(os.getenv("STREAM_API_PRELOADED_PROCESSES", "0"))
 
diff --git a/inference/core/interfaces/http/http_api.py b/inference/core/interfaces/http/http_api.py
index bf281cf8c8..a9d9d38f46 100644
--- a/inference/core/interfaces/http/http_api.py
+++ b/inference/core/interfaces/http/http_api.py
@@ -3,6 +3,7 @@
 import logging
 import os
 import re
+import warnings
 from concurrent.futures import CancelledError, Future, ThreadPoolExecutor
 from dataclasses import dataclass
 from functools import partial
@@ -139,6 +140,7 @@
     WorkflowValidationStatus,
 )
 from inference.core.env import (
+    ALLOW_CUSTOM_PYTHON_EXECUTION_IN_WORKFLOWS,
     ALLOW_ORIGINS,
     API_BASE_URL,
     API_LOGGING_ENABLED,
@@ -195,8 +197,8 @@
     WEBRTC_WORKER_ENABLED,
     WORKFLOWS_MAX_CONCURRENT_STEPS,
     WORKFLOWS_PROFILER_BUFFER_SIZE,
-    WORKFLOWS_REMOTE_EXECUTION_TIME_FORWARDING,
     WORKFLOWS_STEP_EXECUTION_MODE,
+    WORKSPACES_WHITELISTED_FOR_LOCAL_DEPLOYMENT,
 )
 from inference.core.exceptions import (
     ContentTypeInvalid,
@@ -294,6 +296,7 @@
 from inference.core.utils.container import is_docker_socket_mounted
 from inference.core.utils.notebooks import start_notebook
 from inference.core.utils.url_utils import wrap_url
+from inference.core.warnings import InferenceDeprecationWarning
 from inference.core.workflows.core_steps.common.entities import StepExecutionMode
 from inference.core.workflows.errors import (
     WorkflowBlockError,
@@ -354,6 +357,16 @@ async def dispatch(self, request, call_next):
 REQUEST_RECEIVED_LOG_MESSAGE = "Request received"
 
 
+if ALLOW_CUSTOM_PYTHON_EXECUTION_IN_WORKFLOWS:
+    warnings.warn(
+        "Your `inference` configuration specifies `ALLOW_CUSTOM_PYTHON_EXECUTION_IN_WORKFLOWS=True`. "
+        "Currently, Workflows Custom Python blocks are allowed by default - but this is going to change 19.06.2026. "
+        "If your workload relies on that setting, please make adjustment to your configuration before the inference "
+        "release following mentioned date. Otherwise - you may ignore this warning.",
+        category=InferenceDeprecationWarning,
+    )
+
+
 @dataclass(frozen=True)
 class AuthorizationCacheEntry:
     expires_at: float
@@ -981,7 +994,10 @@ def _authorization_error_response(
                     response.headers[WORKSPACE_ID_HEADER] = workspace_id
                 return response
 
-        if DEDICATED_DEPLOYMENT_WORKSPACE_URL:
+        if (
+            DEDICATED_DEPLOYMENT_WORKSPACE_URL
+            or WORKSPACES_WHITELISTED_FOR_LOCAL_DEPLOYMENT
+        ):
 
             @app.middleware("http")
             async def check_authorization(request: Request, call_next):
@@ -1051,8 +1067,14 @@ def _unauthorized_response(msg):
                             workspace_id = await get_roboflow_workspace_async(
                                 api_key=api_key
                             )
-
-                        if workspace_id != DEDICATED_DEPLOYMENT_WORKSPACE_URL:
+                        allowed_workspaces = set()
+                        if DEDICATED_DEPLOYMENT_WORKSPACE_URL:
+                            allowed_workspaces.add(DEDICATED_DEPLOYMENT_WORKSPACE_URL)
+                        if WORKSPACES_WHITELISTED_FOR_LOCAL_DEPLOYMENT:
+                            allowed_workspaces.update(
+                                WORKSPACES_WHITELISTED_FOR_LOCAL_DEPLOYMENT
+                            )
+                        if workspace_id not in allowed_workspaces:
                             return _unauthorized_response("Unauthorized api_key")
 
                         cached_api_keys[api_key] = AuthorizationCacheEntry(
diff --git a/inference/core/utils/environment.py b/inference/core/utils/environment.py
index 234084bd6e..4dd414e786 100644
--- a/inference/core/utils/environment.py
+++ b/inference/core/utils/environment.py
@@ -52,13 +52,18 @@ def str2bool(value: Any) -> bool:
         )
 
 
-def safe_split_value(value: Optional[str], delimiter: str = ",") -> Optional[List[str]]:
+def safe_split_value(
+    value: Optional[str],
+    delimiter: str = ",",
+    strip: bool = False,
+) -> Optional[List[str]]:
     """
     Splits a separated environment variable into a list.
 
     Args:
         value (str): The environment variable value to be split.
         delimiter(str): Delimiter to be used
+        strip (bool): Strip leading and trailing whitespace from each element and drop elements that are empty after stripping
 
     Returns:
         list or None: The split values as a list, or None if the input is None.
@@ -66,4 +71,7 @@ def safe_split_value(value: Optional[str], delimiter: str = ",") -> Optional[Lis
     if value is None:
         return None
     else:
-        return value.split(delimiter)
+        result = value.split(delimiter)
+        if strip:
+            result = [element.strip() for element in result if len(element.strip())]
+        return result
diff --git a/mkdocs.yml b/mkdocs.yml
index 5c818a2664..4420413d95 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -58,6 +58,7 @@ nav:
               - Azure: install/cloud/azure.md
               - GCP: install/cloud/gcp.md
           - Other Devices: install/other.md
+          - Securing Your Server: install/security.md
       # - Inputs:
       #     - inputs/index.md
       #     - Images: inputs/images.md
diff --git a/tests/inference/unit_tests/core/interfaces/http/test_http_api.py b/tests/inference/unit_tests/core/interfaces/http/test_http_api.py
index bc0f3f105d..ed631abcbb 100644
--- a/tests/inference/unit_tests/core/interfaces/http/test_http_api.py
+++ b/tests/inference/unit_tests/core/interfaces/http/test_http_api.py
@@ -1,6 +1,7 @@
 import time
 from unittest.mock import AsyncMock, MagicMock
 
+import pytest
 from pydantic import BaseModel
 from starlette.testclient import TestClient
 
@@ -10,6 +11,7 @@
     WORKSPACE_ID_HEADER,
 )
 from inference.core.env import CORRELATION_ID_HEADER
+from inference.core.exceptions import RoboflowAPINotAuthorizedError
 from inference.core.roboflow_api import ServerlessUsageCheckResponse
 
 
@@ -77,7 +79,16 @@ def _build_dedicated_deployment_interface(
     monkeypatch,
     workspace_lookup_result="dedicated-workspace",
     dedicated_workspace_url="dedicated-workspace",
+    workspace_lookup_side_effect=None,
+    local_whitelist=None,
 ):
+    """Build an HttpInterface with the workspace-allowlist middleware enabled.
+
+    `local_whitelist` patches `WORKSPACES_WHITELISTED_FOR_LOCAL_DEPLOYMENT`.
+    Default `None` preserves the historical behaviour for existing callers
+    (env var unset). Pass a list to simulate the env var being set; pass
+    `[]` to assert the empty-list-doesn't-enable-middleware contract.
+    """
     import inference.core.interfaces.http.http_api as http_api
 
     monkeypatch.setattr(http_api, "InferenceInstrumentator", _DummyInstrumentator)
@@ -90,7 +101,15 @@ def _build_dedicated_deployment_interface(
     monkeypatch.setattr(
         http_api, "DEDICATED_DEPLOYMENT_WORKSPACE_URL", dedicated_workspace_url
     )
-    workspace_lookup_mock = AsyncMock(return_value=workspace_lookup_result)
+    monkeypatch.setattr(
+        http_api,
+        "WORKSPACES_WHITELISTED_FOR_LOCAL_DEPLOYMENT",
+        list(local_whitelist) if local_whitelist is not None else None,
+    )
+    if workspace_lookup_side_effect is not None:
+        workspace_lookup_mock = AsyncMock(side_effect=workspace_lookup_side_effect)
+    else:
+        workspace_lookup_mock = AsyncMock(return_value=workspace_lookup_result)
     monkeypatch.setattr(
         http_api,
         "get_roboflow_workspace_async",
@@ -577,3 +596,315 @@ def test_dedicated_deployment_auth_middleware_rejects_host_header_path_injection
 
     model_manager.infer_from_request_sync.assert_not_called()
     workspace_lookup_mock.assert_not_called()
+
+
+# A representative spread of paths the workspace-allowlist middleware must
+# guard. Mix of: real POST inference handlers, a non-exempt GET, a registered
+# admin POST, and an unregistered route (the middleware runs before FastAPI's
+# 404, so even unregistered paths must produce a 401 instead of leaking 404).
+# Used to parametrise the rejection tests below so a future refactor that
+# skips the middleware on any path family trips a regression.
+_SECURED_GATE_TARGETS = [
+    ("POST", "/infer/lmm/florence-2-base"),
+    ("POST", "/infer/object_detection"),
+    ("POST", "/infer/instance_segmentation"),
+    ("POST", "/infer/classification"),
+    ("POST", "/model/add"),
+    ("GET", "/device/stats"),
+    ("POST", "/some/unregistered/secured/path"),
+]
+
+
+def _send_secured_request(client, method, path, *, api_key=None):
+    """Drive a request through the middleware for parametrised tests.
+
+    The body is irrelevant for the middleware (it runs before route
+    validation), so we send a generic JSON payload on POST and nothing on
+    GET. `api_key`, when provided, is passed as a query parameter — which is
+    where the middleware looks first.
+    """
+    kwargs = {}
+    if api_key is not None:
+        kwargs["params"] = {"api_key": api_key}
+    if method == "POST":
+        kwargs["json"] = _make_inference_request()
+    return client.request(method, path, **kwargs)
+
+
+def test_local_whitelist_alone_enables_middleware_and_allows_matching_workspace(
+    monkeypatch,
+) -> None:
+    interface, _, workspace_lookup_mock = _build_dedicated_deployment_interface(
+        monkeypatch=monkeypatch,
+        workspace_lookup_result="local-allowed-ws",
+        dedicated_workspace_url=None,
+        local_whitelist=["local-allowed-ws"],
+    )
+
+    with TestClient(interface.app) as client:
+        response = client.post(
+            "/infer/lmm/florence-2-base",
+            params={"api_key": "key-for-allowed-ws"},
+            json=_make_inference_request(),
+        )
+
+    assert response.status_code == 200
+    workspace_lookup_mock.assert_awaited_once_with(api_key="key-for-allowed-ws")
+
+
+@pytest.mark.parametrize("method,path", _SECURED_GATE_TARGETS)
+def test_local_whitelist_alone_rejects_non_matching_workspace(
+    monkeypatch, method, path
+) -> None:
+    interface, _, _ = _build_dedicated_deployment_interface(
+        monkeypatch=monkeypatch,
+        workspace_lookup_result="some-other-ws",
+        dedicated_workspace_url=None,
+        local_whitelist=["local-allowed-ws"],
+    )
+
+    with TestClient(interface.app) as client:
+        response = _send_secured_request(
+            client, method, path, api_key="key-for-other-ws"
+        )
+
+    assert (
+        response.status_code == 401
+    ), f"{method} {path}: expected 401, got {response.status_code}"
+    assert response.json() == {"status": 401, "message": "Unauthorized api_key"}
+
+
+def test_dedicated_workspace_remains_accepted_when_local_whitelist_is_also_set(
+    monkeypatch,
+) -> None:
+    """Union semantics — the dedicated path must keep working after the local
+    whitelist is added alongside it."""
+    interface, _, _ = _build_dedicated_deployment_interface(
+        monkeypatch=monkeypatch,
+        workspace_lookup_result="dedicated-workspace",
+        dedicated_workspace_url="dedicated-workspace",
+        local_whitelist=["local-allowed-ws"],
+    )
+
+    with TestClient(interface.app) as client:
+        response = client.post(
+            "/infer/lmm/florence-2-base",
+            params={"api_key": "key-for-dedicated"},
+            json=_make_inference_request(),
+        )
+
+    assert response.status_code == 200
+
+
+def test_locally_whitelisted_workspace_is_accepted_in_union_with_dedicated(
+    monkeypatch,
+) -> None:
+    """A workspace that is in the local whitelist but does NOT match the
+    dedicated URL must still be accepted when both env vars are set."""
+    interface, _, _ = _build_dedicated_deployment_interface(
+        monkeypatch=monkeypatch,
+        workspace_lookup_result="local-allowed-ws",
+        dedicated_workspace_url="dedicated-workspace",
+        local_whitelist=["local-allowed-ws"],
+    )
+
+    with TestClient(interface.app) as client:
+        response = client.post(
+            "/infer/lmm/florence-2-base",
+            params={"api_key": "key-for-local"},
+            json=_make_inference_request(),
+        )
+
+    assert response.status_code == 200
+
+
+@pytest.mark.parametrize("method,path", _SECURED_GATE_TARGETS)
+def test_workspace_outside_both_allowlists_is_rejected(
+    monkeypatch, method, path
+) -> None:
+    interface, _, _ = _build_dedicated_deployment_interface(
+        monkeypatch=monkeypatch,
+        workspace_lookup_result="some-other-ws",
+        dedicated_workspace_url="dedicated-workspace",
+        local_whitelist=["local-allowed-ws"],
+    )
+
+    with TestClient(interface.app) as client:
+        response = _send_secured_request(client, method, path, api_key="key-for-other")
+
+    assert (
+        response.status_code == 401
+    ), f"{method} {path}: expected 401, got {response.status_code}"
+
+
+def test_local_whitelist_with_multiple_entries_accepts_each_member(monkeypatch) -> None:
+    import inference.core.interfaces.http.http_api as http_api
+
+    workspace_for_key = {
+        "key-a": "ws-a",
+        "key-b": "ws-b",
+        "key-c": "ws-c",
+    }
+
+    async def lookup(api_key: str) -> str:
+        return workspace_for_key[api_key]
+
+    interface, _, workspace_lookup_mock = _build_dedicated_deployment_interface(
+        monkeypatch=monkeypatch,
+        dedicated_workspace_url=None,
+        local_whitelist=["ws-a", "ws-b", "ws-c"],
+    )
+    # Override the lookup with a per-key one: the helper's default single
+    # AsyncMock(return_value=...) cannot map each key to its own workspace.
+    # NOTE(review): `workspace_lookup_side_effect=lookup` looks equivalent.
+    monkeypatch.setattr(
+        http_api, "get_roboflow_workspace_async", AsyncMock(side_effect=lookup)
+    )
+
+    with TestClient(interface.app) as client:
+        for api_key in ("key-a", "key-b", "key-c"):
+            response = client.post(
+                "/infer/lmm/florence-2-base",
+                params={"api_key": api_key},
+                json=_make_inference_request(),
+            )
+            assert (
+                response.status_code == 200
+            ), f"{api_key} should map to a whitelisted workspace"
+
+
+@pytest.mark.parametrize("method,path", _SECURED_GATE_TARGETS)
+def test_local_whitelist_middleware_rejects_request_without_api_key(
+    monkeypatch, method, path
+) -> None:
+    interface, _, workspace_lookup_mock = _build_dedicated_deployment_interface(
+        monkeypatch=monkeypatch,
+        dedicated_workspace_url=None,
+        local_whitelist=["local-allowed-ws"],
+    )
+
+    with TestClient(interface.app) as client:
+        response = _send_secured_request(client, method, path)
+
+    assert (
+        response.status_code == 401
+    ), f"{method} {path}: expected 401, got {response.status_code}"
+    assert response.json() == {"status": 401, "message": "Unauthorized api_key"}
+    workspace_lookup_mock.assert_not_awaited()
+
+
+@pytest.mark.parametrize("method,path", _SECURED_GATE_TARGETS)
+def test_local_whitelist_middleware_returns_401_when_roboflow_api_rejects_key(
+    monkeypatch, method, path
+) -> None:
+    interface, _, _ = _build_dedicated_deployment_interface(
+        monkeypatch=monkeypatch,
+        dedicated_workspace_url=None,
+        local_whitelist=["local-allowed-ws"],
+        workspace_lookup_side_effect=RoboflowAPINotAuthorizedError("key revoked"),
+    )
+
+    with TestClient(interface.app) as client:
+        response = _send_secured_request(client, method, path, api_key="revoked-key")
+
+    assert (
+        response.status_code == 401
+    ), f"{method} {path}: expected 401, got {response.status_code}"
+
+
+def test_local_whitelist_middleware_caches_successful_workspace_lookup(
+    monkeypatch,
+) -> None:
+    interface, _, workspace_lookup_mock = _build_dedicated_deployment_interface(
+        monkeypatch=monkeypatch,
+        workspace_lookup_result="local-allowed-ws",
+        dedicated_workspace_url=None,
+        local_whitelist=["local-allowed-ws"],
+    )
+
+    with TestClient(interface.app) as client:
+        for _ in range(3):
+            response = client.post(
+                "/infer/lmm/florence-2-base",
+                params={"api_key": "same-key"},
+                json=_make_inference_request(),
+            )
+            assert response.status_code == 200
+
+    assert workspace_lookup_mock.await_count == 1, (
+        "After the first lookup, the api_key/workspace_id pair is cached "
+        "and the upstream Roboflow API must not be hit again"
+    )
+
+
+@pytest.mark.parametrize(
+    "exempt_path",
+    [
+        "/healthz",
+        "/readiness",
+        "/info",
+        "/openapi.json",
+    ],
+)
+def test_local_whitelist_middleware_skips_check_for_exempt_paths(
+    monkeypatch, exempt_path
+) -> None:
+    """Liveness and metadata endpoints must remain reachable without an
+    api_key even when the local-deployment allowlist is enforcing auth."""
+    interface, _, workspace_lookup_mock = _build_dedicated_deployment_interface(
+        monkeypatch=monkeypatch,
+        dedicated_workspace_url=None,
+        local_whitelist=["local-allowed-ws"],
+    )
+
+    with TestClient(interface.app) as client:
+        response = client.get(exempt_path)
+
+    assert response.status_code == 200, f"{exempt_path} must bypass the auth middleware"
+    workspace_lookup_mock.assert_not_awaited()
+
+
+def test_middleware_is_not_installed_when_neither_env_var_is_set(monkeypatch) -> None:
+    """Pins the gate at http_api.py:985 — when neither
+    `DEDICATED_DEPLOYMENT_WORKSPACE_URL` nor
+    `WORKSPACES_WHITELISTED_FOR_LOCAL_DEPLOYMENT` is set, the middleware is
+    never installed and unauthenticated requests proceed (community default).
+    """
+    interface, _, workspace_lookup_mock = _build_dedicated_deployment_interface(
+        monkeypatch=monkeypatch,
+        dedicated_workspace_url=None,
+        local_whitelist=None,
+    )
+
+    with TestClient(interface.app) as client:
+        response = client.post(
+            "/infer/lmm/florence-2-base",
+            params={"api_key": "anything"},
+            json=_make_inference_request(),
+        )
+
+    assert response.status_code == 200
+    workspace_lookup_mock.assert_not_awaited()
+
+
+def test_empty_local_whitelist_alone_does_not_enable_middleware(monkeypatch) -> None:
+    """An empty list is falsy, so the gate `if DEDICATED_... or WORKSPACES_...`
+    must NOT install the middleware when only an empty list is provided.
+    Otherwise an operator who clears the env var to `""` (which `safe_split_value`
+    can collapse to an empty list depending on implementation) would silently
+    enable an allowlist that rejects every api_key."""
+    interface, _, workspace_lookup_mock = _build_dedicated_deployment_interface(
+        monkeypatch=monkeypatch,
+        dedicated_workspace_url=None,
+        local_whitelist=[],
+    )
+
+    with TestClient(interface.app) as client:
+        response = client.post(
+            "/infer/lmm/florence-2-base",
+            params={"api_key": "anything"},
+            json=_make_inference_request(),
+        )
+
+    assert response.status_code == 200
+    workspace_lookup_mock.assert_not_awaited()
diff --git a/tests/inference/unit_tests/core/utils/test_environment.py b/tests/inference/unit_tests/core/utils/test_environment.py
index 6551ac7087..cfb5b4f7ac 100644
--- a/tests/inference/unit_tests/core/utils/test_environment.py
+++ b/tests/inference/unit_tests/core/utils/test_environment.py
@@ -72,3 +72,19 @@ def test_safe_split_value_when_non_splittable_value_given() -> None:
 
     # then
     assert result == ["a,b,c,d"]
+
+
+def test_safe_split_value_when_splittable_value_given_and_strip_requested() -> None:
+    # when
+    result = safe_split_value(value="a, b ,c, d", delimiter=",", strip=True)
+
+    # then
+    assert result == ["a", "b", "c", "d"]
+
+
+def test_safe_split_value_when_empty_value_with_strip_requested() -> None:
+    # when
+    result = safe_split_value(value="", delimiter=",", strip=True)
+
+    # then
+    assert result == []
diff --git a/tests/workflows/integration_tests/execution/test_workflow_with_current_time.py b/tests/workflows/integration_tests/execution/test_workflow_with_current_time.py
index 6f952e9068..7ed768855e 100644
--- a/tests/workflows/integration_tests/execution/test_workflow_with_current_time.py
+++ b/tests/workflows/integration_tests/execution/test_workflow_with_current_time.py
@@ -23,7 +23,11 @@
     ],
     "outputs": [
         {"type": "JsonField", "name": "timestamp", "selector": "$steps.now.timestamp"},
-        {"type": "JsonField", "name": "iso_string", "selector": "$steps.now.iso_string"},
+        {
+            "type": "JsonField",
+            "name": "iso_string",
+            "selector": "$steps.now.iso_string",
+        },
         {"type": "JsonField", "name": "date", "selector": "$steps.now.date"},
         {"type": "JsonField", "name": "time", "selector": "$steps.now.time"},
     ],
@@ -42,9 +46,7 @@ def test_current_time_workflow(model_manager: ModelManager) -> None:
     )
 
     # when
-    result = execution_engine.run(
-        runtime_parameters={"timezone": "America/New_York"}
-    )
+    result = execution_engine.run(runtime_parameters={"timezone": "America/New_York"})
 
     # then
     assert len(result) == 1, "Single image/parameter batch expected"
diff --git a/tests/workflows/integration_tests/execution/test_workflow_with_overlap_analysis_block.py b/tests/workflows/integration_tests/execution/test_workflow_with_overlap_analysis_block.py
index c2baccaaf8..ff152c49fe 100644
--- a/tests/workflows/integration_tests/execution/test_workflow_with_overlap_analysis_block.py
+++ b/tests/workflows/integration_tests/execution/test_workflow_with_overlap_analysis_block.py
@@ -122,9 +122,9 @@ def test_overlap_analysis_workflow(
     assert len(result) == 1, "Expected one output element for one input image"
     assert set(result[0].keys()) == {"result"}
     overlaps = result[0]["result"]["overlaps"]
-    assert isinstance(overlaps, list), (
-        f"Expected `overlaps` to be a list, got {type(overlaps).__name__}"
-    )
+    assert isinstance(
+        overlaps, list
+    ), f"Expected `overlaps` to be a list, got {type(overlaps).__name__}"
     assert len(overlaps) > 0, (
         "Expected at least one overlapping pair between the high-confidence and "
         "low-confidence detection runs on the crowd image."
@@ -136,9 +136,9 @@ def test_overlap_analysis_workflow(
         assert not missing, f"Record missing required keys: {missing}"
         ratio = record["overlap_ratio"]
         assert isinstance(ratio, float)
-        assert 0.1 <= ratio <= 1.0 + 1e-9, (
-            f"overlap_ratio out of expected range [0.1, 1.0]: {ratio}"
-        )
+        assert (
+            0.1 <= ratio <= 1.0 + 1e-9
+        ), f"overlap_ratio out of expected range [0.1, 1.0]: {ratio}"
         # RoboflowObjectDetectionModel emits detections carrying detection_id,
         # so both id fields must be present.
         assert "reference_detection_id" in record
diff --git a/tests/workflows/unit_tests/core_steps/common/test_openrouter.py b/tests/workflows/unit_tests/core_steps/common/test_openrouter.py
index 9abbc6c5bc..773ec4be8b 100644
--- a/tests/workflows/unit_tests/core_steps/common/test_openrouter.py
+++ b/tests/workflows/unit_tests/core_steps/common/test_openrouter.py
@@ -7,16 +7,13 @@
 
 from inference.core.workflows.core_steps.common.openrouter import (
     OpenRouterWorkflowBlockBase,
-    build_provider_routing,
-    build_prompts_from_images,
-    validate_task_type_required_fields,
-)
-from inference.core.workflows.core_steps.common.openrouter import (
     _execute_direct_openrouter_request,
     _execute_proxied_openrouter_request,
+    build_prompts_from_images,
+    build_provider_routing,
+    validate_task_type_required_fields,
 )
 
-
 # ---------------------------------------------------------------------------
 # build_provider_routing
 # ---------------------------------------------------------------------------
@@ -164,9 +161,7 @@ def test_execute_openrouter_batch_routes_to_direct_for_user_key(
 
 @patch("inference.core.workflows.core_steps.common.openrouter.post_to_roboflow_api")
 def test_proxied_request_sends_expected_payload_to_roboflow(mock_post):
-    mock_post.return_value = {
-        "choices": [{"message": {"content": "hello world"}}]
-    }
+    mock_post.return_value = {"choices": [{"message": {"content": "hello world"}}]}
 
     out = _execute_proxied_openrouter_request(
         roboflow_api_key="ws-key-xyz",
diff --git a/tests/workflows/unit_tests/core_steps/formatters/test_current_time.py b/tests/workflows/unit_tests/core_steps/formatters/test_current_time.py
index 00197ac994..35055d21c8 100644
--- a/tests/workflows/unit_tests/core_steps/formatters/test_current_time.py
+++ b/tests/workflows/unit_tests/core_steps/formatters/test_current_time.py
@@ -6,9 +6,9 @@
 
 from inference.core.workflows.core_steps.formatters.current_time.v1 import (
     ALLOWED_TIMEZONES,
+    TIMEZONE_METADATA,
     BlockManifest,
     CurrentTimeBlockV1,
-    TIMEZONE_METADATA,
 )
 
 
diff --git a/tests/workflows/unit_tests/core_steps/fusion/test_overlap_analysis.py b/tests/workflows/unit_tests/core_steps/fusion/test_overlap_analysis.py
index 167e4eb114..bc53243609 100644
--- a/tests/workflows/unit_tests/core_steps/fusion/test_overlap_analysis.py
+++ b/tests/workflows/unit_tests/core_steps/fusion/test_overlap_analysis.py
@@ -34,7 +34,6 @@
     OverlapAnalysisBlockV1,
 )
 
-
 # ---------------------------------------------------------------------------
 # Fixtures
 # ---------------------------------------------------------------------------
@@ -56,7 +55,9 @@ def _detections_from_xyxy(
         data["detection_id"] = np.array(detection_ids)
     return sv.Detections(
         xyxy=np.asarray(xyxy, dtype=float),
-        confidence=np.asarray(confidences, dtype=float) if confidences is not None else None,
+        confidence=(
+            np.asarray(confidences, dtype=float) if confidences is not None else None
+        ),
         class_id=np.zeros(n, dtype=int),
         mask=masks,
         data=data,
@@ -145,7 +146,9 @@ def test_run_with_bbox_only_inputs_full_containment() -> None:
     block = OverlapAnalysisBlockV1()
 
     # when
-    result = block.run(reference_predictions=ref, candidate_predictions=cand, min_overlap=0.1)
+    result = block.run(
+        reference_predictions=ref, candidate_predictions=cand, min_overlap=0.1
+    )
 
     # then
     assert len(result["overlaps"]) == 1
@@ -170,7 +173,9 @@ def test_run_with_bbox_only_inputs_partial_overlap() -> None:
     block = OverlapAnalysisBlockV1()
 
     # when
-    result = block.run(reference_predictions=ref, candidate_predictions=cand, min_overlap=0.1)
+    result = block.run(
+        reference_predictions=ref, candidate_predictions=cand, min_overlap=0.1
+    )
 
     # then
     assert len(result["overlaps"]) == 1
@@ -184,7 +189,9 @@ def test_run_below_threshold_returns_empty() -> None:
     block = OverlapAnalysisBlockV1()
 
     # when
-    result = block.run(reference_predictions=ref, candidate_predictions=cand, min_overlap=0.1)
+    result = block.run(
+        reference_predictions=ref, candidate_predictions=cand, min_overlap=0.1
+    )
 
     # then
     assert result == {"overlaps": []}
@@ -198,7 +205,9 @@ def test_run_at_threshold_boundary_just_above_passes() -> None:
     block = OverlapAnalysisBlockV1()
 
     # when
-    result = block.run(reference_predictions=ref, candidate_predictions=cand, min_overlap=0.1)
+    result = block.run(
+        reference_predictions=ref, candidate_predictions=cand, min_overlap=0.1
+    )
 
     # then
     assert len(result["overlaps"]) == 1
@@ -212,7 +221,9 @@ def test_run_at_threshold_boundary_just_below_is_dropped() -> None:
     block = OverlapAnalysisBlockV1()
 
     # when
-    result = block.run(reference_predictions=ref, candidate_predictions=cand, min_overlap=0.1)
+    result = block.run(
+        reference_predictions=ref, candidate_predictions=cand, min_overlap=0.1
+    )
 
     # then
     assert result == {"overlaps": []}
@@ -225,7 +236,9 @@ def test_run_with_disjoint_detections_returns_empty() -> None:
     block = OverlapAnalysisBlockV1()
 
     # when
-    result = block.run(reference_predictions=ref, candidate_predictions=cand, min_overlap=0.1)
+    result = block.run(
+        reference_predictions=ref, candidate_predictions=cand, min_overlap=0.1
+    )
 
     # then
     assert result == {"overlaps": []}
@@ -267,7 +280,9 @@ def test_run_with_mask_inputs_uses_mask_path() -> None:
     block = OverlapAnalysisBlockV1()
 
     # when
-    result = block.run(reference_predictions=ref, candidate_predictions=cand, min_overlap=0.01)
+    result = block.run(
+        reference_predictions=ref, candidate_predictions=cand, min_overlap=0.01
+    )
 
     # then
     assert len(result["overlaps"]) == 1
@@ -307,7 +322,9 @@ def test_run_with_invalid_polygon_falls_back_to_bbox() -> None:
     block = OverlapAnalysisBlockV1()
 
     # when
-    result = block.run(reference_predictions=ref, candidate_predictions=cand, min_overlap=0.1)
+    result = block.run(
+        reference_predictions=ref, candidate_predictions=cand, min_overlap=0.1
+    )
 
     # then
     # Fallback to bbox polygon → reference is a full 100x100 box → fully
@@ -338,7 +355,9 @@ def test_run_propagates_detection_ids_when_present_on_both_sides() -> None:
     block = OverlapAnalysisBlockV1()
 
     # when
-    result = block.run(reference_predictions=ref, candidate_predictions=cand, min_overlap=0.1)
+    result = block.run(
+        reference_predictions=ref, candidate_predictions=cand, min_overlap=0.1
+    )
 
     # then
     record = result["overlaps"][0]
@@ -362,7 +381,9 @@ def test_run_propagates_only_present_detection_ids() -> None:
     block = OverlapAnalysisBlockV1()
 
     # when
-    result = block.run(reference_predictions=ref, candidate_predictions=cand, min_overlap=0.1)
+    result = block.run(
+        reference_predictions=ref, candidate_predictions=cand, min_overlap=0.1
+    )
 
     # then
     record = result["overlaps"][0]
@@ -386,7 +407,9 @@ def test_run_with_empty_reference_returns_empty() -> None:
     block = OverlapAnalysisBlockV1()
 
     # when
-    result = block.run(reference_predictions=ref, candidate_predictions=cand, min_overlap=0.1)
+    result = block.run(
+        reference_predictions=ref, candidate_predictions=cand, min_overlap=0.1
+    )
 
     # then
     assert result == {"overlaps": []}
@@ -403,7 +426,9 @@ def test_run_with_empty_candidate_returns_empty() -> None:
     block = OverlapAnalysisBlockV1()
 
     # when
-    result = block.run(reference_predictions=ref, candidate_predictions=cand, min_overlap=0.1)
+    result = block.run(
+        reference_predictions=ref, candidate_predictions=cand, min_overlap=0.1
+    )
 
     # then
     assert result == {"overlaps": []}
@@ -417,20 +442,24 @@ def test_run_with_empty_candidate_returns_empty() -> None:
 def test_run_n_to_m_pairs_emits_only_pairs_above_threshold() -> None:
     # given: 2 reference x 3 candidate; only some pairs overlap above 0.1.
     ref = _detections_from_xyxy(
-        np.array([
-            [0, 0, 100, 100],     # ref 0
-            [200, 200, 300, 300], # ref 1 (disjoint from all candidates)
-        ]),
+        np.array(
+            [
+                [0, 0, 100, 100],  # ref 0
+                [200, 200, 300, 300],  # ref 1 (disjoint from all candidates)
+            ]
+        ),
         class_names=["R0", "R1"],
         confidences=[1.0, 1.0],
         detection_ids=["r0", "r1"],
     )
     cand = _detections_from_xyxy(
-        np.array([
-            [50, 0, 150, 100],    # cand 0 - 50% overlap with ref 0
-            [80, 0, 180, 100],    # cand 1 - 20% overlap with ref 0
-            [400, 400, 500, 500], # cand 2 - no overlap with anything
-        ]),
+        np.array(
+            [
+                [50, 0, 150, 100],  # cand 0 - 50% overlap with ref 0
+                [80, 0, 180, 100],  # cand 1 - 20% overlap with ref 0
+                [400, 400, 500, 500],  # cand 2 - no overlap with anything
+            ]
+        ),
         class_names=["C0", "C1", "C2"],
         confidences=[1.0, 1.0, 1.0],
         detection_ids=["c0", "c1", "c2"],
@@ -438,11 +467,15 @@ def test_run_n_to_m_pairs_emits_only_pairs_above_threshold() -> None:
     block = OverlapAnalysisBlockV1()
 
     # when
-    result = block.run(reference_predictions=ref, candidate_predictions=cand, min_overlap=0.1)
+    result = block.run(
+        reference_predictions=ref, candidate_predictions=cand, min_overlap=0.1
+    )
 
     # then
     overlaps = result["overlaps"]
-    pairs = {(o["reference_detection_id"], o["candidate_detection_id"]) for o in overlaps}
+    pairs = {
+        (o["reference_detection_id"], o["candidate_detection_id"]) for o in overlaps
+    }
     assert pairs == {("r0", "c0"), ("r0", "c1")}
     ratios = {
         (o["reference_detection_id"], o["candidate_detection_id"]): o["overlap_ratio"]
@@ -469,19 +502,25 @@ def get_shapely_poly(detections, idx):
     refs = []
     for i in range(len(reference.xyxy)):
         poly, b_box = get_shapely_poly(reference, i)
-        refs.append({
-            "poly": poly, "bbox": b_box,
-            "conf": float(reference.confidence[i]),
-            "class": reference.data.get("class_name", [])[i],
-        })
+        refs.append(
+            {
+                "poly": poly,
+                "bbox": b_box,
+                "conf": float(reference.confidence[i]),
+                "class": reference.data.get("class_name", [])[i],
+            }
+        )
     cands = []
     for j in range(len(candidate.xyxy)):
         poly, b_box = get_shapely_poly(candidate, j)
-        cands.append({
-            "poly": poly, "bbox": b_box,
-            "conf": float(candidate.confidence[j]),
-            "class": candidate.data.get("class_name", [])[j],
-        })
+        cands.append(
+            {
+                "poly": poly,
+                "bbox": b_box,
+                "conf": float(candidate.confidence[j]),
+                "class": candidate.data.get("class_name", [])[j],
+            }
+        )
 
     out = []
     for b in refs:
@@ -491,34 +530,40 @@ def get_shapely_poly(detections, idx):
             intersection_area = b["poly"].intersection(a["poly"]).area
             percent_overlap = intersection_area / b["poly"].area
             if percent_overlap >= threshold:
-                out.append({
-                    "reference_class": b["class"],
-                    "reference_confidence": b["conf"],
-                    "candidate_class": a["class"],
-                    "candidate_confidence": a["conf"],
-                    "overlap_ratio": percent_overlap,
-                })
+                out.append(
+                    {
+                        "reference_class": b["class"],
+                        "reference_confidence": b["conf"],
+                        "candidate_class": a["class"],
+                        "candidate_confidence": a["conf"],
+                        "overlap_ratio": percent_overlap,
+                    }
+                )
     return out
 
 
 def test_parity_against_original_code_on_bbox_only_inputs() -> None:
     # given: 3 x 4 grid with mixed overlaps
     ref = _detections_from_xyxy(
-        np.array([
-            [0, 0, 100, 100],
-            [200, 200, 300, 300],
-            [50, 50, 150, 150],
-        ]),
+        np.array(
+            [
+                [0, 0, 100, 100],
+                [200, 200, 300, 300],
+                [50, 50, 150, 150],
+            ]
+        ),
         class_names=["alpha", "beta", "gamma"],
         confidences=[0.9, 0.8, 0.7],
     )
     cand = _detections_from_xyxy(
-        np.array([
-            [50, 0, 150, 100],     # overlaps ref 0
-            [80, 0, 180, 100],     # overlaps ref 0
-            [400, 400, 500, 500],  # disjoint
-            [100, 100, 200, 200],  # overlaps ref 2
-        ]),
+        np.array(
+            [
+                [50, 0, 150, 100],  # overlaps ref 0
+                [80, 0, 180, 100],  # overlaps ref 0
+                [400, 400, 500, 500],  # disjoint
+                [100, 100, 200, 200],  # overlaps ref 2
+            ]
+        ),
         class_names=["x", "y", "z", "w"],
         confidences=[0.6, 0.5, 0.4, 0.3],
     )
@@ -538,4 +583,6 @@ def _record_key(r):
             round(r["overlap_ratio"], 9),
         )
 
-    assert sorted(map(_record_key, new_records)) == sorted(map(_record_key, legacy_records))
+    assert sorted(map(_record_key, new_records)) == sorted(
+        map(_record_key, legacy_records)
+    )
diff --git a/tests/workflows/unit_tests/core_steps/models/foundation/test_google_gemma_v2.py b/tests/workflows/unit_tests/core_steps/models/foundation/test_google_gemma_v2.py
index 4350bc0dea..b584abcf9f 100644
--- a/tests/workflows/unit_tests/core_steps/models/foundation/test_google_gemma_v2.py
+++ b/tests/workflows/unit_tests/core_steps/models/foundation/test_google_gemma_v2.py
@@ -14,16 +14,18 @@
 from pydantic import ValidationError
 
 from inference.core.workflows.core_steps.models.foundation.google_gemma.v2 import (
+    MODEL_VERSION_MAPPING,
     BlockManifest,
     GoogleGemmaBlockV2,
-    MODEL_VERSION_MAPPING,
 )
 from inference.core.workflows.execution_engine.entities.base import WorkflowImageData
 
 
 def _stub_image() -> WorkflowImageData:
     return WorkflowImageData(
-        parent_metadata=MagicMock(parent_id="root", workflow_root_ancestor_metadata=None),
+        parent_metadata=MagicMock(
+            parent_id="root", workflow_root_ancestor_metadata=None
+        ),
         numpy_image=np.zeros((10, 10, 3), dtype=np.uint8),
     )
 
diff --git a/tests/workflows/unit_tests/core_steps/models/foundation/test_kimi_openrouter_v2.py b/tests/workflows/unit_tests/core_steps/models/foundation/test_kimi_openrouter_v2.py
index 1ef7aa1ef6..390ca9671d 100644
--- a/tests/workflows/unit_tests/core_steps/models/foundation/test_kimi_openrouter_v2.py
+++ b/tests/workflows/unit_tests/core_steps/models/foundation/test_kimi_openrouter_v2.py
@@ -3,20 +3,22 @@
 from unittest.mock import MagicMock, patch
 
 import numpy as np
-from pydantic import ValidationError
 import pytest
+from pydantic import ValidationError
 
 from inference.core.workflows.core_steps.models.foundation.kimi_openrouter.v2 import (
+    MODEL_VERSION_MAPPING,
     BlockManifest,
     KimiOpenrouterBlockV2,
-    MODEL_VERSION_MAPPING,
 )
 from inference.core.workflows.execution_engine.entities.base import WorkflowImageData
 
 
 def _stub_image() -> WorkflowImageData:
     return WorkflowImageData(
-        parent_metadata=MagicMock(parent_id="root", workflow_root_ancestor_metadata=None),
+        parent_metadata=MagicMock(
+            parent_id="root", workflow_root_ancestor_metadata=None
+        ),
         numpy_image=np.zeros((10, 10, 3), dtype=np.uint8),
     )
 
diff --git a/tests/workflows/unit_tests/core_steps/models/foundation/test_llama_vision_v2.py b/tests/workflows/unit_tests/core_steps/models/foundation/test_llama_vision_v2.py
index dd9f8518cc..7eabc1ef77 100644
--- a/tests/workflows/unit_tests/core_steps/models/foundation/test_llama_vision_v2.py
+++ b/tests/workflows/unit_tests/core_steps/models/foundation/test_llama_vision_v2.py
@@ -7,16 +7,18 @@
 from pydantic import ValidationError
 
 from inference.core.workflows.core_steps.models.foundation.llama_vision.v2 import (
+    MODEL_VERSION_MAPPING,
     BlockManifest,
     LlamaVisionBlockV2,
-    MODEL_VERSION_MAPPING,
 )
 from inference.core.workflows.execution_engine.entities.base import WorkflowImageData
 
 
 def _stub_image() -> WorkflowImageData:
     return WorkflowImageData(
-        parent_metadata=MagicMock(parent_id="root", workflow_root_ancestor_metadata=None),
+        parent_metadata=MagicMock(
+            parent_id="root", workflow_root_ancestor_metadata=None
+        ),
         numpy_image=np.zeros((10, 10, 3), dtype=np.uint8),
     )
 
diff --git a/tests/workflows/unit_tests/core_steps/models/foundation/test_qwen_vlm.py b/tests/workflows/unit_tests/core_steps/models/foundation/test_qwen_vlm.py
index 22a8d88c36..caef375278 100644
--- a/tests/workflows/unit_tests/core_steps/models/foundation/test_qwen_vlm.py
+++ b/tests/workflows/unit_tests/core_steps/models/foundation/test_qwen_vlm.py
@@ -8,10 +8,10 @@
 
 from inference.core.workflows.core_steps.common.entities import StepExecutionMode
 from inference.core.workflows.core_steps.models.foundation.qwen_vlm.v1 import (
-    BlockManifest,
     DEFAULT_NATIVE_MODEL_VERSION,
     FINE_TUNED_NATIVE_LABEL,
     MODEL_VARIANTS,
+    BlockManifest,
     QwenVlmBlockV1,
     _build_native_prompt,
     _coerce_native_response,
@@ -21,7 +21,9 @@
 
 def _stub_image() -> WorkflowImageData:
     return WorkflowImageData(
-        parent_metadata=MagicMock(parent_id="root", workflow_root_ancestor_metadata=None),
+        parent_metadata=MagicMock(
+            parent_id="root", workflow_root_ancestor_metadata=None
+        ),
         numpy_image=np.zeros((10, 10, 3), dtype=np.uint8),
     )
 
@@ -310,16 +312,15 @@ def test_run_local_native_with_enable_thinking_splits_response():
             enable_thinking=True,
         )
     )
-    assert result == [
-        {"output": "42", "classes": None, "thinking": "reasoning..."}
-    ]
+    assert result == [{"output": "42", "classes": None, "thinking": "reasoning..."}]
     request = model_manager.infer_from_request_sync.call_args.kwargs["request"]
     assert request.enable_thinking is True
 
 
 def test_run_local_native_enable_thinking_silently_ignored_on_unsupported_model():
     """`enable_thinking=True` on a non-3.5 variant must NOT propagate to the
-    LMM request — the field is a no-op there, gated by NATIVE_THINKING_MODEL_VERSIONS."""
+    LMM request — the field is a no-op there, gated by NATIVE_THINKING_MODEL_VERSIONS.
+    """
     model_manager = MagicMock()
     fake_prediction = MagicMock()
     fake_prediction.response = "ok"
@@ -362,9 +363,7 @@ def test_run_dispatches_to_remote_native_when_step_mode_remote(mock_client_cls):
             temperature=0.0,
         )
     )
-    assert result == [
-        {"output": "remote answer", "classes": None, "thinking": ""}
-    ]
+    assert result == [{"output": "remote answer", "classes": None, "thinking": ""}]
     assert fake_client.infer_lmm.called
     call_kwargs = fake_client.infer_lmm.call_args.kwargs
     assert call_kwargs["model_id"] == "qwen3_5-0.8b"
@@ -391,9 +390,7 @@ def test_run_dispatches_to_local_native_with_fine_tuned_model_id():
             fine_tuned_model_id="your-workspace/3",
         )
     )
-    assert result == [
-        {"output": "finetune answer", "classes": None, "thinking": ""}
-    ]
+    assert result == [{"output": "finetune answer", "classes": None, "thinking": ""}]
     model_manager.add_model.assert_called_once_with(
         model_id="your-workspace/3", api_key="ws-key"
     )
diff --git a/tests/workflows/unit_tests/core_steps/models/roboflow/_hosted_api_resolution.py b/tests/workflows/unit_tests/core_steps/models/roboflow/_hosted_api_resolution.py
index 263f328749..be056f5f82 100644
--- a/tests/workflows/unit_tests/core_steps/models/roboflow/_hosted_api_resolution.py
+++ b/tests/workflows/unit_tests/core_steps/models/roboflow/_hosted_api_resolution.py
@@ -68,7 +68,9 @@ def _invoke_run_remotely(module, block_cls, remote_target: str):
     return client_cls, client
 
 
-def assert_hosted_selects_v0(family: str, version: str, class_name: str, hosted_url_attr: str):
+def assert_hosted_selects_v0(
+    family: str, version: str, class_name: str, hosted_url_attr: str
+):
     module, block_cls = _load_block(family, version, class_name)
     client_cls, client = _invoke_run_remotely(module, block_cls, "hosted")
     assert client_cls.call_args.kwargs["api_url"] == getattr(module, hosted_url_attr)
diff --git a/tests/workflows/unit_tests/core_steps/models/roboflow/instance_segmentation/test_v4.py b/tests/workflows/unit_tests/core_steps/models/roboflow/instance_segmentation/test_v4.py
index 9e5c8b4b5c..c60c05f785 100644
--- a/tests/workflows/unit_tests/core_steps/models/roboflow/instance_segmentation/test_v4.py
+++ b/tests/workflows/unit_tests/core_steps/models/roboflow/instance_segmentation/test_v4.py
@@ -47,7 +47,9 @@ def test_instance_segmentation_model_v4_validation_when_required_field_is_not_gi
         _ = BlockManifest.model_validate(data)
 
 
-def test_instance_segmentation_model_v4_predictions_output_advertises_rle_kind_first() -> None:
+def test_instance_segmentation_model_v4_predictions_output_advertises_rle_kind_first() -> (
+    None
+):
     outputs = {o.name: o for o in BlockManifest.describe_outputs()}
 
     predictions_kinds = outputs["predictions"].kind
diff --git a/tests/workflows/unit_tests/core_steps/sinks/roboflow/asset_library_attributes/test_v1.py b/tests/workflows/unit_tests/core_steps/sinks/roboflow/asset_library_attributes/test_v1.py
index 81a535a1db..c2041305bd 100644
--- a/tests/workflows/unit_tests/core_steps/sinks/roboflow/asset_library_attributes/test_v1.py
+++ b/tests/workflows/unit_tests/core_steps/sinks/roboflow/asset_library_attributes/test_v1.py
@@ -39,9 +39,7 @@ def mocked_v1():
     with (
         mock.patch.object(v1, "get_workspace_name") as workspace_mock,
         mock.patch.object(v1, "batch_update_image_metadata_at_roboflow") as batch_mock,
-        mock.patch.object(
-            v1, "update_image_metadata_at_roboflow"
-        ) as single_mock,
+        mock.patch.object(v1, "update_image_metadata_at_roboflow") as single_mock,
     ):
         workspace_mock.return_value = "my-workspace"
         batch_mock.return_value = {"taskId": "task-123"}
diff --git a/tests/workflows/unit_tests/execution_engine/inner_workflow/test_dynamic_blocks_collection.py b/tests/workflows/unit_tests/execution_engine/inner_workflow/test_dynamic_blocks_collection.py
index 26dbfa9a23..6280fcdc47 100644
--- a/tests/workflows/unit_tests/execution_engine/inner_workflow/test_dynamic_blocks_collection.py
+++ b/tests/workflows/unit_tests/execution_engine/inner_workflow/test_dynamic_blocks_collection.py
@@ -3,12 +3,12 @@
 from typing import Any, Dict
 from unittest import mock
 
-from inference.core.workflows.execution_engine.v1.inner_workflow.constants import (
-    USE_INNER_WORKFLOW_BLOCK_TYPE,
-)
 from inference.core.workflows.execution_engine.v1.inner_workflow import (
     dynamic_blocks_collection,
 )
+from inference.core.workflows.execution_engine.v1.inner_workflow.constants import (
+    USE_INNER_WORKFLOW_BLOCK_TYPE,
+)
 from inference.core.workflows.execution_engine.v1.inner_workflow.dynamic_blocks_collection import (
     apply_collected_dynamic_blocks_definitions_to_workflow_root,
     collect_dynamic_blocks_definitions_from_workflow_definition,

From efb0ec4beff249a1a1577694cfd12a50589a6cbc Mon Sep 17 00:00:00 2001
From: Lee Clement <lee@roboflow.com>
Date: Fri, 5 Jun 2026 13:29:03 -0230
Subject: [PATCH 61/76] feat(yolo26-sem): support fine-tuned models + binary
 head (#2407)

---
 inference/models/utils.py                     | 14 ++-
 inference_models/docs/changelog.md            |  1 +
 .../models/common/roboflow/model_packages.py  | 23 -----
 .../models/common/roboflow/post_processing.py | 28 +++++-
 .../common/roboflow/semantic_segmentation.py  | 49 ++++++++++
 .../deep_lab_v3_plus_segmentation_onnx.py     |  4 +
 .../deep_lab_v3_plus_segmentation_torch.py    |  4 +
 .../deep_lab_v3_plus_segmentation_trt.py      |  6 +-
 .../yolo26_semantic_segmentation_onnx.py      |  6 +-
 ...lo26_semantic_segmentation_torch_script.py |  4 +
 .../yolo26_semantic_segmentation_trt.py       |  6 +-
 inference_models/pyproject.toml               |  2 +-
 .../common/roboflow/test_model_packages.py    | 23 -----
 .../roboflow/test_semantic_segmentation.py    | 56 ++++++++++++
 .../test_yolo26_semantic_segmentation.py      | 91 +++++++++++++++++++
 inference_models/uv.lock                      |  2 +-
 requirements/requirements.cpu.txt             |  2 +-
 requirements/requirements.gpu.txt             |  2 +-
 requirements/requirements.jetson.txt          |  2 +-
 requirements/requirements.vino.txt            |  2 +-
 20 files changed, 265 insertions(+), 62 deletions(-)
 create mode 100644 inference_models/inference_models/models/common/roboflow/semantic_segmentation.py
 create mode 100644 inference_models/tests/unit_tests/models/common/roboflow/test_semantic_segmentation.py

diff --git a/inference/models/utils.py b/inference/models/utils.py
index 4ee4516e71..9ea9b8d5c8 100644
--- a/inference/models/utils.py
+++ b/inference/models/utils.py
@@ -992,9 +992,17 @@ def get_roboflow_model(*args, **kwargs):
 
     # YOLO26 semantic segmentation is inference_models-only (no legacy implementation),
     # so we add entries directly rather than swapping existing ones.
-    ROBOFLOW_MODEL_TYPES[("semantic-segmentation", "yolo26")] = (
-        InferenceModelsSemanticSegmentationAdapter
-    )
+    for variant in [
+        "yolo26",
+        "yolo26n-sem",
+        "yolo26s-sem",
+        "yolo26m-sem",
+        "yolo26l-sem",
+        "yolo26x-sem",
+    ]:
+        ROBOFLOW_MODEL_TYPES[("semantic-segmentation", variant)] = (
+            InferenceModelsSemanticSegmentationAdapter
+        )
 
     # RFDETR keypoint detection is inference_models-only (no legacy implementation),
     # so we add entries directly rather than swapping existing ones.
diff --git a/inference_models/docs/changelog.md b/inference_models/docs/changelog.md
index 4487819615..18456f6a25 100644
--- a/inference_models/docs/changelog.md
+++ b/inference_models/docs/changelog.md
@@ -3,6 +3,7 @@
 ## `0.29.0`
 
 - Added RF-DETR preview keypoint support (ONNX backend).
+- Added support for fine-tuned YOLO26 semantic segmentation models.
 
 ## `0.28.7`
 
diff --git a/inference_models/inference_models/models/common/roboflow/model_packages.py b/inference_models/inference_models/models/common/roboflow/model_packages.py
index cc45a31f25..f09d9d0b48 100644
--- a/inference_models/inference_models/models/common/roboflow/model_packages.py
+++ b/inference_models/inference_models/models/common/roboflow/model_packages.py
@@ -28,29 +28,6 @@ def parse_class_names_file(class_names_path: str) -> List[str]:
         ) from error
 
 
-def resolve_background_class_id(class_names: List[str]) -> int:
-    """Return the index of the `background` class for a semantic-segmentation
-    package.
-
-    Roboflow semantic segmentation maps every pixel to a class id, with one
-    class reserved for background (sub-threshold / unlabeled pixels). A valid,
-    in-range background id is required - a negative sentinel would alias a real
-    class via negative indexing in downstream consumers (`class_names[-1]`,
-    palette LUTs, the 0=background platform convention), silently corrupting
-    the segmentation map. Packages must therefore declare a `background` class.
-    """
-    try:
-        return [c.lower() for c in class_names].index("background")
-    except ValueError as error:
-        raise CorruptedModelPackageError(
-            message="Semantic segmentation model package does not define a `background` class in "
-            "`class_names.txt`. A background class is required so that sub-threshold pixels map to a "
-            "valid class id. If you created the model package manually, prepend `background` to the class "
-            "names. If the weights are hosted on the Roboflow platform - contact support.",
-            help_url="https://inference-models.roboflow.com/errors/model-loading/#corruptedmodelpackageerror",
-        ) from error
-
-
 PADDING_VALUES_MAPPING = {
     "black edges": 0,
     "grey edges": 127,
diff --git a/inference_models/inference_models/models/common/roboflow/post_processing.py b/inference_models/inference_models/models/common/roboflow/post_processing.py
index a92d4242f8..56dc81c65f 100644
--- a/inference_models/inference_models/models/common/roboflow/post_processing.py
+++ b/inference_models/inference_models/models/common/roboflow/post_processing.py
@@ -15,6 +15,9 @@
     PreProcessingMetadata,
     StaticCropOffset,
 )
+from inference_models.models.common.roboflow.semantic_segmentation import (
+    insert_background_class,
+)
 from inference_models.weights_providers.entities import RecommendedParameters
 
 
@@ -745,6 +748,11 @@ def post_process_semantic_segmentation_logits(
     softmax over classes → argmax → place into original-image canvas if
     static_crop was applied → apply per-class confidence threshold
     (sub-threshold pixels collapse to background_class_id).
+
+    Single-channel (K==1) outputs are the Ultralytics binary (``nc==1``) head:
+    instead of softmax+argmax, the sigmoid foreground probability is used and the
+    lone foreground class is read from ``class_names`` (``[background, <fg>]``).
+    Sub-threshold pixels collapse to background via the same threshold step.
     """
     confidence_filter = ConfidenceFilter(
         confidence=confidence,
@@ -809,10 +817,22 @@ def post_process_semantic_segmentation_logits(
                 ],
                 interpolation=functional.InterpolationMode.BILINEAR,
             )
-        image_results = torch.nn.functional.softmax(image_results, dim=0)
-        image_confidence, image_class_ids = torch.max(image_results, dim=0)
-        if len(class_names) == image_results.shape[0] + 1:
-            image_class_ids = image_class_ids + 1
+        if image_results.shape[0] == 1:
+            image_confidence = image_results[0].sigmoid()
+            image_class_ids = insert_background_class(
+                torch.zeros_like(image_confidence, dtype=torch.long),
+                background_class_id=background_class_id,
+                num_classes=len(class_names),
+            )
+        else:
+            image_results = torch.nn.functional.softmax(image_results, dim=0)
+            image_confidence, image_class_ids = torch.max(image_results, dim=0)
+            if len(class_names) == image_results.shape[0] + 1:
+                image_class_ids = insert_background_class(
+                    image_class_ids,
+                    background_class_id=background_class_id,
+                    num_classes=len(class_names),
+                )
         if (
             image_metadata.static_crop_offset.offset_x > 0
             or image_metadata.static_crop_offset.offset_y > 0
diff --git a/inference_models/inference_models/models/common/roboflow/semantic_segmentation.py b/inference_models/inference_models/models/common/roboflow/semantic_segmentation.py
new file mode 100644
index 0000000000..f55433aa08
--- /dev/null
+++ b/inference_models/inference_models/models/common/roboflow/semantic_segmentation.py
@@ -0,0 +1,49 @@
+"""Semantic-segmentation class-name helpers shared by the DeepLabV3+ and
+YOLO26-sem backends (kept out of the generic `model_packages` module)."""
+
+from typing import List
+
+import torch
+
+from inference_models.errors import CorruptedModelPackageError
+
+
+def validate_class_names(class_names: List[str]) -> None:
+    """Reject class lists lacking a case-insensitive `background` or having < 2 entries.
+
+    Raises `CorruptedModelPackageError`; call once at model load so helpers can assume validity.
+    """
+    if "background" not in [c.lower() for c in class_names]:  # case-insensitive
+        raise CorruptedModelPackageError(
+            message="Semantic segmentation model package does not define a `background` class in "
+            "`class_names.txt`. A background class is required so that sub-threshold pixels map to a "
+            "valid class id. If you created the model package manually, prepend `background` to the class "
+            "names. If the weights are hosted on the Roboflow platform - contact support.",
+            help_url="https://inference-models.roboflow.com/errors/model-loading/#corruptedmodelpackageerror",
+        )
+    if len(class_names) < 2:  # `background` alone leaves no foreground class
+        raise CorruptedModelPackageError(
+            message="Semantic segmentation model package must define `background` plus at least one "
+            f"foreground class, but `class_names.txt` only contains {class_names}. If you created the "
+            "model package manually, ensure it lists `background` followed by the foreground class(es). "
+            "If the weights are hosted on the Roboflow platform - contact support.",
+            help_url="https://inference-models.roboflow.com/errors/model-loading/#corruptedmodelpackageerror",
+        )
+
+
+def resolve_background_class_id(class_names: List[str]) -> int:
+    """Index of the first case-insensitive `background` entry; run `validate_class_names` first (else `ValueError`)."""
+    return [c.lower() for c in class_names].index("background")
+
+
+def insert_background_class(
+    class_ids: torch.Tensor, *, background_class_id: int, num_classes: int
+) -> torch.Tensor:
+    """Map foreground-channel indices (``0..K-1``) to full class ids through a lookup table
+    that omits ``background_class_id``. ``num_classes`` is the full class count (``K + 1``)."""
+    foreground_ids = torch.tensor(  # slot j holds the j-th non-background id
+        [i for i in range(num_classes) if i != background_class_id],
+        device=class_ids.device,
+        dtype=class_ids.dtype,
+    )
+    return foreground_ids[class_ids]  # per-element lookup via tensor indexing
diff --git a/inference_models/inference_models/models/deep_lab_v3_plus/deep_lab_v3_plus_segmentation_onnx.py b/inference_models/inference_models/models/deep_lab_v3_plus/deep_lab_v3_plus_segmentation_onnx.py
index 58b2eb4026..f028171016 100644
--- a/inference_models/inference_models/models/deep_lab_v3_plus/deep_lab_v3_plus_segmentation_onnx.py
+++ b/inference_models/inference_models/models/deep_lab_v3_plus/deep_lab_v3_plus_segmentation_onnx.py
@@ -30,7 +30,10 @@
     ResizeMode,
     parse_class_names_file,
     parse_inference_config,
+)
+from inference_models.models.common.roboflow.semantic_segmentation import (
     resolve_background_class_id,
+    validate_class_names,
 )
 from inference_models.models.common.roboflow.post_processing import (
     post_process_semantic_segmentation_logits,
@@ -94,6 +97,7 @@ def from_pretrained(
         class_names = parse_class_names_file(
             class_names_path=model_package_content["class_names.txt"]
         )
+        validate_class_names(class_names)
         background_class_id = resolve_background_class_id(class_names)
         inference_config = parse_inference_config(
             config_path=model_package_content["inference_config.json"],
diff --git a/inference_models/inference_models/models/deep_lab_v3_plus/deep_lab_v3_plus_segmentation_torch.py b/inference_models/inference_models/models/deep_lab_v3_plus/deep_lab_v3_plus_segmentation_torch.py
index 2fde6b75e8..9838b168fd 100644
--- a/inference_models/inference_models/models/deep_lab_v3_plus/deep_lab_v3_plus_segmentation_torch.py
+++ b/inference_models/inference_models/models/deep_lab_v3_plus/deep_lab_v3_plus_segmentation_torch.py
@@ -23,7 +23,10 @@
     ResizeMode,
     parse_class_names_file,
     parse_inference_config,
+)
+from inference_models.models.common.roboflow.semantic_segmentation import (
     resolve_background_class_id,
+    validate_class_names,
 )
 from inference_models.models.common.roboflow.post_processing import (
     post_process_semantic_segmentation_logits,
@@ -57,6 +60,7 @@ def from_pretrained(
         class_names = parse_class_names_file(
             class_names_path=model_package_content["class_names.txt"]
         )
+        validate_class_names(class_names)
         background_class_id = resolve_background_class_id(class_names)
         inference_config = parse_inference_config(
             config_path=model_package_content["inference_config.json"],
diff --git a/inference_models/inference_models/models/deep_lab_v3_plus/deep_lab_v3_plus_segmentation_trt.py b/inference_models/inference_models/models/deep_lab_v3_plus/deep_lab_v3_plus_segmentation_trt.py
index 50aef54981..5c870ec423 100644
--- a/inference_models/inference_models/models/deep_lab_v3_plus/deep_lab_v3_plus_segmentation_trt.py
+++ b/inference_models/inference_models/models/deep_lab_v3_plus/deep_lab_v3_plus_segmentation_trt.py
@@ -32,9 +32,12 @@
     TRTConfig,
     parse_class_names_file,
     parse_inference_config,
-    resolve_background_class_id,
     parse_trt_config,
 )
+from inference_models.models.common.roboflow.semantic_segmentation import (
+    resolve_background_class_id,
+    validate_class_names,
+)
 from inference_models.models.common.roboflow.post_processing import (
     post_process_semantic_segmentation_logits,
 )
@@ -110,6 +113,7 @@ def from_pretrained(
         class_names = parse_class_names_file(
             class_names_path=model_package_content["class_names.txt"]
         )
+        validate_class_names(class_names)
         background_class_id = resolve_background_class_id(class_names)
         inference_config = parse_inference_config(
             config_path=model_package_content["inference_config.json"],
diff --git a/inference_models/inference_models/models/yolo26/yolo26_semantic_segmentation_onnx.py b/inference_models/inference_models/models/yolo26/yolo26_semantic_segmentation_onnx.py
index 5b823b36fc..aafa87330d 100644
--- a/inference_models/inference_models/models/yolo26/yolo26_semantic_segmentation_onnx.py
+++ b/inference_models/inference_models/models/yolo26/yolo26_semantic_segmentation_onnx.py
@@ -28,9 +28,12 @@
     PreProcessingMetadata,
     ResizeMode,
     parse_class_names_file,
-    resolve_background_class_id,
     parse_inference_config,
 )
+from inference_models.models.common.roboflow.semantic_segmentation import (
+    resolve_background_class_id,
+    validate_class_names,
+)
 from inference_models.models.common.roboflow.post_processing import (
     post_process_semantic_segmentation_logits,
 )
@@ -99,6 +102,7 @@ def from_pretrained(
         class_names = parse_class_names_file(
             class_names_path=model_package_content["class_names.txt"]
         )
+        validate_class_names(class_names)
         background_class_id = resolve_background_class_id(class_names)
         inference_config = parse_inference_config(
             config_path=model_package_content["inference_config.json"],
diff --git a/inference_models/inference_models/models/yolo26/yolo26_semantic_segmentation_torch_script.py b/inference_models/inference_models/models/yolo26/yolo26_semantic_segmentation_torch_script.py
index 2775ccb848..67f9e48a58 100644
--- a/inference_models/inference_models/models/yolo26/yolo26_semantic_segmentation_torch_script.py
+++ b/inference_models/inference_models/models/yolo26/yolo26_semantic_segmentation_torch_script.py
@@ -22,7 +22,10 @@
     ResizeMode,
     parse_class_names_file,
     parse_inference_config,
+)
+from inference_models.models.common.roboflow.semantic_segmentation import (
     resolve_background_class_id,
+    validate_class_names,
 )
 from inference_models.models.common.roboflow.post_processing import (
     post_process_semantic_segmentation_logits,
@@ -61,6 +64,7 @@ def from_pretrained(
         class_names = parse_class_names_file(
             class_names_path=model_package_content["class_names.txt"]
         )
+        validate_class_names(class_names)
         background_class_id = resolve_background_class_id(class_names)
         inference_config = parse_inference_config(
             config_path=model_package_content["inference_config.json"],
diff --git a/inference_models/inference_models/models/yolo26/yolo26_semantic_segmentation_trt.py b/inference_models/inference_models/models/yolo26/yolo26_semantic_segmentation_trt.py
index cb2f70fc75..244d264a3c 100644
--- a/inference_models/inference_models/models/yolo26/yolo26_semantic_segmentation_trt.py
+++ b/inference_models/inference_models/models/yolo26/yolo26_semantic_segmentation_trt.py
@@ -32,9 +32,12 @@
     TRTConfig,
     parse_class_names_file,
     parse_inference_config,
-    resolve_background_class_id,
     parse_trt_config,
 )
+from inference_models.models.common.roboflow.semantic_segmentation import (
+    resolve_background_class_id,
+    validate_class_names,
+)
 from inference_models.models.common.roboflow.post_processing import (
     post_process_semantic_segmentation_logits,
 )
@@ -110,6 +113,7 @@ def from_pretrained(
         class_names = parse_class_names_file(
             class_names_path=model_package_content["class_names.txt"]
         )
+        validate_class_names(class_names)
         background_class_id = resolve_background_class_id(class_names)
         inference_config = parse_inference_config(
             config_path=model_package_content["inference_config.json"],
diff --git a/inference_models/pyproject.toml b/inference_models/pyproject.toml
index 6ddf576025..b3e948ae10 100644
--- a/inference_models/pyproject.toml
+++ b/inference_models/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "inference-models"
-version = "0.29.0-rc3"
+version = "0.29.0"
 description = "The new inference engine for Computer Vision models"
 readme = "README.md"
 requires-python = ">=3.10,<3.13"
diff --git a/inference_models/tests/unit_tests/models/common/roboflow/test_model_packages.py b/inference_models/tests/unit_tests/models/common/roboflow/test_model_packages.py
index 1cef26bf35..c7c7ad6cdd 100644
--- a/inference_models/tests/unit_tests/models/common/roboflow/test_model_packages.py
+++ b/inference_models/tests/unit_tests/models/common/roboflow/test_model_packages.py
@@ -20,7 +20,6 @@
     parse_inference_config,
     parse_key_points_metadata,
     parse_trt_config,
-    resolve_background_class_id,
 )
 
 
@@ -271,28 +270,6 @@ def test_parse_class_names_file_when_valid_config_provided(
     assert result == ["some", "other", "yet-another"]
 
 
-def test_resolve_background_class_id_when_background_present() -> None:
-    # when
-    result = resolve_background_class_id(["background", "road", "sidewalk"])
-
-    # then
-    assert result == 0
-
-
-def test_resolve_background_class_id_is_case_insensitive() -> None:
-    # when
-    result = resolve_background_class_id(["road", "Background", "sidewalk"])
-
-    # then
-    assert result == 1
-
-
-def test_resolve_background_class_id_when_background_absent() -> None:
-    # when
-    with pytest.raises(CorruptedModelPackageError):
-        _ = resolve_background_class_id(["road", "sidewalk", "building"])
-
-
 def test_parse_inference_config_when_path_does_not_exists() -> None:
     # when
     with pytest.raises(CorruptedModelPackageError):
diff --git a/inference_models/tests/unit_tests/models/common/roboflow/test_semantic_segmentation.py b/inference_models/tests/unit_tests/models/common/roboflow/test_semantic_segmentation.py
new file mode 100644
index 0000000000..afe51fea6c
--- /dev/null
+++ b/inference_models/tests/unit_tests/models/common/roboflow/test_semantic_segmentation.py
@@ -0,0 +1,56 @@
+import pytest
+import torch
+
+from inference_models.errors import CorruptedModelPackageError
+from inference_models.models.common.roboflow.semantic_segmentation import (
+    insert_background_class,
+    resolve_background_class_id,
+    validate_class_names,
+)
+
+
+def test_resolve_background_class_id_when_background_present() -> None:
+    assert resolve_background_class_id(["background", "road", "sidewalk"]) == 0
+
+
+def test_resolve_background_class_id_is_case_insensitive() -> None:
+    assert resolve_background_class_id(["road", "Background", "sidewalk"]) == 1
+
+
+def test_validate_class_names_with_minimal_binary_class_list() -> None:
+    validate_class_names(["background", "object"])  # does not raise
+
+
+def test_validate_class_names_with_multiclass() -> None:
+    validate_class_names(["background", "road", "sidewalk"])  # does not raise
+
+
+def test_validate_class_names_when_background_absent() -> None:
+    with pytest.raises(CorruptedModelPackageError):
+        validate_class_names(["road", "sidewalk", "building"])
+
+
+def test_validate_class_names_when_no_foreground_class() -> None:
+    with pytest.raises(CorruptedModelPackageError):
+        validate_class_names(["background"])
+
+
+def test_insert_background_class_when_background_first() -> None:
+    out = insert_background_class(
+        torch.tensor([0, 1, 2]), background_class_id=0, num_classes=4
+    )
+    assert out.tolist() == [1, 2, 3]  # every id shifts past background at 0
+
+
+def test_insert_background_class_when_background_not_first() -> None:
+    out = insert_background_class(
+        torch.tensor([0, 1, 2]), background_class_id=1, num_classes=4
+    )
+    assert out.tolist() == [0, 2, 3]  # only channels past slot 1 are shifted
+
+
+def test_insert_background_class_binary_single_foreground() -> None:
+    out = insert_background_class(
+        torch.zeros(3, dtype=torch.long), background_class_id=0, num_classes=2
+    )
+    assert out.tolist() == [1, 1, 1]  # lone foreground channel -> class id 1
diff --git a/inference_models/tests/unit_tests/models/test_yolo26_semantic_segmentation.py b/inference_models/tests/unit_tests/models/test_yolo26_semantic_segmentation.py
index aea8860769..e04c0d9275 100644
--- a/inference_models/tests/unit_tests/models/test_yolo26_semantic_segmentation.py
+++ b/inference_models/tests/unit_tests/models/test_yolo26_semantic_segmentation.py
@@ -114,6 +114,97 @@ def test_post_process_semantic_segmentation_logits_shifts_when_class_names_prepe
     assert torch.all(results[0].segmentation_map == 3)
 
 
+def _binary_meta(h, w):  # single-image metadata stub: h x w everywhere, no pad/crop
+    return [
+        MagicMock(
+            inference_size=MagicMock(height=h, width=w),
+            pad_top=0,
+            pad_bottom=0,
+            pad_left=0,
+            pad_right=0,
+            size_after_pre_processing=MagicMock(height=h, width=w),
+            original_size=MagicMock(height=h, width=w),
+            static_crop_offset=MagicMock(offset_x=0, offset_y=0),
+        )
+    ]
+
+
+def test_post_process_semantic_segmentation_logits_binary_single_channel_foreground():
+    """A single-channel (Ultralytics nc==1) output uses the sigmoid foreground
+    probability — high-positive logits map to the lone foreground class, not the
+    degenerate softmax-over-one-channel."""
+    from inference_models.models.common.roboflow.post_processing import (
+        post_process_semantic_segmentation_logits,
+    )
+
+    h, w = 6, 6
+    logits = torch.full((1, 1, h, w), 5.0)  # sigmoid ~ 0.993 -> foreground
+
+    results = post_process_semantic_segmentation_logits(
+        model_results=logits,
+        pre_processing_meta=_binary_meta(h, w),
+        class_names=["background", "object"],
+        background_class_id=0,
+        device=torch.device("cpu"),
+        confidence=0.5,
+        recommended_parameters=None,
+        default_confidence=0.5,
+    )
+
+    assert results[0].segmentation_map.shape == (h, w)
+    assert torch.all(results[0].segmentation_map == 1)  # lone foreground class id
+    assert torch.all(results[0].confidence > 0.99)
+
+
+def test_post_process_semantic_segmentation_logits_binary_sub_threshold_to_background():
+    """Single-channel logits with low sigmoid confidence collapse to background."""
+    from inference_models.models.common.roboflow.post_processing import (
+        post_process_semantic_segmentation_logits,
+    )
+
+    h, w = 4, 4
+    logits = torch.full((1, 1, h, w), -5.0)  # sigmoid ~ 0.0067 -> below threshold
+
+    results = post_process_semantic_segmentation_logits(
+        model_results=logits,
+        pre_processing_meta=_binary_meta(h, w),
+        class_names=["background", "object"],
+        background_class_id=0,
+        device=torch.device("cpu"),
+        confidence=0.5,
+        recommended_parameters=None,
+        default_confidence=0.5,
+    )
+
+    assert torch.all(results[0].segmentation_map == 0)  # background
+    assert torch.all(results[0].confidence == 0.0)
+
+
+def test_post_process_semantic_segmentation_logits_maps_channels_when_background_not_first():
+    """K foreground channels with background NOT at index 0: channel j maps to
+    the j-th non-background class id, not a blanket +1."""
+    from inference_models.models.common.roboflow.post_processing import (
+        post_process_semantic_segmentation_logits,
+    )
+
+    h, w, num_channels = 6, 6, 3
+    logits = torch.zeros(1, num_channels, h, w)
+    logits[0, 0] = 5.0  # channel 0 dominant
+
+    results = post_process_semantic_segmentation_logits(
+        model_results=logits,
+        pre_processing_meta=_binary_meta(h, w),
+        class_names=["a", "background", "b", "c"],
+        background_class_id=1,
+        device=torch.device("cpu"),
+        confidence=0.0,  # presumably keeps any pixel from collapsing to background
+        recommended_parameters=None,
+        default_confidence=0.0,
+    )
+
+    assert torch.all(results[0].segmentation_map == 0)  # "a" (id 0), not 0 + 1
+
+
 def test_yolo26_semantic_segmentation_registered():
     from inference_models.models.auto_loaders.entities import BackendType
     from inference_models.models.auto_loaders.models_registry import (
diff --git a/inference_models/uv.lock b/inference_models/uv.lock
index ecff7942cd..19ff414d2a 100644
--- a/inference_models/uv.lock
+++ b/inference_models/uv.lock
@@ -913,7 +913,7 @@ wheels = [
 
 [[package]]
 name = "inference-models"
-version = "0.29.0rc3"
+version = "0.29.0"
 source = { virtual = "." }
 dependencies = [
     { name = "accelerate" },
diff --git a/requirements/requirements.cpu.txt b/requirements/requirements.cpu.txt
index 88a212affa..cfffd8f82c 100644
--- a/requirements/requirements.cpu.txt
+++ b/requirements/requirements.cpu.txt
@@ -1,3 +1,3 @@
 onnxruntime>=1.15.1,<1.22.0
 nvidia-ml-py<13.0.0
-inference-models[torch-cpu,onnx-cpu]~=0.29.0rc3  # keep in sync between requirements.gpu.txt, requirements.cpu.txt, requirements.vino.txt
+inference-models[torch-cpu,onnx-cpu]~=0.29.0  # keep in sync between requirements.gpu.txt, requirements.cpu.txt, requirements.vino.txt
diff --git a/requirements/requirements.gpu.txt b/requirements/requirements.gpu.txt
index 1c85c7d48a..87753809cb 100644
--- a/requirements/requirements.gpu.txt
+++ b/requirements/requirements.gpu.txt
@@ -1,2 +1,2 @@
 onnxruntime-gpu>=1.15.1,<1.22.0
-inference-models[torch-cu124,onnx-cu12]~=0.29.0rc3 # keep in sync between requirements.jetson requirements.gpu.txt, requirements.cpu.txt, requirements.vino.txt
+inference-models[torch-cu124,onnx-cu12]~=0.29.0 # keep in sync between requirements.jetson requirements.gpu.txt, requirements.cpu.txt, requirements.vino.txt
diff --git a/requirements/requirements.jetson.txt b/requirements/requirements.jetson.txt
index 92ce0fc514..9246addbf3 100644
--- a/requirements/requirements.jetson.txt
+++ b/requirements/requirements.jetson.txt
@@ -1,4 +1,4 @@
 pypdfium2>=4.11.0,<5.0.0
 jupyterlab>=4.3.0,<5.0.0
 PyYAML~=6.0.0
-inference-models~=0.29.0rc3  # keep in sync between requirements.jetson requirements.gpu.txt, requirements.cpu.txt, requirements.vino.txt
+inference-models~=0.29.0  # keep in sync between requirements.jetson requirements.gpu.txt, requirements.cpu.txt, requirements.vino.txt
diff --git a/requirements/requirements.vino.txt b/requirements/requirements.vino.txt
index 1adf27d5e2..ebdac73aef 100644
--- a/requirements/requirements.vino.txt
+++ b/requirements/requirements.vino.txt
@@ -1,2 +1,2 @@
 onnxruntime-openvino>=1.15.0,<1.22.0
-inference-models[torch-cpu]~=0.29.0rc3  # keep in sync between requirements.jetson requirements.gpu.txt, requirements.cpu.txt, requirements.vino.txt
+inference-models[torch-cpu]~=0.29.0  # keep in sync between requirements.jetson requirements.gpu.txt, requirements.cpu.txt, requirements.vino.txt

From 536c8e405717909a64301e00efba9323400c302b Mon Sep 17 00:00:00 2001
From: Nathan Marraccini <119442842+nathan-marraccini@users.noreply.github.com>
Date: Fri, 5 Jun 2026 12:39:59 -0400
Subject: [PATCH 62/76] GLM-OCR block: accept selector for task_type (#2409)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

task_type was a bare Literal enum, so Pydantic rejected any selector
reference outright — only literal modes worked. This blocks the platform's
fork-with-input_defaults flow from exposing the preset recognition modes
(text/table/formula/structured) through a dynamic $inputs.task_type; the
modal could only offer Custom Prompt. (prompt already accepts selectors.)

Widen task_type to Union[Selector(kind=[STRING_KIND]), TaskType], keeping
the TaskType literal so the UI dropdown still renders all preset modes and
invalid literals are still rejected at parse time. This mirrors the
Literal+Selector pattern already used by analytics blocks (triggering_anchor
in time_in_zone/line_counter/path_deviation) and lmm_type in the LMM blocks.

Because a dynamic task_type isn't known until execution, the static
model_validator can no longer evaluate the "custom requires prompt" /
"structured-answering requires output_structure" rules for that case, so:
  - validate_prompt skips those checks when task_type is a selector
  - _resolve_prompt enforces them at runtime and raises a clear error for an
    unsupported resolved value (previously a bare KeyError)

Literal task_type behaviour is unchanged. Adds manifest + runtime unit tests.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Co-authored-by: Paweł Pęczek <146137186+PawelPeczek-Roboflow@users.noreply.github.com>
---
 .../models/foundation/glm_ocr/v1.py           | 31 ++++++-
 .../models/foundation/test_glm_ocr.py         | 89 +++++++++++++++++++
 2 files changed, 118 insertions(+), 2 deletions(-)
 create mode 100644 tests/workflows/unit_tests/core_steps/models/foundation/test_glm_ocr.py

diff --git a/inference/core/workflows/core_steps/models/foundation/glm_ocr/v1.py b/inference/core/workflows/core_steps/models/foundation/glm_ocr/v1.py
index b13590d2d4..72f9bc5669 100644
--- a/inference/core/workflows/core_steps/models/foundation/glm_ocr/v1.py
+++ b/inference/core/workflows/core_steps/models/foundation/glm_ocr/v1.py
@@ -85,6 +85,12 @@
 TASKS_REQUIRING_PROMPT = {"custom"}
 TASKS_REQUIRING_OUTPUT_STRUCTURE = {"structured-answering"}
 
+
+def _is_selector(value: object) -> bool:
+    """True if value is a "$"-prefixed string (selector like $inputs.X / $steps.X.Y)."""
+    return isinstance(value, str) and value.startswith("$")
+
+
 LONG_DESCRIPTION = """
 Recognize text in images using GLM-OCR, a vision language model by Zhipu AI specialized
 for optical character recognition.
@@ -113,9 +119,10 @@
 class BlockManifest(WorkflowBlockManifest):
     images: Selector(kind=[IMAGE_KIND]) = ImageInputField
 
-    task_type: TaskType = Field(
+    task_type: Union[Selector(kind=[STRING_KIND]), TaskType] = Field(
         default="text-recognition",
-        description="Recognition task to perform. Determines the prompt sent to GLM-OCR.",
+        description="Recognition task to perform. Determines the prompt sent to GLM-OCR. "
+        "Accepts a selector (e.g. $inputs.task_type) so the mode can be set dynamically.",
         json_schema_extra={
             "values_metadata": TASKS_METADATA,
             "recommended_parsers": {
@@ -192,6 +199,12 @@ class BlockManifest(WorkflowBlockManifest):
 
     @model_validator(mode="after")
     def validate_prompt(self) -> "BlockManifest":
+        # When task_type is a selector (e.g. "$inputs.task_type"), its concrete
+        # value isn't known until execution, so the dependent-field requirements
+        # below can't be checked here — they are enforced at runtime in
+        # _resolve_prompt instead.
+        if _is_selector(self.task_type):
+            return self
         if self.task_type == "custom" and not self.prompt:
             raise ValueError("`prompt` is required when task_type is 'custom'.")
         if (
@@ -254,9 +267,23 @@ def _resolve_prompt(
     prompt: Optional[str],
     output_structure: Optional[Dict[str, str]],
 ) -> str:
+    # task_type may arrive from a selector ($inputs.task_type), so the value is
+    # only known here. Validate it and the fields it requires at runtime — the
+    # static manifest validator skips these checks for dynamic task_type.
+    if task_type not in TASK_TYPE_TO_PROMPT:
+        raise ValueError(
+            f"Unsupported GLM-OCR task_type '{task_type}'. Expected one of: "
+            f"{', '.join(sorted(TASK_TYPE_TO_PROMPT))}."
+        )
     if task_type == "custom":
+        if not prompt:
+            raise ValueError("`prompt` is required when task_type is 'custom'.")
         return prompt
     if task_type == "structured-answering":
+        if not output_structure:
+            raise ValueError(
+                "`output_structure` is required when task_type is 'structured-answering'."
+            )
         return STRUCTURED_ANSWERING_PROMPT_TEMPLATE.format(
             output_structure=json.dumps(output_structure, indent=4),
         )
diff --git a/tests/workflows/unit_tests/core_steps/models/foundation/test_glm_ocr.py b/tests/workflows/unit_tests/core_steps/models/foundation/test_glm_ocr.py
new file mode 100644
index 0000000000..92d7b66fc6
--- /dev/null
+++ b/tests/workflows/unit_tests/core_steps/models/foundation/test_glm_ocr.py
@@ -0,0 +1,89 @@
+"""Unit tests for GLM-OCR v1 block: dynamic (selector) task_type support."""
+
+import pytest
+
+from inference.core.workflows.core_steps.models.foundation.glm_ocr.v1 import (
+    BlockManifest,
+    _resolve_prompt,
+)
+
+BASE = {
+    "type": "roboflow_core/glm_ocr@v1",
+    "name": "my_glm_step",
+    "images": "$inputs.image",
+}
+
+
+# --- Manifest tests ---
+
+
+def test_manifest_accepts_literal_task_type():
+    """A literal mode (here 'custom' with its prompt) still validates unchanged."""
+    payload = {**BASE, "task_type": "custom", "prompt": "Read the label."}
+    manifest = BlockManifest.model_validate(payload)
+    assert manifest.task_type == "custom"
+
+
+def test_manifest_accepts_selector_task_type():
+    """A ``$inputs.X`` reference is a valid task_type and is stored verbatim."""
+    selector = "$inputs.task_type"
+    payload = {**BASE, "task_type": selector}
+    manifest = BlockManifest.model_validate(payload)
+    assert manifest.task_type == selector
+
+
+def test_manifest_rejects_invalid_literal_task_type():
+    with pytest.raises(ValueError):  # pydantic's ValidationError is a ValueError
+        BlockManifest.model_validate({**BASE, "task_type": "not-a-real-mode"})
+
+
+def test_manifest_defers_required_checks_for_selector_task_type():
+    """A selector task_type could resolve to 'custom' only at run time, so
+    parsing without a prompt must succeed; the check is enforced at runtime."""
+    payload = {**BASE, "task_type": "$inputs.task_type"}
+    manifest = BlockManifest.model_validate(payload)
+    # Reaching this point means no parse-time error was raised.
+    assert manifest.prompt is None
+
+
+def test_manifest_still_enforces_custom_requires_prompt_for_literal():
+    with pytest.raises(ValueError, match="`prompt` is required"):
+        BlockManifest.model_validate({**BASE, "task_type": "custom"})
+
+
+def test_task_type_schema_exposes_selector_and_enum():
+    """The JSON schema offers both a selector branch and the literal modes."""
+    schema = BlockManifest.model_json_schema()
+    variants = schema["properties"]["task_type"]["anyOf"]
+    has_selector = any(v.get("reference") is True and "pattern" in v for v in variants)
+    assert has_selector, "task_type should expose a selector branch"
+    literal_modes = {mode for v in variants for mode in v.get("enum", [])}
+    assert {"custom", "text-recognition"} <= literal_modes
+
+
+# --- Runtime resolution tests ---
+
+
+def test_resolve_prompt_preset_modes():
+    assert _resolve_prompt("text-recognition", None, None) == "Text Recognition:"
+
+
+def test_resolve_prompt_custom_returns_prompt():
+    assert _resolve_prompt("custom", "my prompt", None) == "my prompt"
+
+
+def test_resolve_prompt_custom_without_prompt_raises():
+    with pytest.raises(ValueError, match="`prompt` is required"):
+        _resolve_prompt("custom", None, None)
+
+
+def test_resolve_prompt_structured_without_structure_raises():
+    with pytest.raises(ValueError, match="`output_structure` is required"):
+        _resolve_prompt("structured-answering", None, None)
+
+
+def test_resolve_prompt_unknown_task_type_raises():
+    """A task_type outside the supported modes (e.g. a selector resolved at
+    run time) raises a descriptive ValueError instead of a bare KeyError."""
+    with pytest.raises(ValueError, match="Unsupported GLM-OCR task_type"):
+        _resolve_prompt("garbage", None, None)

From 84ac5f0868cb24aa27986acac03febfdc492001d Mon Sep 17 00:00:00 2001
From: Lee Clement <lee@roboflow.com>
Date: Fri, 5 Jun 2026 14:11:11 -0230
Subject: [PATCH 63/76] docs(inference_models): add YOLO26 semantic
 segmentation model page (#2419)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Document the YOLO26 semantic-segmentation variant alongside the existing
yolo26 (object-detection / instance-segmentation / keypoint) pages: a new
models/yolo26-semantic-segmentation.md (license, public Cityscapes
yolo26{n,s,m,l,x}-sem-1024 checkpoints, onnx/torch-script/trt backends,
SemanticSegmentationResult output, usage example), plus nav + model-index
entries under Semantic Segmentation next to DeepLabV3+.

Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
Co-authored-by: Paweł Pęczek <146137186+PawelPeczek-Roboflow@users.noreply.github.com>
---
 inference_models/docs/models/index.md         |   1 +
 .../models/yolo26-semantic-segmentation.md    | 103 ++++++++++++++++++
 inference_models/mkdocs.yml                   |   1 +
 3 files changed, 105 insertions(+)
 create mode 100644 inference_models/docs/models/yolo26-semantic-segmentation.md

diff --git a/inference_models/docs/models/index.md b/inference_models/docs/models/index.md
index c1dfe34274..41721892fc 100644
--- a/inference_models/docs/models/index.md
+++ b/inference_models/docs/models/index.md
@@ -66,6 +66,7 @@ The `inference-models` library supports a wide range of computer vision models a
 
 | Model | Backends | License | Commercial License in RF Plan | Pre-trained Weights | Trainable at RF |
 |-------|----------|---------|-------------------------------|---------------------|-----------------|
+| [YOLO26 Sem](yolo26-semantic-segmentation.md) | `onnx`, `torch-script`, `trt` | AGPL-3.0 | ✅ | ✅ | ✅ |
 | [DeepLabV3+](deeplabv3plus.md) | `torch`, `onnx`, `trt` | MIT | N/A | ❌ | ✅ |
 
 ### OCR & Document Parsing
diff --git a/inference_models/docs/models/yolo26-semantic-segmentation.md b/inference_models/docs/models/yolo26-semantic-segmentation.md
new file mode 100644
index 0000000000..851a1941df
--- /dev/null
+++ b/inference_models/docs/models/yolo26-semantic-segmentation.md
@@ -0,0 +1,103 @@
+# YOLO26 - Semantic Segmentation
+
+YOLO26 is the latest addition to the Ultralytics YOLO model series. The semantic segmentation variant assigns a class label to every pixel in an image, producing dense scene-level masks rather than per-object instances.
+
+## Overview
+
+YOLO26 for semantic segmentation pairs the efficient YOLO26 backbone with a dense per-pixel prediction head. Key features include:
+
+- **Per-pixel classification** - Every pixel is assigned a single class label.
+- **Efficient YOLO26 backbone** - Shares the NMS-free, DFL-free YOLO26 architecture for fast inference and broad edge compatibility.
+- **Cityscapes pre-trained checkpoints** - Public weights trained on the 19-class Cityscapes dataset, available across all model sizes.
+- **Binary and multi-class** - Supports a single foreground class (binary head) or many foreground classes, alongside an implicit background.
+- **Multiple model sizes** - From nano to extra-large variants.
+
+## License
+
+**AGPL-3.0**
+
+!!! info "Commercial Licensing"
+    - **AGPL-3.0**: Free for open-source projects. Requires derivative works to be open-sourced.
+    - **Paid Roboflow customers**: Automatically get access to use any YOLO26 models trained on or uploaded to the Roboflow platform for commercial use.
+    - **Free Roboflow customers**: Can use YOLO26 via the serverless hosted API, or commercially self-hosted with a paid plan.
+
+    Learn more: [Roboflow Licensing](https://roboflow.com/licensing) | [YOLO26 License Details](https://roboflow.com/model-licenses/yolo26)
+
+## Pre-trained Model IDs
+
+Public YOLO26 semantic segmentation checkpoints are trained on the **Cityscapes** dataset (19 classes) at 1024×1024 and are **open access** (no API key required).
+
+| Model Size | 1024×1024 |
+|------------|-----------|
+| Nano | `yolo26n-sem-1024` |
+| Small | `yolo26s-sem-1024` |
+| Medium | `yolo26m-sem-1024` |
+| Large | `yolo26l-sem-1024` |
+| Extra-Large | `yolo26x-sem-1024` |
+
+**Custom model ID format:** `project-url/version` (e.g., `my-project-abc123/2`)
+
+## Supported Backends
+
+| Backend | Extras Required |
+|---------|----------------|
+| `onnx` | `onnx-cpu`, `onnx-cu12`, `onnx-cu118`, `onnx-jp6-cu126` |
+| `torch-script` | `torch-cpu`, `torch-cu118`, `torch-cu124`, `torch-cu126`, `torch-cu128`, `torch-jp6-cu126` |
+| `trt` | `trt10` |
+
+## Roboflow Platform Compatibility
+
+| Feature | Supported |
+|---------|-----------|
+| **Training** | ✅ Train custom models on Roboflow |
+| **Upload Weights** | ✅ Upload pre-trained weights |
+| **Serverless API (v2)** | ✅ [Deploy via hosted API](https://docs.roboflow.com/deploy/serverless-hosted-api-v2) |
+| **Edge Deployment (Jetson)** | ✅ Deploy on NVIDIA Jetson devices |
+| **Self-Hosting** | ✅ Deploy with `inference-models` |
+
+## Usage Example
+
+```python
+import cv2
+import numpy as np
+from inference_models import AutoModel
+
+# Load a public Cityscapes checkpoint (open access, no API key required)
+model = AutoModel.from_pretrained("yolo26n-sem-1024")
+image = cv2.imread("path/to/image.jpg")
+
+# Run inference
+results = model(image)
+
+# Per-pixel class IDs and confidence
+seg_map = results[0].segmentation_map      # (H x W) class id per pixel
+confidence = results[0].confidence         # (H x W) per-pixel confidence
+
+# Colour the mask and overlay on the original image
+colors = np.random.randint(0, 256, size=(len(model.class_names), 3), dtype=np.uint8)
+colored_mask = colors[seg_map.cpu().numpy()]
+overlay = cv2.addWeighted(image, 0.5, colored_mask, 0.5, 0)
+cv2.imwrite("segmentation_result.jpg", overlay)
+```
+
+To run a custom-trained model, pass your `project-url/version` and a Roboflow API key:
+
+```python
+model = AutoModel.from_pretrained(
+    "my-project-abc123/2",
+    api_key="your_roboflow_api_key",
+)
+```
+
+## Output Format
+
+The model returns a list of `SemanticSegmentationResult` objects with:
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `segmentation_map` | `torch.Tensor` | Class ID for each pixel (H x W) |
+| `confidence` | `torch.Tensor` | Confidence score for each pixel (H x W) |
+| `image_metadata` | `dict` | Optional metadata about the image |
+
+!!! note "Semantic vs Instance Segmentation"
+    Semantic segmentation labels every pixel with a class but does not separate individual objects (all cars share the "car" label). For per-object masks, counting, or tracking, use an instance segmentation model such as [YOLO26 - Instance Segmentation](yolo26-instance-segmentation.md).
diff --git a/inference_models/mkdocs.yml b/inference_models/mkdocs.yml
index bc5f8229c6..822a784b6e 100644
--- a/inference_models/mkdocs.yml
+++ b/inference_models/mkdocs.yml
@@ -85,6 +85,7 @@ nav:
           - CLIP: models/clip.md
           - Perception Encoder: models/perception-encoder.md
       - Semantic Segmentation:
+          - YOLO26: models/yolo26-semantic-segmentation.md
           - DeepLabV3+: models/deeplabv3plus.md
       - Open-Vocabulary Object Detection:
           - Grounding DINO: models/grounding-dino.md

From 277be9eba16ae51396263515cab5a2ad18b297d0 Mon Sep 17 00:00:00 2001
From: Nathan Marraccini <119442842+nathan-marraccini@users.noreply.github.com>
Date: Fri, 5 Jun 2026 12:42:41 -0400
Subject: [PATCH 64/76] Qwen3.5-VL block: accept selectors for prompt /
 system_prompt (#2408)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The prompt and system_prompt fields were declared as plain Optional[str],
so they only accepted literal strings — a workflow step referencing
$inputs.prompt would receive the literal text "$inputs.prompt" instead of
the resolved value. This blocks the platform's fork-with-input_defaults
flow from threading user-supplied prompts through (the same flow that
already works for SAM3's classes).

Widen both fields to Optional[Union[Selector(kind=[STRING_KIND]), str]],
matching the pattern already used by model_version here and by the GLM-OCR
prompt field. Literal strings still validate, so this is backwards
compatible and needs no new block version. run() is unchanged — the engine
resolves selectors before invoking it.

Adds manifest unit tests covering literal, $inputs, and $steps references
and asserting the schema advertises selector support (reference: true).

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Co-authored-by: Paweł Pęczek <146137186+PawelPeczek-Roboflow@users.noreply.github.com>
---
 .../models/foundation/qwen3_5vl/v1.py         |  8 +--
 .../models/foundation/test_qwen3_5vl.py       | 59 +++++++++++++++++++
 2 files changed, 63 insertions(+), 4 deletions(-)
 create mode 100644 tests/workflows/unit_tests/core_steps/models/foundation/test_qwen3_5vl.py

diff --git a/inference/core/workflows/core_steps/models/foundation/qwen3_5vl/v1.py b/inference/core/workflows/core_steps/models/foundation/qwen3_5vl/v1.py
index dc9950fae0..70f70f396d 100644
--- a/inference/core/workflows/core_steps/models/foundation/qwen3_5vl/v1.py
+++ b/inference/core/workflows/core_steps/models/foundation/qwen3_5vl/v1.py
@@ -75,10 +75,10 @@ class BlockManifest(WorkflowBlockManifest):
     type: Literal["roboflow_core/qwen3_5vl@v1"]
 
     images: Selector(kind=[IMAGE_KIND]) = ImageInputField
-    prompt: Optional[str] = Field(
+    prompt: Optional[Union[Selector(kind=[STRING_KIND]), str]] = Field(
         default=None,
         description="Optional text prompt to provide additional context to Qwen3.5-VL. Otherwise it will just be a default one, which may affect the desired model behavior.",
-        examples=["What is in this image?"],
+        examples=["What is in this image?", "$inputs.prompt"],
     )
     model_version: Union[
         Literal["qwen3_5-0.8b", "qwen3_5-2b"],
@@ -90,10 +90,10 @@ class BlockManifest(WorkflowBlockManifest):
         examples=["qwen3_5-0.8b", "qwen3_5-2b"],
     )
 
-    system_prompt: Optional[str] = Field(
+    system_prompt: Optional[Union[Selector(kind=[STRING_KIND]), str]] = Field(
         default=None,
         description="Optional system prompt to provide additional context to Qwen3.5-VL.",
-        examples=["You are a helpful assistant."],
+        examples=["You are a helpful assistant.", "$inputs.system_prompt"],
     )
 
     enable_thinking: bool = Field(
diff --git a/tests/workflows/unit_tests/core_steps/models/foundation/test_qwen3_5vl.py b/tests/workflows/unit_tests/core_steps/models/foundation/test_qwen3_5vl.py
new file mode 100644
index 0000000000..e45f1711d6
--- /dev/null
+++ b/tests/workflows/unit_tests/core_steps/models/foundation/test_qwen3_5vl.py
@@ -0,0 +1,59 @@
+"""Unit tests for the Qwen3.5-VL v1 block manifest selector support."""
+
+from inference.core.workflows.core_steps.models.foundation.qwen3_5vl.v1 import (
+    BlockManifest,
+)
+
+BASE = {
+    "type": "roboflow_core/qwen3_5vl@v1",
+    "name": "my_qwen_step",
+    "images": "$inputs.image",
+}
+
+
+def test_manifest_accepts_literal_prompts():
+    """Plain (non-selector) prompt strings keep validating and round-trip as-is."""
+    literals = {"prompt": "What is in this image?", "system_prompt": "Be terse."}
+    manifest = BlockManifest.model_validate({**BASE, **literals})
+    # Each literal must come back exactly as supplied.
+    assert manifest.prompt == "What is in this image?"
+    assert manifest.system_prompt == "Be terse."
+
+
+def test_manifest_accepts_input_selector_prompts():
+    """prompt and system_prompt both accept ``$inputs.X`` selectors, which the
+    manifest stores verbatim."""
+    selectors = {
+        "prompt": "$inputs.prompt",
+        "system_prompt": "$inputs.system_prompt",
+    }
+    manifest = BlockManifest.model_validate({**BASE, **selectors})
+    # Compare against the exact reference strings that were supplied.
+    assert manifest.prompt == selectors["prompt"]
+    assert manifest.system_prompt == selectors["system_prompt"]
+
+
+def test_manifest_accepts_step_output_selector_prompt():
+    """A ``$steps.X.Y`` step-output reference is also accepted as prompt."""
+    reference = "$steps.some_step.output"
+    payload = {**BASE, "prompt": reference}
+    manifest = BlockManifest.model_validate(payload)
+    assert manifest.prompt == reference
+
+
+def test_manifest_prompts_default_to_none():
+    """Omitting prompt and system_prompt is valid; both default to None."""
+    manifest = BlockManifest.model_validate(BASE)
+    assert manifest.prompt is None
+    assert manifest.system_prompt is None
+
+
+def test_prompt_fields_expose_selector_in_schema():
+    """Each prompt field needs an anyOf branch with reference: true + pattern."""
+    properties = BlockManifest.model_json_schema()["properties"]
+    for field in ("prompt", "system_prompt"):
+        branches = properties[field]["anyOf"]
+        has_selector = any(
+            b.get("reference") is True and "pattern" in b for b in branches
+        )
+        assert has_selector, f"{field} should expose a selector branch in its schema"

From 0fa0718c9e2a823fd4cbce00732876f398926af7 Mon Sep 17 00:00:00 2001
From: Riaz Virani <riaz@roboflow.com>
Date: Fri, 5 Jun 2026 12:45:43 -0400
Subject: [PATCH 65/76] feat(workflows): add local event store mode to Vision
 Events block (ENT-1192) (#2402)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add a "Write to Local Event Store" toggle to the Roboflow Vision Events block so
one block serves both cloud (Serverless API) and edge deployments, removing the
need to swap between it and the enterprise Event Writer block.

When enabled, the block embeds images as base64 and POSTs to a local Event
Ingestion Service (<event store URL>/v2/events, default http://localhost:8001)
instead of the cloud Vision Events API. No Roboflow API key is required in this
mode; the local service is authenticated via EVENT_INGESTION_API_KEY.

- New manifest fields: write_to_event_store (advanced) and event_store_url
  (revealed via relevant_for when the toggle is on)
- Forwards `solution` and supports all event schemas incl. operator_feedback
- Adds event_id output: cloud returns the client-generated UUID, local parses
  the service-assigned id from the 201 response (mirrors Event Writer)
- Mirrors Event Writer 201/529 response handling for the shared endpoint
- requires_rf_key -> False so the block is configurable in the edge-embedded editor
- Event Writer block left unchanged

Validated end-to-end against a local Event Ingestion Service; 47 unit tests pass.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Co-authored-by: Paweł Pęczek <146137186+PawelPeczek-Roboflow@users.noreply.github.com>
---
 .../sinks/roboflow/vision_events/v1.py        | 315 ++++++++++++---
 .../sinks/roboflow/vision_events/test_v1.py   | 360 +++++++++++++++++-
 2 files changed, 621 insertions(+), 54 deletions(-)

diff --git a/inference/core/workflows/core_steps/sinks/roboflow/vision_events/v1.py b/inference/core/workflows/core_steps/sinks/roboflow/vision_events/v1.py
index 997a8abe7b..5109ed1aa1 100644
--- a/inference/core/workflows/core_steps/sinks/roboflow/vision_events/v1.py
+++ b/inference/core/workflows/core_steps/sinks/roboflow/vision_events/v1.py
@@ -1,3 +1,4 @@
+import os
 from concurrent.futures import ThreadPoolExecutor
 from datetime import datetime, timezone
 from functools import partial
@@ -98,6 +99,19 @@
    and custom metadata
 4. Supports fire-and-forget mode for non-blocking execution
 
+## Deployment Modes
+
+By default this block sends events to the **Roboflow Vision Events API** (cloud /
+Serverless API), uploading images and posting the event over the public API.
+
+For edge deployments, enable **Write to Local Event Store** to send events to a
+local Event Ingestion Service instead. In this mode images are embedded directly
+in the request (no upload step) and the event is posted to `<event store URL>/v2/events`.
+The event store URL defaults to `http://localhost:8001` and can be overridden. No
+Roboflow API key is required in this mode; if the local service requires
+authentication, set the `EVENT_INGESTION_API_KEY` environment variable on the
+inference server.
+
 ## Event Types
 
 - **quality_check**: Manufacturing/inspection QA with pass/fail result and optional confidence
@@ -108,8 +122,9 @@
 
 ## Requirements
 
-**API Key Required**: This block requires a valid Roboflow API key with `vision-events:write`
-scope. The API key must be configured in your environment or workflow configuration.
+The default (cloud) mode requires a valid Roboflow API key with `vision-events:write`
+scope, configured in your environment or workflow configuration. No Roboflow API key is
+needed when **Write to Local Event Store** is enabled (see Deployment Modes above).
 
 ## Common Use Cases
 
@@ -134,7 +149,11 @@ class BlockManifest(WorkflowBlockManifest):
                 "icon": "fal fa-eye",
                 "blockPriority": 1,
                 "popular": False,
-                "requires_rf_key": True,
+                # Not unconditionally required: the local event store mode needs no
+                # Roboflow key. A True value here walls off the whole block config in
+                # the inference-embedded (edge) editor, which would defeat that mode.
+                # The cloud path still raises at runtime if no key is available.
+                "requires_rf_key": False,
             },
         }
     )
@@ -354,11 +373,36 @@ class BlockManifest(WorkflowBlockManifest):
         description="If True, the block is disabled and no events are sent.",
         examples=[False, "$inputs.disable_vision_events"],
     )
+    write_to_event_store: Union[bool, Selector(kind=[BOOLEAN_KIND])] = Field(
+        default=False,
+        title="Write to Local Event Store",
+        description="If True, send the event to a local Event Ingestion Service "
+        "(edge deployment) instead of the Roboflow Vision Events API (cloud). "
+        "Images are embedded in the request and the event is posted to "
+        "`<Event Store URL>/v2/events`. No Roboflow API key is required in this mode.",
+        examples=[False, True, "$inputs.write_to_event_store"],
+    )
+    event_store_url: Union[Selector(kind=[STRING_KIND]), str] = Field(
+        default="http://localhost:8001",
+        title="Event Store URL",
+        description="Base URL of the local Event Ingestion Service. Only used when "
+        "`Write to Local Event Store` is enabled.",
+        examples=["http://localhost:8001", "$inputs.event_store_url"],
+        json_schema_extra={
+            "relevant_for": {
+                "write_to_event_store": {
+                    "values": [True],
+                    "required": False,
+                },
+            },
+        },
+    )
 
     @classmethod
     def describe_outputs(cls) -> List[OutputDefinition]:
         return [
             OutputDefinition(name="error_status", kind=[BOOLEAN_KIND]),
+            OutputDefinition(name="event_id", kind=[STRING_KIND]),
             OutputDefinition(name="message", kind=[STRING_KIND]),
         ]
 
@@ -397,6 +441,8 @@ def run(
         custom_metadata: Dict[str, Any],
         fire_and_forget: bool,
         disable_sink: bool,
+        write_to_event_store: bool = False,
+        event_store_url: str = "http://localhost:8001",
         external_id: Optional[str] = None,
         qc_result: Optional[str] = None,
         location: Optional[str] = None,
@@ -409,17 +455,10 @@ def run(
         related_event_id: Optional[str] = None,
         feedback: Optional[str] = None,
     ) -> BlockResult:
-        if self._api_key is None:
-            raise ValueError(
-                "VisionEvents block cannot run without Roboflow API key. "
-                "If you do not know how to get API key - visit "
-                "https://docs.roboflow.com/api-reference/authentication"
-                "#retrieve-an-api-key to learn how to retrieve one."
-            )
-
         if disable_sink:
             return {
                 "error_status": False,
+                "event_id": "",
                 "message": "Sink was disabled by parameter `disable_sink`",
             }
 
@@ -438,34 +477,60 @@ def run(
             feedback=feedback,
         )
 
-        task = partial(
-            _execute_vision_event,
-            api_base_url=API_BASE_URL,
-            api_key=self._api_key,
-            input_image=input_image,
-            output_image=output_image,
-            prediction=predictions,
-            event_type=event_type,
-            solution=solution,
-            event_data=event_data,
-            custom_metadata=custom_metadata,
-        )
+        if write_to_event_store:
+            task = partial(
+                _execute_local_event,
+                event_store_url=event_store_url,
+                input_image=input_image,
+                output_image=output_image,
+                prediction=predictions,
+                event_type=event_type,
+                solution=solution,
+                event_data=event_data,
+                custom_metadata=custom_metadata,
+            )
+        else:
+            if self._api_key is None:
+                raise ValueError(
+                    "VisionEvents block cannot run without Roboflow API key. "
+                    "If you do not know how to get API key - visit "
+                    "https://docs.roboflow.com/api-reference/authentication"
+                    "#retrieve-an-api-key to learn how to retrieve one."
+                )
+            task = partial(
+                _execute_vision_event,
+                api_base_url=API_BASE_URL,
+                api_key=self._api_key,
+                input_image=input_image,
+                output_image=output_image,
+                prediction=predictions,
+                event_type=event_type,
+                solution=solution,
+                event_data=event_data,
+                custom_metadata=custom_metadata,
+            )
 
         if fire_and_forget and self._background_tasks:
             self._background_tasks.add_task(task)
             return {
                 "error_status": False,
+                "event_id": "",
                 "message": "Vision event sent in background task",
             }
         elif fire_and_forget and self._thread_pool_executor:
             self._thread_pool_executor.submit(task)
             return {
                 "error_status": False,
+                "event_id": "",
                 "message": "Vision event sent in background task",
             }
         else:
-            error_status, message = task()
-            return {"error_status": error_status, "message": message}
+            error_status, message, event_id = task()
+            return {
+                "error_status": error_status,
+                "event_id": event_id,
+                "message": message,
+            }
 
 
 def _build_event_data(
@@ -512,6 +577,44 @@ def _build_event_data(
     return {k: v for k, v in data.items() if v is not None}
 
 
+def _convert_predictions_to_annotations(
+    prediction: Optional[Union[sv.Detections, dict]],
+) -> Dict[str, List[dict]]:
+    """Convert predictions into vision events annotation lists.
+
+    Returns a dict keyed by annotation type (objectDetections, classifications,
+    instanceSegmentations, keypoints), containing only the non-empty annotation
+    lists. Shared by the cloud and local event store code paths.
+    """
+    object_detections: List[dict] = []
+    classifications: List[dict] = []
+    instance_segmentations: List[dict] = []
+    keypoints_detections: List[dict] = []
+
+    if prediction is not None:
+        if isinstance(prediction, sv.Detections) and len(prediction) > 0:
+            (
+                object_detections,
+                instance_segmentations,
+                keypoints_detections,
+            ) = _convert_sv_detections_to_vision_events_format(prediction)
+        elif isinstance(prediction, dict):
+            classifications = _convert_classification_to_vision_events_format(
+                prediction
+            )
+
+    annotations: Dict[str, List[dict]] = {}
+    if object_detections:
+        annotations["objectDetections"] = object_detections
+    if classifications:
+        annotations["classifications"] = classifications
+    if instance_segmentations:
+        annotations["instanceSegmentations"] = instance_segmentations
+    if keypoints_detections:
+        annotations["keypoints"] = keypoints_detections
+    return annotations
+
+
 def _execute_vision_event(
     api_base_url: str,
     api_key: str,
@@ -522,25 +625,10 @@ def _execute_vision_event(
     solution: str,
     event_data: Dict[str, Any],
     custom_metadata: Dict[str, Any],
-) -> Tuple[bool, str]:
+) -> Tuple[bool, str, str]:
     try:
-        # Step 1: Convert predictions to vision events format
-        object_detections: List[dict] = []
-        classifications: List[dict] = []
-        instance_segmentations: List[dict] = []
-        keypoints_detections: List[dict] = []
-
-        if prediction is not None:
-            if isinstance(prediction, sv.Detections) and len(prediction) > 0:
-                (
-                    object_detections,
-                    instance_segmentations,
-                    keypoints_detections,
-                ) = _convert_sv_detections_to_vision_events_format(prediction)
-            elif isinstance(prediction, dict):
-                classifications = _convert_classification_to_vision_events_format(
-                    prediction
-                )
+        # Step 1: Convert predictions to vision events annotation format
+        annotations = _convert_predictions_to_annotations(prediction)
 
         # Step 2: Upload images and build a single image entry
         # sourceId = output/display image, inputSourceId = original input image
@@ -558,14 +646,7 @@ def _execute_vision_event(
 
         if image_entry:
             image_entry["label"] = "workflow"
-            if object_detections:
-                image_entry["objectDetections"] = object_detections
-            if classifications:
-                image_entry["classifications"] = classifications
-            if instance_segmentations:
-                image_entry["instanceSegmentations"] = instance_segmentations
-            if keypoints_detections:
-                image_entry["keypoints"] = keypoints_detections
+            image_entry.update(annotations)
 
         images_payload: List[dict] = [image_entry] if image_entry else []
 
@@ -578,10 +659,138 @@ def _execute_vision_event(
             custom_metadata=custom_metadata,
         )
 
-        return _send_event(api_base_url, api_key, payload)
+        error_status, message = _send_event(api_base_url, api_key, payload)
+        # The eventId is generated client-side and sent in the payload, so it is the
+        # canonical id of the created event; surface it on success.
+        event_id = "" if error_status else payload.get("eventId", "")
+        return error_status, message, event_id
     except Exception as error:
         logger.warning("Failed to create vision event: %s", error)
-        return True, f"Error creating vision event: {type(error).__name__}: {error}"
+        return (
+            True,
+            f"Error creating vision event: {type(error).__name__}: {error}",
+            "",
+        )
+
+
+def _execute_local_event(
+    event_store_url: str,
+    input_image: Optional[WorkflowImageData],
+    output_image: Optional[WorkflowImageData],
+    prediction: Optional[Union[sv.Detections, dict]],
+    event_type: str,
+    solution: str,
+    event_data: Dict[str, Any],
+    custom_metadata: Dict[str, Any],
+) -> Tuple[bool, str, str]:
+    """Send an event to a local Event Ingestion Service (v2 API).
+
+    Unlike the cloud path, images are embedded directly in the request as base64
+    rather than uploaded first. The use case (`solution`) is forwarded so events are
+    namespaced consistently with the cloud path; the service requires it when cloud
+    upload is enabled.
+    """
+    try:
+        # Convert predictions to vision events annotation format
+        annotations = _convert_predictions_to_annotations(prediction)
+
+        # Build a single image entry with base64-embedded images
+        # base64Image = output/display image, inputBase64Image = original input image
+        image_entry: Dict[str, Any] = {}
+        if output_image is not None:
+            image_entry["base64Image"] = output_image.base64_image
+        if input_image is not None:
+            image_entry["inputBase64Image"] = input_image.base64_image
+        if image_entry:
+            image_entry["label"] = "workflow"
+            image_entry.update(annotations)
+
+        images_payload: List[dict] = [image_entry] if image_entry else []
+
+        payload: Dict[str, Any] = {
+            "inference_timestamp": datetime.now(timezone.utc).isoformat(),
+            "solution": solution,
+            "event_schema": event_type,
+            "event_data": event_data,
+            "images": images_payload,
+        }
+        if custom_metadata:
+            payload["custom_metadata"] = custom_metadata
+        if images_payload:
+            payload["displayImagePosition"] = 0
+
+        url = f"{event_store_url.rstrip('/')}/v2/events"
+        return _send_local_event(url, payload)
+    except Exception as error:
+        logger.warning("Failed to write event to local event store: %s", error)
+        return (
+            True,
+            f"Error writing event to event store: {type(error).__name__}: {error}",
+            "",
+        )
+
+
+def _send_local_event(
+    url: str,
+    payload: dict,
+) -> Tuple[bool, str, str]:
+    """Send an event to the local Event Ingestion Service.
+
+    Authenticates with the ``EVENT_INGESTION_API_KEY`` environment variable when set.
+    Mirrors the Event Writer sink's response handling for the shared ``/v2/events``
+    endpoint: the service returns 201 with the server-assigned event ``id`` on success,
+    and 529 when the device is at capacity and applying backpressure while it waits for
+    cloud uploads to drain.
+
+    Returns:
+        Tuple of (error_status, message, event_id)
+    """
+    try:
+        headers = {"Content-Type": "application/json"}
+        api_key = os.environ.get("EVENT_INGESTION_API_KEY")
+        if api_key:
+            headers["X-API-Key"] = api_key
+        response = requests.post(url, headers=headers, json=payload, timeout=30)
+
+        if response.status_code == 201:
+            event_id = str(response.json().get("id", ""))
+            return False, "Event written to local event store successfully", event_id
+
+        if response.status_code == 529:
+            detail = _extract_detail(response)
+            return (
+                True,
+                "Event Ingestion Service at capacity (529). The device is "
+                f"experiencing backpressure. Detail: {detail}",
+                "",
+            )
+
+        detail = _extract_detail(response)
+        logger.warning(
+            "Event Ingestion Service error (%s): %s", response.status_code, detail
+        )
+        return (
+            True,
+            f"Failed to write event to event store. HTTP {response.status_code}: {detail}",
+            "",
+        )
+    except requests.exceptions.Timeout:
+        return True, "Request to event store timed out after 30s", ""
+    except Exception as e:
+        logger.warning("Failed to write event to local event store: %s", e)
+        return (
+            True,
+            f"Failed to write event to event store. Error: {type(e).__name__}: {e}",
+            "",
+        )
+
+
+def _extract_detail(response: requests.Response) -> str:
+    """Extract the ``detail`` field from a JSON error response, falling back to text."""
+    try:
+        return str(response.json().get("detail", response.text))
+    except Exception:
+        return response.text
 
 
 def _detect_prediction_type(detections: sv.Detections) -> str:
diff --git a/tests/workflows/unit_tests/core_steps/sinks/roboflow/vision_events/test_v1.py b/tests/workflows/unit_tests/core_steps/sinks/roboflow/vision_events/test_v1.py
index 2902e3119d..4058e700f4 100644
--- a/tests/workflows/unit_tests/core_steps/sinks/roboflow/vision_events/test_v1.py
+++ b/tests/workflows/unit_tests/core_steps/sinks/roboflow/vision_events/test_v1.py
@@ -1,3 +1,4 @@
+import os
 from unittest.mock import MagicMock, patch
 
 import numpy as np
@@ -10,8 +11,12 @@
     _build_event_data,
     _build_event_payload,
     _convert_classification_to_vision_events_format,
+    _convert_predictions_to_annotations,
     _convert_sv_detections_to_vision_events_format,
     _detect_prediction_type,
+    _execute_local_event,
+    _execute_vision_event,
+    _send_local_event,
     _upload_image,
 )
 from inference.core.workflows.execution_engine.constants import (
@@ -455,6 +460,7 @@ def test_run_disabled() -> None:
     )
     assert isinstance(result, dict)
     assert result["error_status"] is False
+    assert result["event_id"] == ""
     assert "disabled" in result["message"].lower()
 
 
@@ -481,6 +487,7 @@ def test_run_fire_and_forget_background_tasks(mock_execute: MagicMock) -> None:
 
     background_tasks.add_task.assert_called_once()
     assert result["error_status"] is False
+    assert result["event_id"] == ""
     assert "background" in result["message"].lower()
 
 
@@ -507,6 +514,7 @@ def test_run_fire_and_forget_thread_pool(mock_execute: MagicMock) -> None:
 
     thread_pool.submit.assert_called_once()
     assert result["error_status"] is False
+    assert result["event_id"] == ""
     assert "background" in result["message"].lower()
 
 
@@ -514,7 +522,7 @@ def test_run_fire_and_forget_thread_pool(mock_execute: MagicMock) -> None:
     "inference.core.workflows.core_steps.sinks.roboflow.vision_events.v1._execute_vision_event"
 )
 def test_run_synchronous(mock_execute: MagicMock) -> None:
-    mock_execute.return_value = (False, "Vision event sent successfully")
+    mock_execute.return_value = (False, "Vision event sent successfully", "evt-123")
     block = RoboflowVisionEventsBlockV1(
         api_key="test-key",
         background_tasks=None,
@@ -533,9 +541,359 @@ def test_run_synchronous(mock_execute: MagicMock) -> None:
 
     mock_execute.assert_called_once()
     assert result["error_status"] is False
+    assert result["event_id"] == "evt-123"
     assert result["message"] == "Vision event sent successfully"
 
 
+# === Local Event Store Mode (ENT-1192) ===
+
+
+def test_manifest_write_to_event_store_defaults() -> None:
+    manifest = BlockManifest.model_validate(
+        {
+            "type": "roboflow_core/roboflow_vision_events@v1",
+            "name": "test_step",
+            "event_type": "quality_check",
+            "solution": "my-solution",
+        }
+    )
+    assert manifest.write_to_event_store is False
+    assert manifest.event_store_url == "http://localhost:8001"
+
+
+def test_manifest_write_to_event_store_enabled() -> None:
+    manifest = BlockManifest.model_validate(
+        {
+            "type": "roboflow_core/roboflow_vision_events@v1",
+            "name": "test_step",
+            "event_type": "quality_check",
+            "solution": "my-solution",
+            "write_to_event_store": True,
+            "event_store_url": "http://edge.local:8001",
+        }
+    )
+    assert manifest.write_to_event_store is True
+    assert manifest.event_store_url == "http://edge.local:8001"
+
+
+def test_convert_predictions_to_annotations_object_detection() -> None:
+    detections = _make_detections(n=2)
+    annotations = _convert_predictions_to_annotations(detections)
+    assert "objectDetections" in annotations
+    assert len(annotations["objectDetections"]) == 2
+    assert "classifications" not in annotations
+
+
+def test_convert_predictions_to_annotations_classification() -> None:
+    prediction = {"predictions": [{"class_name": "cat", "confidence": 0.9}]}
+    annotations = _convert_predictions_to_annotations(prediction)
+    assert "classifications" in annotations
+    assert "objectDetections" not in annotations
+
+
+def test_convert_predictions_to_annotations_none() -> None:
+    assert _convert_predictions_to_annotations(None) == {}
+
+
+@patch(
+    "inference.core.workflows.core_steps.sinks.roboflow.vision_events.v1._execute_local_event"
+)
+def test_run_write_to_event_store_does_not_require_api_key(
+    mock_execute: MagicMock,
+) -> None:
+    """In local event store mode, no Roboflow API key is required."""
+    mock_execute.return_value = (
+        False,
+        "Event written to local event store successfully",
+        "evt-local-1",
+    )
+    block = RoboflowVisionEventsBlockV1(
+        api_key=None,
+        background_tasks=None,
+        thread_pool_executor=None,
+    )
+    result = block.run(
+        input_image=None,
+        output_image=None,
+        predictions=None,
+        event_type="custom",
+        solution="test",
+        custom_metadata={},
+        fire_and_forget=False,
+        disable_sink=False,
+        write_to_event_store=True,
+    )
+
+    mock_execute.assert_called_once()
+    assert result["error_status"] is False
+    assert result["event_id"] == "evt-local-1"
+    assert "local event store" in result["message"].lower()
+
+
+@patch(
+    "inference.core.workflows.core_steps.sinks.roboflow.vision_events.v1._execute_local_event"
+)
+def test_run_write_to_event_store_passes_url(mock_execute: MagicMock) -> None:
+    mock_execute.return_value = (False, "ok", "")
+    block = RoboflowVisionEventsBlockV1(
+        api_key="test-key",
+        background_tasks=None,
+        thread_pool_executor=None,
+    )
+    block.run(
+        input_image=None,
+        output_image=None,
+        predictions=None,
+        event_type="custom",
+        solution="test",
+        custom_metadata={},
+        fire_and_forget=False,
+        disable_sink=False,
+        write_to_event_store=True,
+        event_store_url="http://edge:9000",
+    )
+
+    assert mock_execute.call_args.kwargs["event_store_url"] == "http://edge:9000"
+
+
+@patch(
+    "inference.core.workflows.core_steps.sinks.roboflow.vision_events.v1._send_local_event"
+)
+def test_execute_local_event_builds_v2_payload(mock_send: MagicMock) -> None:
+    mock_send.return_value = (False, "ok", "evt-local-42")
+    detections = _make_detections(n=1)
+
+    error_status, _, event_id = _execute_local_event(
+        event_store_url="http://localhost:8001/",
+        input_image=_make_workflow_image(),
+        output_image=_make_workflow_image(),
+        prediction=detections,
+        event_type="quality_check",
+        solution="my-solution",
+        event_data={"result": "pass"},
+        custom_metadata={"camera_id": "cam-01"},
+    )
+
+    assert error_status is False
+    # the service-assigned id from _send_local_event is propagated back
+    assert event_id == "evt-local-42"
+    mock_send.assert_called_once()
+    url, payload = mock_send.call_args.args
+    # trailing slash on the base URL is stripped before appending /v2/events
+    assert url == "http://localhost:8001/v2/events"
+    assert payload["event_schema"] == "quality_check"
+    # use case is forwarded so events are namespaced like the cloud path
+    assert payload["solution"] == "my-solution"
+    assert payload["event_data"] == {"result": "pass"}
+    assert payload["custom_metadata"] == {"camera_id": "cam-01"}
+    assert payload["displayImagePosition"] == 0
+    assert "inference_timestamp" in payload
+    assert len(payload["images"]) == 1
+
+    image = payload["images"][0]
+    assert "base64Image" in image  # output image
+    assert "inputBase64Image" in image  # input image
+    assert image["label"] == "workflow"
+    assert "objectDetections" in image
+
+
+@patch(
+    "inference.core.workflows.core_steps.sinks.roboflow.vision_events.v1._send_local_event"
+)
+def test_execute_local_event_no_images(mock_send: MagicMock) -> None:
+    mock_send.return_value = (False, "ok", "")
+
+    _execute_local_event(
+        event_store_url="http://localhost:8001",
+        input_image=None,
+        output_image=None,
+        prediction=None,
+        event_type="custom",
+        solution="my-solution",
+        event_data={"value": "x"},
+        custom_metadata={},
+    )
+
+    _, payload = mock_send.call_args.args
+    assert payload["images"] == []
+    assert "displayImagePosition" not in payload
+    assert "custom_metadata" not in payload
+
+
+@patch(
+    "inference.core.workflows.core_steps.sinks.roboflow.vision_events.v1._send_local_event"
+)
+def test_execute_local_event_operator_feedback(mock_send: MagicMock) -> None:
+    """operator_feedback is a valid schema in the local event store (v2 API)."""
+    mock_send.return_value = (False, "ok", "")
+
+    _execute_local_event(
+        event_store_url="http://localhost:8001",
+        input_image=None,
+        output_image=None,
+        prediction=None,
+        event_type="operator_feedback",
+        solution="my-solution",
+        event_data={"relatedEventId": "evt_abc123", "feedback": "correct"},
+        custom_metadata={},
+    )
+
+    _, payload = mock_send.call_args.args
+    assert payload["event_schema"] == "operator_feedback"
+    assert payload["event_data"] == {
+        "relatedEventId": "evt_abc123",
+        "feedback": "correct",
+    }
+
+
+@patch(
+    "inference.core.workflows.core_steps.sinks.roboflow.vision_events.v1._send_event"
+)
+def test_execute_vision_event_returns_generated_event_id(
+    mock_send: MagicMock,
+) -> None:
+    """The cloud path generates the eventId client-side and returns it on success."""
+    mock_send.return_value = (False, "Vision event sent successfully")
+
+    error_status, _, event_id = _execute_vision_event(
+        api_base_url="https://api.roboflow.com",
+        api_key="test-key",
+        input_image=None,
+        output_image=None,
+        prediction=None,
+        event_type="custom",
+        solution="my-solution",
+        event_data={"value": "x"},
+        custom_metadata={},
+    )
+
+    assert error_status is False
+    # the returned id is the client-generated UUID that was sent in the payload
+    assert event_id
+    sent_payload = mock_send.call_args.args[2]
+    assert event_id == sent_payload["eventId"]
+
+
+@patch(
+    "inference.core.workflows.core_steps.sinks.roboflow.vision_events.v1._send_event"
+)
+def test_execute_vision_event_no_event_id_on_error(mock_send: MagicMock) -> None:
+    mock_send.return_value = (True, "boom")
+
+    error_status, _, event_id = _execute_vision_event(
+        api_base_url="https://api.roboflow.com",
+        api_key="test-key",
+        input_image=None,
+        output_image=None,
+        prediction=None,
+        event_type="custom",
+        solution="my-solution",
+        event_data={},
+        custom_metadata={},
+    )
+
+    assert error_status is True
+    assert event_id == ""
+
+
+@patch(
+    "inference.core.workflows.core_steps.sinks.roboflow.vision_events.v1.requests.post"
+)
+def test_send_local_event_success_no_api_key(mock_post: MagicMock) -> None:
+    mock_response = MagicMock()
+    mock_response.status_code = 201
+    mock_response.json.return_value = {"id": "evt-123"}
+    mock_post.return_value = mock_response
+
+    env = {k: v for k, v in os.environ.items() if k != "EVENT_INGESTION_API_KEY"}
+    with patch.dict(os.environ, env, clear=True):
+        error_status, message, event_id = _send_local_event(
+            "http://localhost:8001/v2/events", {"a": 1}
+        )
+
+    assert error_status is False
+    # the server-assigned id is parsed from the 201 response body
+    assert event_id == "evt-123"
+    mock_post.assert_called_once()
+    assert mock_post.call_args.args[0] == "http://localhost:8001/v2/events"
+    headers = mock_post.call_args.kwargs["headers"]
+    assert "X-API-Key" not in headers
+    assert mock_post.call_args.kwargs["json"] == {"a": 1}
+
+
+@patch(
+    "inference.core.workflows.core_steps.sinks.roboflow.vision_events.v1.requests.post"
+)
+def test_send_local_event_sets_api_key_header(mock_post: MagicMock) -> None:
+    mock_response = MagicMock()
+    mock_response.status_code = 201
+    mock_response.json.return_value = {"id": "evt-123"}
+    mock_post.return_value = mock_response
+
+    with patch.dict(os.environ, {"EVENT_INGESTION_API_KEY": "secret-key"}):
+        _send_local_event("http://localhost:8001/v2/events", {})
+
+    headers = mock_post.call_args.kwargs["headers"]
+    assert headers["X-API-Key"] == "secret-key"
+
+
+@patch(
+    "inference.core.workflows.core_steps.sinks.roboflow.vision_events.v1.requests.post"
+)
+def test_send_local_event_backpressure_529(mock_post: MagicMock) -> None:
+    """529 from the Event Ingestion Service is surfaced as a clear backpressure message."""
+    mock_response = MagicMock()
+    mock_response.status_code = 529
+    mock_response.json.return_value = {
+        "detail": "Device storage full. Waiting for cloud uploads to complete.",
+        "error": "capacity_blocked",
+    }
+    mock_post.return_value = mock_response
+
+    error_status, message, event_id = _send_local_event(
+        "http://localhost:8001/v2/events", {}
+    )
+    assert error_status is True
+    assert event_id == ""
+    assert "529" in message
+    assert "backpressure" in message.lower()
+    assert "Device storage full" in message
+
+
+@patch(
+    "inference.core.workflows.core_steps.sinks.roboflow.vision_events.v1.requests.post"
+)
+def test_send_local_event_http_error(mock_post: MagicMock) -> None:
+    mock_response = MagicMock()
+    mock_response.status_code = 400
+    mock_response.json.return_value = {"detail": "bad request"}
+    mock_post.return_value = mock_response
+
+    error_status, message, event_id = _send_local_event(
+        "http://localhost:8001/v2/events", {}
+    )
+    assert error_status is True
+    assert event_id == ""
+    assert "400" in message
+    assert "bad request" in message
+
+
+@patch(
+    "inference.core.workflows.core_steps.sinks.roboflow.vision_events.v1.requests.post"
+)
+def test_send_local_event_timeout(mock_post: MagicMock) -> None:
+    import requests
+
+    mock_post.side_effect = requests.exceptions.Timeout()
+
+    error_status, message, event_id = _send_local_event(
+        "http://localhost:8001/v2/events", {}
+    )
+    assert error_status is True
+    assert event_id == ""
+    assert "timed out" in message.lower()
+
+
 # === Non-SIMD / Compilation Regression Tests (ENT-1126) ===
 
 

From 5448dbda060ad36501cc4884545f32680922b254 Mon Sep 17 00:00:00 2001
From: Damian Kosowski <kosowski.d@gmail.com>
Date: Fri, 5 Jun 2026 18:46:17 +0200
Subject: [PATCH 66/76] Parse Gemini 2.5 native object-detection format in
 vlm_as_detector (#2400)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* feat(gemini): implement new detection parsing functions and integrate with VLMAsDetectorBlock

This commit introduces a new module for parsing Gemini object detection responses, including functions for extracting detection entries, parsing coordinates, and clamping confidence values to the [0, 1] range. The `parse_gemini_object_detection_response` function is integrated into both VLMAsDetectorBlockV1 and VLMAsDetectorBlockV2, replacing the previous inline implementation. Additionally, unit tests are added to validate the new functionality, ensuring correct handling of Gemini's native box format in detection outputs.

* refactor(vlm_as_detector): restore import of parse_gemini_object_detection_response in v1 and v2 files

This commit reinstates the import of the `parse_gemini_object_detection_response` function in both v1 and v2 formatter files for VLMAsDetectorBlock. The function was previously removed but is now necessary for proper functionality in the detection parsing workflow.

---------

Co-authored-by: Paweł Pęczek <146137186+PawelPeczek-Roboflow@users.noreply.github.com>
---
 .../gemini_detection_parsing.py               | 122 ++++++++++++++++++
 .../formatters/vlm_as_detector/v1.py          |  76 ++---------
 .../formatters/vlm_as_detector/v2.py          |  18 ++-
 .../formatters/vlm_as_detector/test_v1.py     |  30 +++++
 .../formatters/vlm_as_detector/test_v2.py     |  40 ++++++
 5 files changed, 219 insertions(+), 67 deletions(-)
 create mode 100644 inference/core/workflows/core_steps/formatters/vlm_as_detector/gemini_detection_parsing.py

diff --git a/inference/core/workflows/core_steps/formatters/vlm_as_detector/gemini_detection_parsing.py b/inference/core/workflows/core_steps/formatters/vlm_as_detector/gemini_detection_parsing.py
new file mode 100644
index 0000000000..6bce624231
--- /dev/null
+++ b/inference/core/workflows/core_steps/formatters/vlm_as_detector/gemini_detection_parsing.py
@@ -0,0 +1,122 @@
+from typing import List, Union
+from uuid import uuid4
+
+import numpy as np
+import supervision as sv
+from supervision.config import CLASS_NAME_DATA_FIELD
+
+from inference.core.workflows.core_steps.common.utils import (
+    attach_parents_coordinates_to_sv_detections,
+)
+from inference.core.workflows.execution_engine.constants import (
+    DETECTION_ID_KEY,
+    IMAGE_DIMENSIONS_KEY,
+    INFERENCE_ID_KEY,
+    PREDICTION_TYPE_KEY,
+)
+from inference.core.workflows.execution_engine.entities.base import WorkflowImageData
+
+GEMINI_NATIVE_BOX_COORDINATE_SCALE = 1000.0
+
+
+def extract_gemini_detection_entries(
+    parsed_data: Union[dict, list],
+) -> List[dict]:
+    if isinstance(parsed_data, list):
+        return parsed_data
+    if isinstance(parsed_data, dict) and "detections" in parsed_data:
+        return parsed_data["detections"]
+    raise ValueError("Unexpected Gemini object detection response format")
+
+
+def get_gemini_detection_class_name(detection: dict) -> str:
+    for key in ("class_name", "label", "class"):
+        value = detection.get(key)
+        if value is not None:
+            return str(value)
+    return "unknown"
+
+
+def parse_gemini_detection_xyxy(
+    detection: dict,
+    image_height: int,
+    image_width: int,
+) -> List[float]:
+    if "box_2d" in detection:
+        y_min, x_min, y_max, x_max = detection["box_2d"]
+        scale = GEMINI_NATIVE_BOX_COORDINATE_SCALE
+        return [
+            x_min / scale * image_width,
+            y_min / scale * image_height,
+            x_max / scale * image_width,
+            y_max / scale * image_height,
+        ]
+    return [
+        detection["x_min"] * image_width,
+        detection["y_min"] * image_height,
+        detection["x_max"] * image_width,
+        detection["y_max"] * image_height,
+    ]
+
+
+def scale_confidence(value: float) -> float:
+    return min(max(float(value), 0.0), 1.0)
+
+
+def create_classes_index(classes: List[str]) -> dict:
+    return {class_name: idx for idx, class_name in enumerate(classes)}
+
+
+def parse_gemini_object_detection_response(
+    image: WorkflowImageData,
+    parsed_data: Union[dict, list],
+    classes: List[str],
+    inference_id: str,
+) -> sv.Detections:
+    class_name2id = create_classes_index(classes=classes)
+    image_height, image_width = image.numpy_image.shape[:2]
+    detections = extract_gemini_detection_entries(parsed_data=parsed_data)
+    if len(detections) == 0:
+        return sv.Detections.empty()
+
+    xyxy, class_id, class_name, confidence = [], [], [], []
+    for detection in detections:
+        xyxy.append(
+            parse_gemini_detection_xyxy(
+                detection=detection,
+                image_height=image_height,
+                image_width=image_width,
+            )
+        )
+        label = get_gemini_detection_class_name(detection=detection)
+        class_id.append(class_name2id.get(label, -1))
+        class_name.append(label)
+        confidence.append(scale_confidence(detection.get("confidence", 1.0)))
+
+    xyxy = np.array(xyxy).round(0) if len(xyxy) > 0 else np.empty((0, 4))
+    confidence = np.array(confidence) if len(confidence) > 0 else np.empty(0)
+    class_id = np.array(class_id).astype(int) if len(class_id) > 0 else np.empty(0)
+    class_name = np.array(class_name) if len(class_name) > 0 else np.empty(0)
+    detection_ids = np.array([str(uuid4()) for _ in range(len(xyxy))])
+    dimensions = np.array([[image_height, image_width]] * len(xyxy))
+    inference_ids = np.array([inference_id] * len(xyxy))
+    prediction_type = np.array(["object-detection"] * len(xyxy))
+    data = {
+        CLASS_NAME_DATA_FIELD: class_name,
+        IMAGE_DIMENSIONS_KEY: dimensions,
+        INFERENCE_ID_KEY: inference_ids,
+        DETECTION_ID_KEY: detection_ids,
+        PREDICTION_TYPE_KEY: prediction_type,
+    }
+    detections_result = sv.Detections(
+        xyxy=xyxy,
+        confidence=confidence,
+        class_id=class_id,
+        mask=None,
+        tracker_id=None,
+        data=data,
+    )
+    return attach_parents_coordinates_to_sv_detections(
+        detections=detections_result,
+        image=image,
+    )
diff --git a/inference/core/workflows/core_steps/formatters/vlm_as_detector/v1.py b/inference/core/workflows/core_steps/formatters/vlm_as_detector/v1.py
index 06c3184c1b..e47409e7c9 100644
--- a/inference/core/workflows/core_steps/formatters/vlm_as_detector/v1.py
+++ b/inference/core/workflows/core_steps/formatters/vlm_as_detector/v1.py
@@ -15,6 +15,9 @@
     attach_parents_coordinates_to_sv_detections,
 )
 from inference.core.workflows.core_steps.common.vlms import VLM_TASKS_METADATA
+from inference.core.workflows.core_steps.formatters.vlm_as_detector.gemini_detection_parsing import (
+    parse_gemini_object_detection_response,
+)
 from inference.core.workflows.execution_engine.constants import (
     DETECTION_ID_KEY,
     IMAGE_DIMENSIONS_KEY,
@@ -290,7 +293,7 @@ def run(
 
 def string2json(
     raw_json: str,
-) -> Tuple[bool, dict]:
+) -> Tuple[bool, Union[dict, list]]:
     json_blocks_found = JSON_MARKDOWN_BLOCK_PATTERN.findall(raw_json)
     if len(json_blocks_found) == 0:
         return try_parse_json(raw_json)
@@ -298,9 +301,16 @@ def string2json(
     return try_parse_json(first_block)
 
 
-def try_parse_json(content: str) -> Tuple[bool, dict]:
+def try_parse_json(content: str) -> Tuple[bool, Union[dict, list]]:
     try:
-        return False, json.loads(content)
+        parsed = json.loads(content)
+        if isinstance(parsed, (dict, list)):
+            return False, parsed
+        logging.warning(
+            "Could not parse JSON to dict in `roboflow_core/vlm_as_detector@v1` block. "
+            f"Unexpected JSON root type: {type(parsed).__name__}."
+        )
+        return True, {}
     except Exception as error:
         logging.warning(
             f"Could not parse JSON to dict in `roboflow_core/vlm_as_detector@v1` block. "
@@ -309,66 +319,6 @@ def try_parse_json(content: str) -> Tuple[bool, dict]:
         return True, {}
 
 
-def parse_gemini_object_detection_response(
-    image: WorkflowImageData,
-    parsed_data: dict,
-    classes: List[str],
-    inference_id: str,
-) -> sv.Detections:
-    class_name2id = create_classes_index(classes=classes)
-    image_height, image_width = image.numpy_image.shape[:2]
-    if len(parsed_data["detections"]) == 0:
-        return sv.Detections.empty()
-    xyxy, class_id, class_name, confidence = [], [], [], []
-    for detection in parsed_data["detections"]:
-        xyxy.append(
-            [
-                detection["x_min"] * image_width,
-                detection["y_min"] * image_height,
-                detection["x_max"] * image_width,
-                detection["y_max"] * image_height,
-            ]
-        )
-        class_id.append(class_name2id.get(detection["class_name"], -1))
-        class_name.append(detection["class_name"])
-        confidence.append(scale_confidence(detection.get("confidence", 1.0)))
-    xyxy = np.array(xyxy).round(0) if len(xyxy) > 0 else np.empty((0, 4))
-    confidence = np.array(confidence) if len(confidence) > 0 else np.empty(0)
-    class_id = np.array(class_id).astype(int) if len(class_id) > 0 else np.empty(0)
-    class_name = np.array(class_name) if len(class_name) > 0 else np.empty(0)
-    detection_ids = np.array([str(uuid4()) for _ in range(len(xyxy))])
-    dimensions = np.array([[image_height, image_width]] * len(xyxy))
-    inference_ids = np.array([inference_id] * len(xyxy))
-    prediction_type = np.array(["object-detection"] * len(xyxy))
-    data = {
-        CLASS_NAME_DATA_FIELD: class_name,
-        IMAGE_DIMENSIONS_KEY: dimensions,
-        INFERENCE_ID_KEY: inference_ids,
-        DETECTION_ID_KEY: detection_ids,
-        PREDICTION_TYPE_KEY: prediction_type,
-    }
-    detections = sv.Detections(
-        xyxy=xyxy,
-        confidence=confidence,
-        class_id=class_id,
-        mask=None,
-        tracker_id=None,
-        data=data,
-    )
-    return attach_parents_coordinates_to_sv_detections(
-        detections=detections,
-        image=image,
-    )
-
-
-def create_classes_index(classes: List[str]) -> Dict[str, int]:
-    return {class_name: idx for idx, class_name in enumerate(classes)}
-
-
-def scale_confidence(value: float) -> float:
-    return min(max(float(value), 0.0), 1.0)
-
-
 def parse_florence2_object_detection_response(
     image: WorkflowImageData,
     parsed_data: dict,
diff --git a/inference/core/workflows/core_steps/formatters/vlm_as_detector/v2.py b/inference/core/workflows/core_steps/formatters/vlm_as_detector/v2.py
index da4f23db39..73f9bde112 100644
--- a/inference/core/workflows/core_steps/formatters/vlm_as_detector/v2.py
+++ b/inference/core/workflows/core_steps/formatters/vlm_as_detector/v2.py
@@ -15,6 +15,9 @@
     attach_parents_coordinates_to_sv_detections,
 )
 from inference.core.workflows.core_steps.common.vlms import VLM_TASKS_METADATA
+from inference.core.workflows.core_steps.formatters.vlm_as_detector.gemini_detection_parsing import (
+    parse_gemini_object_detection_response,
+)
 from inference.core.workflows.execution_engine.constants import (
     DETECTION_ID_KEY,
     IMAGE_DIMENSIONS_KEY,
@@ -310,7 +313,7 @@ def run(
 
 def string2json(
     raw_json: str,
-) -> Tuple[bool, dict]:
+) -> Tuple[bool, Union[dict, list]]:
     json_blocks_found = JSON_MARKDOWN_BLOCK_PATTERN.findall(raw_json)
     if len(json_blocks_found) == 0:
         return try_parse_json(raw_json)
@@ -318,9 +321,16 @@ def string2json(
     return try_parse_json(first_block)
 
 
-def try_parse_json(content: str) -> Tuple[bool, dict]:
+def try_parse_json(content: str) -> Tuple[bool, Union[dict, list]]:
     try:
-        return False, json.loads(content)
+        parsed = json.loads(content)
+        if isinstance(parsed, (dict, list)):
+            return False, parsed
+        logging.warning(
+            "Could not parse JSON to dict in `roboflow_core/vlm_as_detector@v2` block. "
+            f"Unexpected JSON root type: {type(parsed).__name__}."
+        )
+        return True, {}
     except Exception as error:
         logging.warning(
             f"Could not parse JSON to dict in `roboflow_core/vlm_as_detector@v1` block. "
@@ -450,7 +460,7 @@ def get_4digit_from_md5(input_string):
 REGISTERED_PARSERS = {
     # LLMs
     ("openai", "object-detection"): parse_llm_object_detection_response,
-    ("google-gemini", "object-detection"): parse_llm_object_detection_response,
+    ("google-gemini", "object-detection"): parse_gemini_object_detection_response,
     ("anthropic-claude", "object-detection"): parse_llm_object_detection_response,
     # Florence 2
     ("florence-2", "object-detection"): partial(
diff --git a/tests/workflows/unit_tests/core_steps/formatters/vlm_as_detector/test_v1.py b/tests/workflows/unit_tests/core_steps/formatters/vlm_as_detector/test_v1.py
index d5eada03d5..ed4d463d9b 100644
--- a/tests/workflows/unit_tests/core_steps/formatters/vlm_as_detector/test_v1.py
+++ b/tests/workflows/unit_tests/core_steps/formatters/vlm_as_detector/test_v1.py
@@ -107,6 +107,36 @@ def test_run_method_for_claude_and_gemini_output() -> None:
     assert "root_parent_id" in result["predictions"].data
 
 
+def test_run_method_for_gemini_native_box_2d_output() -> None:
+    # given
+    block = VLMAsDetectorBlockV1()
+    image = WorkflowImageData(
+        numpy_image=np.zeros((192, 168, 3), dtype=np.uint8),
+        parent_metadata=ImageParentMetadata(parent_id="parent"),
+    )
+    vlm_output = """
+[
+  {"box_2d": [29, 17, 163, 54], "label": "dog"},
+  {"box_2d": [58, 82, 163, 109], "label": "dog"}
+]
+    """
+
+    # when
+    result = block.run(
+        image=image,
+        vlm_output=vlm_output,
+        classes=["cat", "dog"],
+        model_type="google-gemini",
+        task_type="object-detection",
+    )
+
+    # then
+    assert result["error_status"] is False
+    assert isinstance(result["predictions"], sv.Detections)
+    assert result["predictions"].data["class_name"].tolist() == ["dog", "dog"]
+    assert np.allclose(result["predictions"].class_id, np.array([1, 1]))
+
+
 def test_run_method_for_invalid_claude_and_gemini_output() -> None:
     # given
     block = VLMAsDetectorBlockV1()
diff --git a/tests/workflows/unit_tests/core_steps/formatters/vlm_as_detector/test_v2.py b/tests/workflows/unit_tests/core_steps/formatters/vlm_as_detector/test_v2.py
index 3af0c5c10a..dca25a6907 100644
--- a/tests/workflows/unit_tests/core_steps/formatters/vlm_as_detector/test_v2.py
+++ b/tests/workflows/unit_tests/core_steps/formatters/vlm_as_detector/test_v2.py
@@ -107,6 +107,46 @@ def test_run_method_for_claude_and_gemini_output() -> None:
     assert "root_parent_id" in result["predictions"].data
 
 
+def test_run_method_for_gemini_native_box_2d_output() -> None:
+    # given
+    block = VLMAsDetectorBlockV2()
+    image = WorkflowImageData(
+        numpy_image=np.zeros((192, 168, 3), dtype=np.uint8),
+        parent_metadata=ImageParentMetadata(parent_id="parent"),
+    )
+    vlm_output = """
+[
+  {"box_2d": [29, 17, 163, 54], "label": "dog"},
+  {"box_2d": [58, 82, 163, 109], "label": "dog"}
+]
+    """
+
+    # when
+    result = block.run(
+        image=image,
+        vlm_output=vlm_output,
+        classes=["cat", "dog"],
+        model_type="google-gemini",
+        task_type="object-detection",
+    )
+
+    # then
+    assert result["error_status"] is False
+    assert isinstance(result["predictions"], sv.Detections)
+    assert result["predictions"].data["class_name"].tolist() == ["dog", "dog"]
+    assert np.allclose(result["predictions"].class_id, np.array([1, 1]))
+    assert np.allclose(
+        result["predictions"].xyxy,
+        np.array(
+            [
+                [2.856, 5.568, 9.072, 31.296],
+                [13.776, 11.136, 18.312, 31.296],
+            ]
+        ),
+        atol=1.0,
+    )
+
+
 def test_run_method_for_invalid_claude_and_gemini_output() -> None:
     # given
     block = VLMAsDetectorBlockV2()

From 9ca3c47ddc869fa55f7ff54c3dbd58e21f41affc Mon Sep 17 00:00:00 2001
From: Sachin Agarwal <sachin@roboflow.com>
Date: Fri, 5 Jun 2026 12:47:35 -0400
Subject: [PATCH 67/76] ci: add concurrency cancellation to PR-triggered test
 workflows (#2392)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Cancels superseded in-flight runs on a PR when a new commit is pushed,
so CI is billed for ~1 matrix pass per settled state instead of one per
intermediate commit. Cancellation is scoped to pull_request events only
(cancel-in-progress resolves to false on push:[main]) so merged commits
keep their per-commit CI / required checks and nothing is orphaned.

The group key is workflow+ref, so distinct PRs never cancel each other and
different workflows on the same branch stay independent. Verified that no
two workflows share a `name:` value, which would make their group keys collide.

Applied to the 10 PR-triggered test workflows; codeflash already had a
concurrency block and is left as-is.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Co-authored-by: Paweł Pęczek <146137186+PawelPeczek-Roboflow@users.noreply.github.com>
---
 .github/workflows/check_model_licenses.yml                    | 4 ++++
 .github/workflows/integration_e2e_tests_inference_sdk_x86.yml | 4 ++++
 .../integration_tests_inference_experimental_cpu.yml          | 4 ++++
 .github/workflows/integration_tests_workflows_x86.yml         | 4 ++++
 .github/workflows/static_code_analysis.yml                    | 4 ++++
 .github/workflows/unit_tests_inference_cli_x86.yml            | 4 ++++
 .github/workflows/unit_tests_inference_experimental.yml       | 4 ++++
 .github/workflows/unit_tests_inference_sdk_x86.yml            | 4 ++++
 .github/workflows/unit_tests_inference_x86.yml                | 4 ++++
 .github/workflows/unit_tests_workflows_x86.yml                | 4 ++++
 10 files changed, 40 insertions(+)

diff --git a/.github/workflows/check_model_licenses.yml b/.github/workflows/check_model_licenses.yml
index 65d4cb0c10..ed40b5db1f 100644
--- a/.github/workflows/check_model_licenses.yml
+++ b/.github/workflows/check_model_licenses.yml
@@ -8,6 +8,10 @@ on:
       - ".github/workflows/check_model_licenses.yml"
   workflow_dispatch:
 
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: ${{ github.event_name == 'pull_request' }}
+
 jobs:
   validate-models:
     runs-on: ubuntu-latest
diff --git a/.github/workflows/integration_e2e_tests_inference_sdk_x86.yml b/.github/workflows/integration_e2e_tests_inference_sdk_x86.yml
index 4f4d2c8439..46cbbc5638 100644
--- a/.github/workflows/integration_e2e_tests_inference_sdk_x86.yml
+++ b/.github/workflows/integration_e2e_tests_inference_sdk_x86.yml
@@ -8,6 +8,10 @@ on:
     branches: [main]
   workflow_dispatch:
 
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: ${{ github.event_name == 'pull_request' }}
+
 jobs:
   call_is_mergeable:
     uses: ./.github/workflows/check_if_branch_is_mergeable.yml
diff --git a/.github/workflows/integration_tests_inference_experimental_cpu.yml b/.github/workflows/integration_tests_inference_experimental_cpu.yml
index a6356ec153..cfe44d1506 100644
--- a/.github/workflows/integration_tests_inference_experimental_cpu.yml
+++ b/.github/workflows/integration_tests_inference_experimental_cpu.yml
@@ -28,6 +28,10 @@ on:
           - '3.11'
           - '3.12'
 
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: ${{ github.event_name == 'pull_request' }}
+
 jobs:
   integration-tests-inference-models-cpu:
     name: ${{ matrix.extras.marker }}:${{ matrix.python-version }}
diff --git a/.github/workflows/integration_tests_workflows_x86.yml b/.github/workflows/integration_tests_workflows_x86.yml
index 0e3fbc5456..82609feae4 100644
--- a/.github/workflows/integration_tests_workflows_x86.yml
+++ b/.github/workflows/integration_tests_workflows_x86.yml
@@ -26,6 +26,10 @@ on:
           - "true"
           - "false"
 
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: ${{ github.event_name == 'pull_request' }}
+
 jobs:
   call_is_mergeable:
     uses: ./.github/workflows/check_if_branch_is_mergeable.yml
diff --git a/.github/workflows/static_code_analysis.yml b/.github/workflows/static_code_analysis.yml
index a8235e56d8..aa6abc9dfa 100644
--- a/.github/workflows/static_code_analysis.yml
+++ b/.github/workflows/static_code_analysis.yml
@@ -7,6 +7,10 @@ on:
   push:
     branches: [main]
 
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: ${{ github.event_name == 'pull_request' }}
+
 jobs:
   build-dev-test:
     runs-on:
diff --git a/.github/workflows/unit_tests_inference_cli_x86.yml b/.github/workflows/unit_tests_inference_cli_x86.yml
index eca21de26d..ce09bd2b68 100644
--- a/.github/workflows/unit_tests_inference_cli_x86.yml
+++ b/.github/workflows/unit_tests_inference_cli_x86.yml
@@ -8,6 +8,10 @@ on:
     branches: [main]
   workflow_dispatch:
 
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: ${{ github.event_name == 'pull_request' }}
+
 jobs:
   call_is_mergeable:
     uses: ./.github/workflows/check_if_branch_is_mergeable.yml
diff --git a/.github/workflows/unit_tests_inference_experimental.yml b/.github/workflows/unit_tests_inference_experimental.yml
index 990e4c6b84..960953ebbd 100644
--- a/.github/workflows/unit_tests_inference_experimental.yml
+++ b/.github/workflows/unit_tests_inference_experimental.yml
@@ -8,6 +8,10 @@ on:
     branches: [main]
   workflow_dispatch:
 
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: ${{ github.event_name == 'pull_request' }}
+
 jobs:
   unit-tests-inference-models:
     runs-on:
diff --git a/.github/workflows/unit_tests_inference_sdk_x86.yml b/.github/workflows/unit_tests_inference_sdk_x86.yml
index 788c140184..8475cc0261 100644
--- a/.github/workflows/unit_tests_inference_sdk_x86.yml
+++ b/.github/workflows/unit_tests_inference_sdk_x86.yml
@@ -8,6 +8,10 @@ on:
     branches: [main]
   workflow_dispatch:
 
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: ${{ github.event_name == 'pull_request' }}
+
 jobs:
   call_is_mergeable:
     uses: ./.github/workflows/check_if_branch_is_mergeable.yml
diff --git a/.github/workflows/unit_tests_inference_x86.yml b/.github/workflows/unit_tests_inference_x86.yml
index 924238e869..422f8233ef 100644
--- a/.github/workflows/unit_tests_inference_x86.yml
+++ b/.github/workflows/unit_tests_inference_x86.yml
@@ -9,6 +9,10 @@ on:
     branches: [main]
   workflow_dispatch:
 
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: ${{ github.event_name == 'pull_request' }}
+
 jobs:
   call_is_mergeable:
     uses: ./.github/workflows/check_if_branch_is_mergeable.yml
diff --git a/.github/workflows/unit_tests_workflows_x86.yml b/.github/workflows/unit_tests_workflows_x86.yml
index 9e95a24b60..ca4d730aa8 100644
--- a/.github/workflows/unit_tests_workflows_x86.yml
+++ b/.github/workflows/unit_tests_workflows_x86.yml
@@ -9,6 +9,10 @@ on:
     branches: [main]
   workflow_dispatch:
 
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: ${{ github.event_name == 'pull_request' }}
+
 jobs:
   call_is_mergeable:
     uses: ./.github/workflows/check_if_branch_is_mergeable.yml

From 76bb86dc2634297bd8621672cd421e2a4b9fe503 Mon Sep 17 00:00:00 2001
From: Rafel Bennasar <253519461+rafel-roboflow@users.noreply.github.com>
Date: Fri, 5 Jun 2026 18:49:03 +0200
Subject: [PATCH 68/76] updated docs for showing runtime compatibility (#2391)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Paweł Pęczek <146137186+PawelPeczek-Roboflow@users.noreply.github.com>
---
 development/docs/build_block_docs.py          | 320 ++++++++++++------
 docs/workflows/create_workflow_block.md       | 123 +++++++
 .../models/foundation/qwen3_5vl/v2.py         |  29 ++
 mkdocs.yml                                    |   1 +
 4 files changed, 367 insertions(+), 106 deletions(-)

diff --git a/development/docs/build_block_docs.py b/development/docs/build_block_docs.py
index 69f69f8c42..3b82732ad0 100644
--- a/development/docs/build_block_docs.py
+++ b/development/docs/build_block_docs.py
@@ -31,10 +31,12 @@
 template_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "templates")
 jinja_env = Environment(loader=FileSystemLoader(template_dir))
 
+
 def render_template(template_name, **kwargs):
     template = jinja_env.get_template(template_name)
     return template.render(**kwargs)
 
+
 DOCS_ROOT_DIR = os.path.abspath(
     os.path.join(
         os.path.dirname(__file__),
@@ -46,10 +48,14 @@ def render_template(template_name, **kwargs):
 
 BLOCKS_DIR = os.path.join(DOCS_ROOT_DIR, "workflows", "blocks")
 
-BLOCK_DOCUMENTATION_FILE = os.path.join(DOCS_ROOT_DIR, "workflows", "blocks", "index.md")
+BLOCK_DOCUMENTATION_FILE = os.path.join(
+    DOCS_ROOT_DIR, "workflows", "blocks", "index.md"
+)
 
 KINDS_DIR = os.path.join(DOCS_ROOT_DIR, "workflows", "kinds")
-KINDS_DOCUMENTATION_TEMPLATE = os.path.join(DOCS_ROOT_DIR, "workflows", "kinds_template.md")
+KINDS_DOCUMENTATION_TEMPLATE = os.path.join(
+    DOCS_ROOT_DIR, "workflows", "kinds_template.md"
+)
 KINDS_DOCUMENTATION_FILE = os.path.join(DOCS_ROOT_DIR, "workflows", "kinds", "index.md")
 
 BLOCK_DOCUMENTATION_DIRECTORY = os.path.join(DOCS_ROOT_DIR, "workflows", "blocks")
@@ -81,7 +87,7 @@ def render_template(template_name, **kwargs):
 
 The **Refs** column marks possibility to parametrise the property with dynamic values available 
 in `workflow` runtime. See *Bindings* for more info.
-
+{block_runtime_compatibility}
 ### Available Connections {{ data-search-exclude }}
 
 ??? tip "Compatible Blocks"
@@ -123,8 +129,8 @@ def render_template(template_name, **kwargs):
 """
 
 
-
-BLOCK_VERSION_TEMPLATE_SINGLE_VERSION = """
+BLOCK_VERSION_TEMPLATE_SINGLE_VERSION = (
+    """
 
 ??? "Class: `{short_block_class_name}`"
 
@@ -132,10 +138,13 @@ def render_template(template_name, **kwargs):
     <a target="_blank" href="{block_source_link}">{block_class_name}</a>
     
 
-""" + BLOCK_VERSION_TEMPLATE
+"""
+    + BLOCK_VERSION_TEMPLATE
+)
 
 
-BLOCK_VERSION_TEMPLATE_MULTIPLE_VERSIONS = """
+BLOCK_VERSION_TEMPLATE_MULTIPLE_VERSIONS = (
+    """
 
 ## {version}
 
@@ -149,7 +158,9 @@ def render_template(template_name, **kwargs):
 
     
 
-""" + BLOCK_VERSION_TEMPLATE
+"""
+    + BLOCK_VERSION_TEMPLATE
+)
 
 BLOCK_CARD_TEMPLATE = '<p class="card block-card" data-url="{data_url}" data-name="{data_name}" data-desc="{data_desc}" data-labels="{data_labels}" data-author="{data_authors}"></p>\n'
 
@@ -202,60 +213,22 @@ def render_template(template_name, **kwargs):
 # This catches patterns like {{ $parameters.xxx }} in LONG_DESCRIPTION strings,
 # Field descriptions, and other generated content that would cause mkdocs-macros
 # Jinja2 parse errors ("unexpected char '$'").
-JINJA2_DOLLAR_EXPRESSION_PATTERN = re.compile(r"(\{\{(?:(?!\}\}).)*?\$(?:(?!\}\}).)*?\}\})")
+JINJA2_DOLLAR_EXPRESSION_PATTERN = re.compile(
+    r"(\{\{(?:(?!\}\}).)*?\$(?:(?!\}\}).)*?\}\})"
+)
 
 BLOCK_SECTIONS = [
-        {
-            "title": "Models",
-            "id": "model",
-            "colorScheme": "purboflow"
-        },
-        {
-            "title": "Visualizations",
-            "id": "visualization",
-            "colorScheme": "blue"
-        },
-        {
-            "title": "Logic and Branching",
-            "id": "flow_control",
-            "colorScheme": "yellow"
-        },
-        {
-            "title": "Data Storage",
-            "id": "data_storage",
-            "colorScheme": "pink"
-        },
-        {
-            "title": "Notifications",
-            "id": "notifications",
-            "colorScheme": "salmon"
-        },
-        {
-            "title": "Transformations",
-            "id": "transformation",
-            "colorScheme": "green"
-        },
-        {
-            "title": "Classical Computer Vision",
-            "id": "classical_cv",
-            "colorScheme": "cyan"
-        },
-        {
-            "title": "Video",
-            "id": "video",
-            "colorScheme": "indigo"
-        },
-        {
-            "title": "Advanced",
-            "id": "advanced",
-            "colorScheme": "orange"
-        },
-        {
-            "title": "Industrial",
-            "id": "industrial",
-            "colorScheme": "gray"
-        }
-    ]
+    {"title": "Models", "id": "model", "colorScheme": "purboflow"},
+    {"title": "Visualizations", "id": "visualization", "colorScheme": "blue"},
+    {"title": "Logic and Branching", "id": "flow_control", "colorScheme": "yellow"},
+    {"title": "Data Storage", "id": "data_storage", "colorScheme": "pink"},
+    {"title": "Notifications", "id": "notifications", "colorScheme": "salmon"},
+    {"title": "Transformations", "id": "transformation", "colorScheme": "green"},
+    {"title": "Classical Computer Vision", "id": "classical_cv", "colorScheme": "cyan"},
+    {"title": "Video", "id": "video", "colorScheme": "indigo"},
+    {"title": "Advanced", "id": "advanced", "colorScheme": "orange"},
+    {"title": "Industrial", "id": "industrial", "colorScheme": "gray"},
+]
 
 
 def main() -> None:
@@ -269,7 +242,7 @@ def write_blocks_docs(blocks_description):
     # create blocks directory if it doesn't exist
     os.makedirs(BLOCK_DOCUMENTATION_DIRECTORY, exist_ok=True)
 
-    # get block assgined to families    
+    # get block assigned to families
     block_families = get_block_families(blocks_description)
 
     # write blocks index file
@@ -280,7 +253,7 @@ def write_blocks_docs(blocks_description):
 
     # write individual block pages
     write_individual_block_pages(block_families, blocks_description)
-    
+
 
 def write_individual_block_pages(block_families, blocks_description):
     block_type2manifest_type_identifier = {
@@ -289,35 +262,45 @@ def write_individual_block_pages(block_families, blocks_description):
     }
     blocks_connections = discover_blocks_connections(
         blocks_description=blocks_description
-    )   
+    )
 
     for family_name, family_members in block_families.items():
-        
+
         documentation_file_name = slugify_block_name(family_name) + ".md"
         documentation_file_path = os.path.join(
             BLOCK_DOCUMENTATION_DIRECTORY, documentation_file_name
         )
-        
+
         versions_content = []
         for block in family_members:
             block_class_name = block.fully_qualified_block_class_name
             block_source_link = get_source_link_for_block_class(block.block_class)
             example_definition = generate_example_step_definition(block=block)
             parsed_manifest = parse_block_manifest(manifest_type=block.manifest_class)
-            long_description = block.block_schema.get("long_description", "Description not available")
-
+            long_description = block.block_schema.get(
+                "long_description", "Description not available"
+            )
 
-            template = BLOCK_VERSION_TEMPLATE_SINGLE_VERSION if len(family_members) == 1 else BLOCK_VERSION_TEMPLATE_MULTIPLE_VERSIONS
+            template = (
+                BLOCK_VERSION_TEMPLATE_SINGLE_VERSION
+                if len(family_members) == 1
+                else BLOCK_VERSION_TEMPLATE_MULTIPLE_VERSIONS
+            )
 
             version_content = template.format(
                 family_name=family_name,
                 version=block.block_schema.get("version", "undefined"),
                 block_source_link=block_source_link,
                 block_class_name=block_class_name,
-                short_block_class_name = block.fully_qualified_block_class_name.split(".")[-1],
+                short_block_class_name=block.fully_qualified_block_class_name.split(
+                    "."
+                )[-1],
                 type_identifier=block.manifest_type_identifier,
                 description=long_description,
                 block_inputs=format_block_inputs(parsed_manifest),
+                block_runtime_compatibility=format_runtime_compatibility(
+                    manifest_class=block.manifest_class,
+                ),
                 block_input_bindings=format_input_bindings(parsed_manifest),
                 block_output_bindings=format_block_outputs(block.outputs_manifest),
                 input_connections=format_block_connections(
@@ -332,7 +315,9 @@ def write_individual_block_pages(block_families, blocks_description):
                     ],
                     block_type2manifest_type_identifier=block_type2manifest_type_identifier,
                 ),
-                example=_dump_step_example_definition(example_definition=example_definition),
+                example=_dump_step_example_definition(
+                    example_definition=example_definition
+                ),
             )
             versions_content.append(version_content)
         all_versions_combined = combined_content_from_versions(versions_content)
@@ -342,7 +327,10 @@ def write_individual_block_pages(block_families, blocks_description):
         if all_deprecated:
             # Use custom deprecation message if provided, otherwise use default
             custom_message = family_members[0].block_schema.get("deprecation_message")
-            message = custom_message or "This block is deprecated and may be removed in a future release."
+            message = (
+                custom_message
+                or "This block is deprecated and may be removed in a future release."
+            )
             deprecation_warning = f'!!! warning "Deprecated"\n\n{" " * 4}{message}\n\n'
         else:
             deprecation_warning = ""
@@ -356,8 +344,6 @@ def write_individual_block_pages(block_families, blocks_description):
             documentation_file.write(family_document_content)
 
 
-
-
 def get_block_families_by_section(block_families):
     # Group families by block_type
     blocks_by_section = defaultdict(list)
@@ -365,9 +351,9 @@ def get_block_families_by_section(block_families):
         if not members:
             section = "custom"
         else:
-            block_name =  members[0].block_schema.get("name", "Missing Name")
+            block_name = members[0].block_schema.get("name", "Missing Name")
             ui_manifest = members[0].block_schema.get("ui_manifest", {})
-            section =  ui_manifest.get("section", "custom")
+            section = ui_manifest.get("section", "custom")
             if not section:
                 section = "custom"
         blocks_by_section[section].append(family_name)
@@ -375,7 +361,6 @@ def get_block_families_by_section(block_families):
     return blocks_by_section
 
 
-
 def write_blocks_summary_md(block_families):
     """
     Creates SUMMARY.md for mkdocs-literate-nav.
@@ -387,19 +372,25 @@ def write_blocks_summary_md(block_families):
 
     # For each block type, create a top-level bullet, then sub-bullets for families
     for block_section in BLOCK_SECTIONS:
-        section_title = block_section['title']
-        section_id = block_section['id']
+        section_title = block_section["title"]
+        section_id = block_section["id"]
 
         if not block_families_by_section[section_id]:
             continue
 
         lines.append(f"* {section_title}")
-        for family_name in sorted(block_families_by_section[section_id], key=lambda x: block_families[x][0].block_schema.get("ui_manifest", {}).get("blockPriority", 99)):
+        for family_name in sorted(
+            block_families_by_section[section_id],
+            key=lambda x: block_families[x][0]
+            .block_schema.get("ui_manifest", {})
+            .get("blockPriority", 99),
+        ):
             # Suppose you had a function slugify_block_name:
             slug = slugify_block_name(family_name)
             # Link to foo.md (or bar.md, etc.)
             all_deprecated = all(
-                b.block_schema.get("deprecated", False) for b in block_families[family_name]
+                b.block_schema.get("deprecated", False)
+                for b in block_families[family_name]
             )
             label = f"{family_name} (Deprecated)" if all_deprecated else family_name
             lines.append(f"{' ' * 4}* [{label}]({slug}.md)")
@@ -415,43 +406,52 @@ def write_blocks_index_file(block_families):
     blocks_by_section = {}
 
     for block_section in BLOCK_SECTIONS:
-        section_title = block_section['title']
-        section_id = block_section['id']
+        section_title = block_section["title"]
+        section_id = block_section["id"]
 
         blocks_by_section[section_id] = []
-        
-        
-        for family_name in sorted(block_families_by_section[section_id], key=lambda x: block_families[x][0].block_schema.get("ui_manifest", {}).get("blockPriority", 99)):
+
+        for family_name in sorted(
+            block_families_by_section[section_id],
+            key=lambda x: block_families[x][0]
+            .block_schema.get("ui_manifest", {})
+            .get("blockPriority", 99),
+        ):
             block_schema = block_families[family_name][0].block_schema
             # Hide deprecated blocks from the gallery index
             all_deprecated = all(
-                b.block_schema.get("deprecated", False) for b in block_families[family_name]
+                b.block_schema.get("deprecated", False)
+                for b in block_families[family_name]
             )
             if all_deprecated:
                 continue
             block_data = {
                 "name": family_name,
                 "url": slugify_block_name(family_name),
-                "description": block_schema.get("short_description", "Description not available"),
+                "description": block_schema.get(
+                    "short_description", "Description not available"
+                ),
                 "license": block_schema.get("license", "").upper(),
-                "icon": block_schema.get("ui_manifest", {}).get("icon", "far fa-sparkles"),
+                "icon": block_schema.get("ui_manifest", {}).get(
+                    "icon", "far fa-sparkles"
+                ),
             }
             blocks_by_section[section_id].append(block_data)
 
-        
-    
-
-    output = render_template("blocks_index.md", blocks_by_section=blocks_by_section, block_sections=BLOCK_SECTIONS)
-
+    output = render_template(
+        "blocks_index.md",
+        blocks_by_section=blocks_by_section,
+        block_sections=BLOCK_SECTIONS,
+    )
 
     with open(BLOCK_DOCUMENTATION_FILE, "w", encoding="utf-8") as f:
         f.write(output)
 
 
 def get_block_families(blocks_description):
-    '''
+    """
     Get block families and sort them by version.
-    '''
+    """
     block_families = defaultdict(list)
     for block in blocks_description.blocks:
         block_families[block.human_friendly_block_name].append(block)
@@ -467,9 +467,14 @@ def get_block_families(blocks_description):
 def combined_content_from_versions(versions_content: List[str]) -> str:
     return "\n\n".join(versions_content)
 
+
 def _dump_step_example_definition(example_definition: dict) -> str:
-    definition_stringified = "\n\t".join(json.dumps(example_definition, indent=4).split("\n"))
-    return INLINE_UQL_PARAMETER_PATTERN.sub(_escape_uql_brackets, definition_stringified)
+    definition_stringified = "\n\t".join(
+        json.dumps(example_definition, indent=4).split("\n")
+    )
+    return INLINE_UQL_PARAMETER_PATTERN.sub(
+        _escape_uql_brackets, definition_stringified
+    )
 
 
 def _escape_uql_brackets(match: re.Match) -> str:
@@ -482,7 +487,9 @@ def _escape_jinja2_expressions(content: str) -> str:
     (Jinja2) does not attempt to evaluate them. Already-escaped expressions
     like ``{{ '{{' }}`` are left untouched because they do not contain ``$``.
     """
-    return JINJA2_DOLLAR_EXPRESSION_PATTERN.sub(_escape_jinja2_dollar_expression, content)
+    return JINJA2_DOLLAR_EXPRESSION_PATTERN.sub(
+        _escape_jinja2_dollar_expression, content
+    )
 
 
 def _escape_jinja2_dollar_expression(match: re.Match) -> str:
@@ -498,6 +505,7 @@ def get_source_link_for_block_class(block_class: Type[WorkflowBlock]) -> str:
     except Exception as e:
         return None
 
+
 def get_auto_generation_markers(
     documentation_lines: List[str],
     token: str,
@@ -558,6 +566,106 @@ def format_block_inputs(parsed_manifest: BlockManifestMetadata) -> str:
     return "\n".join(USER_CONFIGURATION_HEADER + rows)
 
 
+RUNTIME_COMPATIBILITY_HEADING_ICON = (
+    ':material-shield-half-full:{ style="color: #5e6c75" }'
+)
+SEVERITY_ICONS = {
+    "hard": ':material-shield-alert:{ style="color: #d32f2f" }',
+    "soft": ':material-alert-circle-outline:{ style="color: #f57c00" }',
+}
+AIR_GAP_ICON = ':material-cloud-off-outline:{ style="color: #546e7a" }'
+
+
+def format_runtime_compatibility(manifest_class: Type) -> str:
+    """Render air-gapped status and runtime restrictions for a block.
+
+    Returns an empty string when the block has no notable caveats (the default
+    for most blocks), so the section is omitted from the rendered page rather
+    than adding boilerplate to every page.
+    """
+    sections: List[str] = []
+
+    air_gapped_block = _format_air_gapped_block(manifest_class=manifest_class)
+    if air_gapped_block:
+        sections.append(air_gapped_block)
+
+    restrictions_block = _format_restrictions_block(manifest_class=manifest_class)
+    if restrictions_block:
+        sections.append(restrictions_block)
+
+    if not sections:
+        return ""
+
+    body = "\n\n".join(sections)
+    heading = f"### {RUNTIME_COMPATIBILITY_HEADING_ICON} Runtime compatibility"
+    return f"\n{heading}\n\n{body}\n"
+
+
+def _format_air_gapped_block(manifest_class: Type) -> str:
+    get_air_gapped = getattr(manifest_class, "get_air_gapped_availability", None)
+    if not callable(get_air_gapped):
+        return ""
+    try:
+        air_gapped = get_air_gapped()
+    except Exception:
+        return ""
+    if air_gapped is None or getattr(air_gapped, "available", True):
+        return ""
+    reason = getattr(air_gapped, "reason", None)
+    reason_label = f"`{reason}`" if reason else "`air_gapped_unavailable`"
+    return (
+        f"{AIR_GAP_ICON} {reason_label} — air-gapped / offline deployments\n"
+        f":   This block depends on a service that is not reachable from "
+        f"fully offline / air-gapped deployments."
+    )
+
+
+def _format_restrictions_block(manifest_class: Type) -> str:
+    get_restrictions = getattr(manifest_class, "get_restrictions", None)
+    if not callable(get_restrictions):
+        return ""
+    try:
+        restrictions = list(get_restrictions() or [])
+    except Exception:
+        return ""
+    if not restrictions:
+        return ""
+
+    items: List[str] = []
+    for restriction in restrictions:
+        severity = _severity_value(restriction) or "soft"
+        scope = _format_restriction_scope(restriction)
+        note = (getattr(restriction, "note", "") or "").strip().replace("\n", " ")
+        scope_clause = f" — {scope}" if scope else " — all runtimes"
+        icon = SEVERITY_ICONS.get(severity, "")
+        icon_prefix = f"{icon} " if icon else ""
+        items.append(f"{icon_prefix}`{severity}`{scope_clause}\n:   {note}")
+    return "\n\n".join(items)
+
+
+def _severity_value(restriction) -> str:
+    severity = getattr(restriction, "severity", None)
+    return getattr(severity, "value", str(severity) if severity is not None else "")
+
+
+def _format_restriction_scope(restriction) -> str:
+    scope_parts: List[str] = []
+    runtimes = getattr(restriction, "applies_to_runtimes", None)
+    if runtimes:
+        scope_parts.append(
+            "runtime " + ", ".join(f"`{item.value}`" for item in runtimes)
+        )
+    modes = getattr(restriction, "applies_to_step_execution_modes", None)
+    if modes:
+        scope_parts.append(
+            "execution " + ", ".join(f"`{item.value}`" for item in modes)
+        )
+    inputs = getattr(restriction, "applies_to_input_modes", None)
+    if inputs:
+        scope_parts.append("input " + ", ".join(f"`{item.value}`" for item in inputs))
+    return "; ".join(scope_parts)
+
+
 def format_input_bindings(parsed_manifest: BlockManifestMetadata) -> str:
     rows = []
     for selector in parsed_manifest.selectors.values():
@@ -644,13 +752,15 @@ def generate_example_step_definition(block: BlockDescription) -> dict:
         result[property_name] = example
     return result
 
+
 def to_title_case(s: str) -> str:
     """
     Convert e.g. 'object_detection' -> 'Object Detection'
     """
-    words = re.split(r'[_\s]+', s.lower())
+    words = re.split(r"[_\s]+", s.lower())
     return " ".join(w.capitalize() for w in words if w)
 
+
 def write_kinds_summary_md(kinds):
     """
     Creates docs/workflows/kinds/SUMMARY.md for mkdocs-literate-nav.
@@ -659,7 +769,7 @@ def write_kinds_summary_md(kinds):
 
     for line in sorted(kinds):
         # replace everything including and after the :
-        line = re.sub(r':.*$', '', line)
+        line = re.sub(r":.*$", "", line)
 
         # replace back ticks
         line = line.replace("`", "")
@@ -674,13 +784,12 @@ def write_kinds_summary_md(kinds):
         f.write("".join(lines) + "\n")
 
 
-
 def write_kinds_docs(blocks_description):
     os.makedirs(KINDS_DOCUMENTATION_DIRECTORY, exist_ok=True)
 
     generated_kinds_index_lines = []
     for declared_kind in blocks_description.declared_kinds:
-        
+
         description = (
             declared_kind.description
             if declared_kind.description is not None
@@ -700,9 +809,7 @@ def write_kinds_docs(blocks_description):
             serialised_data_type=declared_kind.serialised_data_type,
             internal_data_type=declared_kind.internal_data_type,
         )
-        relative_link = (
-            f"../kinds/{slugify_kind_name(kind_name=declared_kind.name)}.md"
-        )
+        relative_link = f"../kinds/{slugify_kind_name(kind_name=declared_kind.name)}.md"
         generated_kinds_index_lines.append(
             f"* [`{declared_kind.name}`]({relative_link}): {description}\n"
         )
@@ -710,7 +817,7 @@ def write_kinds_docs(blocks_description):
         kind_page = _escape_jinja2_expressions(kind_page)
         with open(kind_file_path, "w") as documentation_file:
             documentation_file.write(kind_page)
-    
+
     generated_kinds_index_lines = sorted(generated_kinds_index_lines)
     write_kinds_summary_md(generated_kinds_index_lines)
 
@@ -738,5 +845,6 @@ def write_kinds_docs(blocks_description):
 def write_blocks_gallery():
     pass
 
+
 if __name__ == "__main__":
     main()
diff --git a/docs/workflows/create_workflow_block.md b/docs/workflows/create_workflow_block.md
index 18a0636ff0..53f5be5c49 100644
--- a/docs/workflows/create_workflow_block.md
+++ b/docs/workflows/create_workflow_block.md
@@ -2252,3 +2252,126 @@ class BlockManifest(WorkflowBlockManifest):
 The default returns `None` — appropriate for blocks that are not
 parameterised by a Roboflow model (foundation models, logic blocks, sinks,
 etc.).
+
+### Runtime restrictions
+
+Some blocks behave differently — or fail outright — depending on the runtime
+they are deployed in (hosted serverless, dedicated deployment, self-hosted,
+inference pipeline), the step execution mode (local vs. remote), and the
+input mode (image vs. video). Override `get_restrictions()` on
+`WorkflowBlockManifest` to declare those caveats once, in the block, so the
+execution engine, the schema endpoint, and the auto-generated block gallery
+can all surface them consistently.
+
+The default returns `[]`, so existing blocks need **no changes**.
+
+#### Severity: `soft` vs. `hard`
+
+Each restriction carries a `Severity`:
+
+* **`Severity.SOFT`** — the block runs to completion and returns the right
+  output shape, but the values are degraded or meaningless (e.g. tracker IDs
+  reset across requests, cooldown does not throttle, file is written to
+  ephemeral disk). The workflow still runs; the result is just not what the
+  user expects.
+* **`Severity.HARD`** — the block does not run, raises, or cannot produce a
+  usable output in this runtime. The engine should refuse to compile the
+  workflow or fail fast.
+
+#### Scoping a restriction
+
+A `RuntimeRestriction` can scope itself along any combination of three axes.
+When an axis is left as `None`, the restriction applies to every value of
+that axis.
+
+* `applies_to_runtimes`: `Runtime.HOSTED_SERVERLESS`,
+  `Runtime.DEDICATED_DEPLOYMENT`, `Runtime.SELF_HOSTED_CPU`,
+  `Runtime.SELF_HOSTED_GPU`, `Runtime.INFERENCE_PIPELINE`.
+* `applies_to_step_execution_modes`: `StepExecutionMode.LOCAL`,
+  `StepExecutionMode.REMOTE`.
+* `applies_to_input_modes`: `RuntimeInputMode.IMAGE`,
+  `RuntimeInputMode.VIDEO`.
+
+The `note` field is a one-line, human-readable explanation of the failure
+mode or degraded behavior — describe what happens (e.g. "track_ids reset
+between requests", "writes to ephemeral /tmp"), not abstract preconditions.
+
+#### Shared presets
+
+Most caveats fall into a handful of common patterns, so reusable presets are
+exported alongside the dataclasses to keep wording consistent across the
+codebase. Reach for these first:
+
+* `STATEFUL_VIDEO_HTTP_SOFT_RESTRICTION` — for video-tracking / counting /
+  aggregation blocks whose per-video state lives in process memory and
+  resets between stateless HTTP requests.
+* `COOLDOWN_HTTP_SOFT_RESTRICTION` — for blocks whose cooldown / rate-limit
+  timer is held in process memory and therefore does not throttle on
+  multi-replica HTTP runtimes.
+* `STILL_IMAGE_INPUT_SOFT_RESTRICTION` — for blocks that depend on temporal
+  context (video or repeated frames) and provide little or no benefit on
+  still images.
+
+#### Example
+
+A line-counter block that maintains per-video state and is also meaningless
+on a still image declares both restrictions:
+
+```python
+from typing import List
+
+from inference.core.workflows.prototypes.block import (
+    STATEFUL_VIDEO_HTTP_SOFT_RESTRICTION,
+    STILL_IMAGE_INPUT_SOFT_RESTRICTION,
+    RuntimeRestriction,
+    WorkflowBlockManifest,
+)
+
+class BlockManifest(WorkflowBlockManifest):
+    # ...
+
+    @classmethod
+    def get_restrictions(cls) -> List[RuntimeRestriction]:
+        return [
+            STATEFUL_VIDEO_HTTP_SOFT_RESTRICTION,
+            STILL_IMAGE_INPUT_SOFT_RESTRICTION,
+        ]
+```
+
+A custom restriction (e.g. a `Severity.HARD` block that requires GPU hardware
+not present on the hosted serverless runtime) is declared inline; `List` is
+imported from `typing`, as in the previous example:
+
+```python
+from inference.core.workflows.prototypes.block import (
+    Runtime,
+    RuntimeRestriction,
+    Severity,
+    WorkflowBlockManifest,
+)
+
+class BlockManifest(WorkflowBlockManifest):
+    # ...
+
+    @classmethod
+    def get_restrictions(cls) -> List[RuntimeRestriction]:
+        return [
+            RuntimeRestriction(
+                severity=Severity.HARD,
+                note="Block requires a CUDA GPU; raises on CPU-only workers.",
+                applies_to_runtimes=[
+                    Runtime.HOSTED_SERVERLESS,
+                    Runtime.SELF_HOSTED_CPU,
+                ],
+            ),
+        ]
+```
+
+Restrictions declared this way are surfaced in three places:
+
+1. The `describe_interface` HTTP payload (via `RuntimeRestriction.to_dict()`),
+   so workflow clients and builders can warn users before a workflow is run.
+2. The auto-generated block gallery page for the block, under a "Runtime
+   compatibility" section right after **Properties**.
+3. The execution engine, which can choose to fail fast on `Severity.HARD`
+   restrictions for the current runtime.
diff --git a/inference/core/workflows/core_steps/models/foundation/qwen3_5vl/v2.py b/inference/core/workflows/core_steps/models/foundation/qwen3_5vl/v2.py
index 6b38e49dd6..6fc41c5ab5 100644
--- a/inference/core/workflows/core_steps/models/foundation/qwen3_5vl/v2.py
+++ b/inference/core/workflows/core_steps/models/foundation/qwen3_5vl/v2.py
@@ -6,6 +6,7 @@
 from inference.core.env import (
     HOSTED_CORE_MODEL_URL,
     LOCAL_INFERENCE_API_URL,
+    QWEN_3_5_ENABLED,
     WORKFLOWS_REMOTE_API_TARGET,
 )
 from inference.core.managers.base import ModelManager
@@ -24,6 +25,9 @@
 )
 from inference.core.workflows.prototypes.block import (
     BlockResult,
+    Runtime,
+    RuntimeRestriction,
+    Severity,
     WorkflowBlock,
     WorkflowBlockManifest,
 )
@@ -112,6 +116,31 @@ def get_execution_engine_compatibility(cls) -> Optional[str]:
     def get_supported_model_variants(cls) -> Optional[List[str]]:
         return ["qwen3_5-0.8b", "qwen3_5-2b", "qwen3_5-4b"]
 
+    @classmethod
+    def get_restrictions(cls) -> List[RuntimeRestriction]:
+        restrictions = [
+            RuntimeRestriction(
+                severity=Severity.HARD,
+                note="Requires a GPU; run_locally() loads a model that needs CUDA.",
+                applies_to_runtimes=[Runtime.SELF_HOSTED_CPU],
+                applies_to_step_execution_modes=[StepExecutionMode.LOCAL],
+            ),
+        ]
+        if not QWEN_3_5_ENABLED:
+            restrictions.append(
+                RuntimeRestriction(
+                    severity=Severity.HARD,
+                    note=(
+                        "QWEN_3_5_ENABLED=False on Roboflow Hosted Serverless: "
+                        "the Qwen3.5-VL endpoint is not registered, so "
+                        "run_remotely() returns 404."
+                    ),
+                    applies_to_runtimes=[Runtime.HOSTED_SERVERLESS],
+                    applies_to_step_execution_modes=[StepExecutionMode.REMOTE],
+                )
+            )
+        return restrictions
+
 
 ##########################################################################
 # Qwen3.5 Workflow Block
diff --git a/mkdocs.yml b/mkdocs.yml
index 4420413d95..dfc93c05fb 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -254,6 +254,7 @@ markdown_extensions:
   - meta
   - admonition
   - attr_list
+  - def_list
   - md_in_html
   - pymdownx.details
   - pymdownx.emoji:

From 38d76bec500c9686f6900ee72a7eab5d51f5699f Mon Sep 17 00:00:00 2001
From: Madhav-C <madhavcbusiness@gmail.com>
Date: Fri, 5 Jun 2026 12:05:10 -0500
Subject: [PATCH 69/76] fix(workflows): correct values_difference aggregation
 in Data Aggregator (#2388)

ValuesDifferenceState locked the first observed value into the min slot and the second into the max slot and never cross-compared them, so values_difference could be negative or wrong (e.g. [10,1] gave -9, [10,1,5] gave 0). Seed both min and max from the first value and update both on every observation so it always equals max-min. Adds unit tests for the aggregation.

Co-authored-by: madhavcodez <madhavcodez@users.noreply.github.com>
---
 .../analytics/data_aggregator/v1.py           |   4 +-
 .../analytics/test_data_aggregator_v1.py      | 156 ++++++++++++++++++
 2 files changed, 157 insertions(+), 3 deletions(-)
 create mode 100644 tests/workflows/unit_tests/core_steps/analytics/test_data_aggregator_v1.py

diff --git a/inference/core/workflows/core_steps/analytics/data_aggregator/v1.py b/inference/core/workflows/core_steps/analytics/data_aggregator/v1.py
index 85b15d02a4..f2b80946fb 100644
--- a/inference/core/workflows/core_steps/analytics/data_aggregator/v1.py
+++ b/inference/core/workflows/core_steps/analytics/data_aggregator/v1.py
@@ -451,15 +451,13 @@ def __init__(self):
     def on_data(self, value: Any) -> None:
         if self._min_value is None:
             self._min_value = value
-            return None
-        if self._max_value is None:
             self._max_value = value
             return None
         self._min_value = min(self._min_value, value)
         self._max_value = max(self._max_value, value)
 
     def get_result(self) -> Any:
-        if self._min_value is None or self._max_value is None:
+        if self._min_value is None:
             return None
         return self._max_value - self._min_value
 
diff --git a/tests/workflows/unit_tests/core_steps/analytics/test_data_aggregator_v1.py b/tests/workflows/unit_tests/core_steps/analytics/test_data_aggregator_v1.py
new file mode 100644
index 0000000000..0c51f5b509
--- /dev/null
+++ b/tests/workflows/unit_tests/core_steps/analytics/test_data_aggregator_v1.py
@@ -0,0 +1,156 @@
+import pytest
+
+from inference.core.workflows.core_steps.analytics.data_aggregator.v1 import (
+    DataAggregatorBlockV1,
+    ValuesDifferenceState,
+)
+
+
+def _feed(state: ValuesDifferenceState, values):
+    for value in values:
+        state.on_data(value=value)
+    return state.get_result()
+
+
+def test_values_difference_with_decreasing_then_min_pair():
+    # Arrange
+    state = ValuesDifferenceState()
+
+    # Act
+    result = _feed(state, [10, 1])
+
+    # Assert
+    assert result == 9
+
+
+def test_values_difference_with_max_first_then_smaller_values():
+    # Arrange
+    state = ValuesDifferenceState()
+
+    # Act
+    result = _feed(state, [10, 1, 5])
+
+    # Assert
+    assert result == 9
+
+
+def test_values_difference_with_monotonically_decreasing_values():
+    # Arrange
+    state = ValuesDifferenceState()
+
+    # Act
+    result = _feed(state, [5, 4, 3, 2, 1])
+
+    # Assert
+    assert result == 4
+
+
+def test_values_difference_with_monotonically_increasing_values():
+    # Arrange
+    state = ValuesDifferenceState()
+
+    # Act
+    result = _feed(state, [1, 2, 3, 4, 5])
+
+    # Assert
+    assert result == 4
+
+
+def test_values_difference_with_single_value_returns_zero():
+    # Arrange
+    state = ValuesDifferenceState()
+
+    # Act
+    result = _feed(state, [7])
+
+    # Assert
+    assert result == 0
+
+
+def test_values_difference_with_no_values_returns_none():
+    # Arrange
+    state = ValuesDifferenceState()
+
+    # Act
+    result = state.get_result()
+
+    # Assert
+    assert result is None
+
+
+def test_values_difference_with_all_equal_values_returns_zero():
+    # Arrange
+    state = ValuesDifferenceState()
+
+    # Act
+    result = _feed(state, [5, 5, 5])
+
+    # Assert
+    assert result == 0
+
+
+def test_values_difference_with_negative_values():
+    # Arrange
+    state = ValuesDifferenceState()
+
+    # Act
+    result = _feed(state, [-10, -3, -7])
+
+    # Assert
+    assert result == 7
+
+
+def test_values_difference_with_float_values():
+    # Arrange
+    state = ValuesDifferenceState()
+
+    # Act
+    result = _feed(state, [1.5, 3.7, 2.0])
+
+    # Assert
+    assert result == pytest.approx(2.2)
+
+
+def test_values_difference_is_never_negative():
+    # Regression test: previously the first observed value was locked into the
+    # min slot and the second into the max slot without cross-comparison, so a
+    # descending stream like [10, 1] produced -9 instead of 9.
+    # Arrange
+    state = ValuesDifferenceState()
+
+    # Act
+    result = _feed(state, [10, 1])
+
+    # Assert
+    assert result == 9
+    assert result >= 0
+
+
+def test_values_difference_matches_max_minus_min_end_to_end():
+    # Regression test at the block level: the documented behaviour is
+    # values_difference == max - min. Before the fix the block could return
+    # values_difference=0 while reporting max=10 and min=1.
+    # Arrange
+    block = DataAggregatorBlockV1()
+    aggregation_mode = {"speed": ["values_difference", "max", "min"]}
+    observations = [10, 1, 5, 5]
+
+    # Act
+    result = None
+    for value in observations:
+        result = block.run(
+            data={"speed": value},
+            data_operations={},
+            aggregation_mode=aggregation_mode,
+            interval_unit="runs",
+            interval=4,
+        )
+
+    # Assert
+    assert result is not None
+    assert result["speed_max"] == 10
+    assert result["speed_min"] == 1
+    assert (
+        result["speed_values_difference"] == result["speed_max"] - result["speed_min"]
+    )
+    assert result["speed_values_difference"] == 9

From 91ea00cfbefe319c982a63fb8a233a1689e188c1 Mon Sep 17 00:00:00 2001
From: Damian Kosowski <kosowski.d@gmail.com>
Date: Fri, 5 Jun 2026 19:11:58 +0200
Subject: [PATCH 70/76] Add CacheUnavailableError exception and enhance
 workflow specification retrieval (#2387)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This commit introduces a new exception, CacheUnavailableError, to handle cases where the ephemeral cache (e.g., Redis/Dragonfly) is unreachable. The get_workflow_specification function is updated to fall back to the Roboflow API when the cache is unavailable, improving error handling and robustness. Additionally, helper functions for retrieving and caching workflow specifications are added, ensuring that cache failures are logged and managed gracefully.

Co-authored-by: Paweł Pęczek <146137186+PawelPeczek-Roboflow@users.noreply.github.com>
---
 inference/core/exceptions.py                  |   4 +
 inference/core/roboflow_api.py                | 115 ++++++++++++++++--
 .../unit_tests/core/test_roboflow_api.py      |  90 ++++++++++++++
 3 files changed, 201 insertions(+), 8 deletions(-)

diff --git a/inference/core/exceptions.py b/inference/core/exceptions.py
index b8f01b156f..37b6f4447a 100644
--- a/inference/core/exceptions.py
+++ b/inference/core/exceptions.py
@@ -191,6 +191,10 @@ class MalformedWorkflowResponseError(RoboflowAPIRequestError):
     pass
 
 
+class CacheUnavailableError(Exception):
+    """Raised when the ephemeral cache (e.g. Redis/Dragonfly) cannot be reached."""
+
+
 class RoboflowAPIIAlreadyAnnotatedError(RoboflowAPIIAnnotationRejectionError):
     pass
 
diff --git a/inference/core/roboflow_api.py b/inference/core/roboflow_api.py
index 221bbb8243..220966c206 100644
--- a/inference/core/roboflow_api.py
+++ b/inference/core/roboflow_api.py
@@ -18,6 +18,8 @@
 import backoff
 import requests
 from cachetools.func import ttl_cache
+from redis.exceptions import ConnectionError as RedisConnectionError
+from redis.exceptions import TimeoutError as RedisTimeoutError
 from requests import Response, Timeout
 from requests_toolbelt import MultipartEncoder
 from yarl import URL
@@ -59,6 +61,7 @@
     WORKFLOWS_DEFINITION_CACHE_EXPIRY,
 )
 from inference.core.exceptions import (
+    CacheUnavailableError,
     MalformedRoboflowAPIResponseError,
     MalformedWorkflowResponseError,
     MissingDefaultModelError,
@@ -87,6 +90,11 @@
 
 LOCAL_API_KEY = "local"
 
+_EPHEMERAL_CACHE_UNAVAILABLE_EXCEPTIONS = (
+    RedisConnectionError,
+    RedisTimeoutError,
+)
+
 ENFORCE_CREDITS_VERIFICATION_HEADER = "x-enforce-credits-verification"
 ENFORCE_INTERNAL_ARTIFACTS_URLS_HEADER = "x-enforce-internal-artefacts-urls"
 
@@ -959,9 +967,31 @@ def get_workflow_specification(
     ephemeral_cache: Optional[BaseCache] = None,
     workflow_version_id: Optional[str] = None,
 ) -> dict:
+    """Fetch a workflow specification from cache or the Roboflow API.
+
+    When ephemeral cache (Redis/Dragonfly) is enabled but unreachable, falls back
+    to the Roboflow API instead of failing the request.
+
+    Args:
+        api_key: Roboflow API key, or None for unauthenticated fetches.
+        workspace_id: Workspace slug, or ``local`` for filesystem-backed workflows.
+        workflow_id: Workflow identifier within the workspace.
+        use_cache: If True, read and write the ephemeral workflow-definition cache.
+        ephemeral_cache: Cache backend; defaults to the process-global cache.
+        workflow_version_id: Optional pinned workflow version.
+
+    Returns:
+        Parsed workflow specification dict.
+
+    Raises:
+        MalformedWorkflowResponseError: API response lacks a valid specification.
+        RoboflowAPIRequestError: API request failed and no file-cache fallback applies.
+        FileNotFoundError: Local workspace workflow file is missing.
+        ValueError: Invalid local workflow id.
+    """
     ephemeral_cache = ephemeral_cache or cache
     if use_cache:
-        cached_entry = _retrieve_workflow_specification_from_ephemeral_cache(
+        cached_entry = _try_retrieve_workflow_specification_from_ephemeral_cache(
             api_key=api_key,
             workspace_id=workspace_id,
             workflow_id=workflow_id,
@@ -1033,7 +1063,7 @@ def get_workflow_specification(
         if isinstance(specification, dict):
             specification["id"] = response["workflow"].get("id")
         if use_cache:
-            _cache_workflow_specification_in_ephemeral_cache(
+            _try_cache_workflow_specification_in_ephemeral_cache(
                 api_key=api_key,
                 workspace_id=workspace_id,
                 workflow_id=workflow_id,
@@ -1041,6 +1071,7 @@ def get_workflow_specification(
                 specification=specification,
                 ephemeral_cache=ephemeral_cache,
             )
+
         return specification
     except KeyError as error:
         raise MalformedWorkflowResponseError(
@@ -1052,6 +1083,57 @@ def get_workflow_specification(
         ) from error
 
 
+def _try_retrieve_workflow_specification_from_ephemeral_cache(
+    api_key: Optional[str],
+    workspace_id: WorkspaceID,
+    workflow_id: str,
+    ephemeral_cache: BaseCache,
+    workflow_version_id: Optional[str] = None,
+) -> Optional[dict]:
+    """Return a cached specification, or None when the cache is down or misses."""
+    try:
+        cached_entry = _retrieve_workflow_specification_from_ephemeral_cache(
+            api_key=api_key,
+            workspace_id=workspace_id,
+            workflow_id=workflow_id,
+            workflow_version_id=workflow_version_id,
+            ephemeral_cache=ephemeral_cache,
+        )
+    except CacheUnavailableError as error:
+        logger.warning(
+            "Ephemeral workflow specification cache unavailable, fetching from Roboflow API: %s",
+            error,
+        )
+        return None
+
+    return cached_entry
+
+
+def _try_cache_workflow_specification_in_ephemeral_cache(
+    api_key: Optional[str],
+    workspace_id: WorkspaceID,
+    workflow_id: str,
+    specification: dict,
+    ephemeral_cache: BaseCache,
+    workflow_version_id: Optional[str] = None,
+) -> None:
+    """Best-effort write of a specification to ephemeral cache."""
+    try:
+        _cache_workflow_specification_in_ephemeral_cache(
+            api_key=api_key,
+            workspace_id=workspace_id,
+            workflow_id=workflow_id,
+            workflow_version_id=workflow_version_id,
+            specification=specification,
+            ephemeral_cache=ephemeral_cache,
+        )
+    except CacheUnavailableError as error:
+        logger.warning(
+            "Failed to cache workflow specification in ephemeral cache: %s",
+            error,
+        )
+
+
 def _retrieve_workflow_specification_from_ephemeral_cache(
     api_key: Optional[str],
     workspace_id: WorkspaceID,
@@ -1065,7 +1147,12 @@ def _retrieve_workflow_specification_from_ephemeral_cache(
         workflow_id=workflow_id,
         workflow_version_id=workflow_version_id,
     )
-    return ephemeral_cache.get(key=cache_key)
+    try:
+        cached_entry = ephemeral_cache.get(key=cache_key)
+    except _EPHEMERAL_CACHE_UNAVAILABLE_EXCEPTIONS as error:
+        _raise_cache_unavailable_error(operation="read", error=error)
+
+    return cached_entry
 
 
 def _cache_workflow_specification_in_ephemeral_cache(
@@ -1082,11 +1169,23 @@ def _cache_workflow_specification_in_ephemeral_cache(
         workflow_id=workflow_id,
         workflow_version_id=workflow_version_id,
     )
-    ephemeral_cache.set(
-        key=cache_key,
-        value=specification,
-        expire=WORKFLOWS_DEFINITION_CACHE_EXPIRY,
-    )
+    try:
+        ephemeral_cache.set(
+            key=cache_key,
+            value=specification,
+            expire=WORKFLOWS_DEFINITION_CACHE_EXPIRY,
+        )
+    except _EPHEMERAL_CACHE_UNAVAILABLE_EXCEPTIONS as error:
+        _raise_cache_unavailable_error(operation="write", error=error)
+
+
+def _raise_cache_unavailable_error(operation: str, error: Exception) -> None:
+    if operation == "read":
+        message = "Could not read workflow specification from ephemeral cache"
+    else:
+        message = "Could not write workflow specification to ephemeral cache"
+
+    raise CacheUnavailableError(message) from error
 
 
 def _prepare_workflow_response_cache_key(
diff --git a/tests/inference/unit_tests/core/test_roboflow_api.py b/tests/inference/unit_tests/core/test_roboflow_api.py
index 64b3719c85..5928c87a43 100644
--- a/tests/inference/unit_tests/core/test_roboflow_api.py
+++ b/tests/inference/unit_tests/core/test_roboflow_api.py
@@ -7,6 +7,8 @@
 import pytest
 import requests.exceptions
 from aioresponses import aioresponses
+from redis.exceptions import ConnectionError as RedisConnectionError
+from redis.exceptions import TimeoutError as RedisTimeoutError
 from requests_mock import Mocker
 from yarl import URL
 
@@ -3326,6 +3328,94 @@ def test_get_workflow_specification_raises_timeout_when_no_cache(
         )
 
 
+@mock.patch.object(roboflow_api.requests, "get")
+def test_get_workflow_specification_falls_back_to_api_when_ephemeral_cache_get_fails(
+    get_mock: MagicMock,
+) -> None:
+    # Previously, a Redis connection error would bubble up and look like a Roboflow
+    # API outage, because redis.exceptions.ConnectionError subclasses the builtin
+    # ConnectionError, which wrap_roboflow_api_errors would map to
+    # RoboflowAPIConnectionError if it escaped the ephemeral cache layer.
+
+    # given
+    delete_cached_workflow_response_if_exists(
+        workspace_id="my_workspace",
+        workflow_id="cache_unavailable_workflow",
+        api_key="my_api_key",
+    )
+    get_mock.return_value = MagicMock(
+        status_code=200,
+        json=MagicMock(
+            return_value={
+                "workflow": {
+                    "config": json.dumps(
+                        {"specification": {"from": "api_after_cache_failure"}}
+                    )
+                }
+            }
+        ),
+    )
+    ephemeral_cache = MagicMock()
+    ephemeral_cache.get.side_effect = RedisConnectionError("Dragonfly unreachable")
+
+    # when
+    result = get_workflow_specification(
+        api_key="my_api_key",
+        workspace_id="my_workspace",
+        workflow_id="cache_unavailable_workflow",
+        ephemeral_cache=ephemeral_cache,
+    )
+
+    # then
+    assert result == {
+        "from": "api_after_cache_failure",
+        "id": None,
+    }
+    assert get_mock.call_count == 1
+
+
+@mock.patch.object(roboflow_api.requests, "get")
+def test_get_workflow_specification_returns_when_ephemeral_cache_set_fails(
+    get_mock: MagicMock,
+) -> None:
+    # given
+    delete_cached_workflow_response_if_exists(
+        workspace_id="my_workspace",
+        workflow_id="cache_set_failure_workflow",
+        api_key="my_api_key",
+    )
+    get_mock.return_value = MagicMock(
+        status_code=200,
+        json=MagicMock(
+            return_value={
+                "workflow": {
+                    "config": json.dumps(
+                        {"specification": {"from": "api_despite_cache_set_failure"}}
+                    )
+                }
+            }
+        ),
+    )
+    ephemeral_cache = MagicMock()
+    ephemeral_cache.get.return_value = None
+    ephemeral_cache.set.side_effect = RedisTimeoutError("Dragonfly write timeout")
+
+    # when
+    result = get_workflow_specification(
+        api_key="my_api_key",
+        workspace_id="my_workspace",
+        workflow_id="cache_set_failure_workflow",
+        ephemeral_cache=ephemeral_cache,
+    )
+
+    # then
+    assert result == {
+        "from": "api_despite_cache_set_failure",
+        "id": None,
+    }
+    assert get_mock.call_count == 1
+
+
 # --- LICENSE_SERVER / wrap_url proxy tests ---
 # These verify that all API calls route through the license server proxy
 # when LICENSE_SERVER is configured (air-gapped deployment support).

From 462f17612a5920bf2fead945d7d47371768d9013 Mon Sep 17 00:00:00 2001
From: Riaz Virani <riaz@roboflow.com>
Date: Fri, 5 Jun 2026 13:13:30 -0400
Subject: [PATCH 71/76] docs: set LOAD_ENTERPRISE_BLOCKS=TRUE in docs workflow
 build step (#2386)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* docs: set LOAD_ENTERPRISE_BLOCKS=TRUE in docs workflow build step

Without this flag, the block-enumeration loader silently drops every
enterprise block, so the docs CI never generates pages for them and
links from the frontend 404. The workflow already checks out and
installs the private workflows-enterprise-blocks repo; this just makes
the build step actually load them.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* docs: fix source links for enterprise blocks in build_block_docs

get_source_link_for_block_class only handled inference/core/workflows/,
so enabling enterprise blocks produced pages with href="None" (the
split raised IndexError and the link fell back to None). Resolve the
path relative to the inference package root instead, so both core
(inference/core/workflows/...) and enterprise
(inference/enterprise/workflows/...) blocks get valid links. Core-block
links are unchanged.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Co-authored-by: Paweł Pęczek <146137186+PawelPeczek-Roboflow@users.noreply.github.com>
---
 .github/workflows/docs.yml           |  2 ++
 development/docs/build_block_docs.py | 14 +++++++++++---
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index c7fcdc74f7..44b7bb2e3e 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -93,6 +93,8 @@ jobs:
         working-directory: ./inference_repo
 
       - name: Build block docs
+        env:
+          LOAD_ENTERPRISE_BLOCKS: "TRUE"
         run: python -m development.docs.build_block_docs
         working-directory: ./inference_repo
 
diff --git a/development/docs/build_block_docs.py b/development/docs/build_block_docs.py
index 3b82732ad0..4b7ba4306e 100644
--- a/development/docs/build_block_docs.py
+++ b/development/docs/build_block_docs.py
@@ -500,9 +500,17 @@ def _escape_jinja2_dollar_expression(match: re.Match) -> str:
 
 def get_source_link_for_block_class(block_class: Type[WorkflowBlock]) -> str:
     try:
-        filename = inspect.getfile(block_class).split("inference/core/workflows/")[1]
-        return f"https://github.com/roboflow/inference/blob/main/inference/core/workflows/{filename}"
-    except Exception as e:
+        filepath = inspect.getfile(block_class)
+        # Resolve the path relative to the `inference` package root so that both
+        # core blocks (inference/core/workflows/...) and enterprise blocks
+        # (inference/enterprise/workflows/...) produce valid links.
+        marker = f"{os.sep}inference{os.sep}"
+        idx = filepath.rfind(marker)
+        if idx == -1:
+            return None
+        relative_path = filepath[idx + 1 :].replace(os.sep, "/")
+        return f"https://github.com/roboflow/inference/blob/main/{relative_path}"
+    except Exception:
         return None
 
 

From 0be06cf5d02733b266b70c6cffbb271c7e291c96 Mon Sep 17 00:00:00 2001
From: Patrick Nihranz <patrick.nihranz@roboflow.com>
Date: Fri, 5 Jun 2026 13:15:33 -0400
Subject: [PATCH 72/76] Expose NumberInRange operator in workflow builder UI
 (#2229)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Expose NumberInRange operator in workflow builder UI

The (Number) in range operator was already implemented in the evaluation
engine and BinaryStatement union but was not included in the introspection
export, making it invisible to the workflow builder UI.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* Implement NumberInRange operator in query language backend

Adds the NumberInRange BinaryOperator class and its evaluation lambda so
the operator exposed in the workflow builder UI has a working backend.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Co-authored-by: Paweł Pęczek <146137186+PawelPeczek-Roboflow@users.noreply.github.com>

From 815c4eb1bb9051dd30973e117241be0a5c17f0b0 Mon Sep 17 00:00:00 2001
From: Sachin Agarwal <sachin@roboflow.com>
Date: Fri, 5 Jun 2026 13:41:38 -0400
Subject: [PATCH 73/76] fix(cache): set server-side TTL on model-monitoring
 zset writes (#2390)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

RedisCache.zadd() accepted an `expire` arg but never issued a Redis
EXPIRE — it only recorded the intended expiry in the in-process
`self.zexpires` dict, reaped by the `_expire()` daemon thread. That
bookkeeping lives solely in the writing process' memory, so if the
process dies before a member is trimmed (e.g. an autoscaled/serverless
inference pod scaling down), the key is orphaned in Redis forever with
TTL -1.

The model-monitoring buffer in ModelManager writes one record per
inference into `inference:<server-id>:<model>` (and `error:<...>`,
`models`) via this path with `expire=METRICS_INTERVAL*2`. Because the
server id is unique per pod and pods churn constantly under autoscaling,
every pod death stranded its keys — observed as ~45k keys / 7GB of dead
zsets (e.g. a single `inference:<dead-id>:printed-doc/2` with 282k
members / 133MB, last written 46 days prior) filling a shared backing
store to its memory ceiling.

Issue ZADD + EXPIRE in one pipeline so the key carries a real sliding
server-side TTL: a live model keeps re-arming it (and `_expire()` still
trims individual members), while a dead pod's keys self-reclaim ~expire
seconds after the last write regardless of process lifecycle. The
in-process bookkeeping is retained as a best-effort fine-grained trim.

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Co-authored-by: Paweł Pęczek <146137186+PawelPeczek-Roboflow@users.noreply.github.com>
---
 inference/core/cache/redis.py                 | 19 +++++-
 .../unit_tests/core/cache/test_redis_cache.py | 59 +++++++++++++++++++
 2 files changed, 77 insertions(+), 1 deletion(-)
 create mode 100644 tests/inference/unit_tests/core/cache/test_redis_cache.py

diff --git a/inference/core/cache/redis.py b/inference/core/cache/redis.py
index 53d4ea5e9f..d1f101be2f 100644
--- a/inference/core/cache/redis.py
+++ b/inference/core/cache/redis.py
@@ -115,9 +115,26 @@ def zadd(self, key: str, value: Any, score: float, expire: float = None):
         """
         # serializable_value = self.ensure_serializable(value)
         value = json.dumps(value)
-        self.client.zadd(key, {value: score})
         if expire:
+            # Set a server-side (sliding) TTL on the whole sorted set in the same
+            # round-trip as the ZADD. Without this the key only ever expired via the
+            # in-process ``self.zexpires`` bookkeeping reaped by ``_expire()`` below,
+            # which lives solely in this process' memory. If the process dies before
+            # those members are trimmed (e.g. a serverless/autoscaled pod scaling
+            # down), the key is orphaned in Redis forever with TTL -1 — an unbounded
+            # memory leak. A real EXPIRE lets Redis reclaim the key ``expire`` seconds
+            # after the last write regardless of process lifecycle. ``max(1, ...)``
+            # guards against EXPIRE 0 (immediate delete) for sub-second values.
+            with self.client.pipeline() as pipe:
+                pipe.zadd(key, {value: score})
+                pipe.expire(key, max(1, int(expire)))
+                pipe.execute()
+            # Keep per-member bookkeeping so ``_expire()`` can still trim individual
+            # expired members from an otherwise-live key (now a best-effort
+            # optimisation rather than the only safety net).
             self.zexpires[(key, score)] = expire + time.time()
+        else:
+            self.client.zadd(key, {value: score})
 
     def zrangebyscore(
         self,
diff --git a/tests/inference/unit_tests/core/cache/test_redis_cache.py b/tests/inference/unit_tests/core/cache/test_redis_cache.py
new file mode 100644
index 0000000000..054b5635a1
--- /dev/null
+++ b/tests/inference/unit_tests/core/cache/test_redis_cache.py
@@ -0,0 +1,59 @@
+from unittest.mock import MagicMock, patch
+
+from inference.core.cache.redis import RedisCache
+
+
+def _build_cache_with_mock_client():
+    """Construct a RedisCache without touching a real Redis or starting the
+    background ``_expire`` thread. Returns (cache, mock_client)."""
+    with patch("inference.core.cache.redis.redis.Redis") as mock_redis_cls, patch(
+        "inference.core.cache.redis.threading.Thread"
+    ):
+        mock_client = MagicMock()
+        mock_redis_cls.return_value = mock_client
+        cache = RedisCache()
+    return cache, mock_client
+
+
+def test_zadd_with_expire_sets_server_side_ttl():
+    # given
+    cache, mock_client = _build_cache_with_mock_client()
+    pipe = MagicMock()
+    mock_client.pipeline.return_value.__enter__.return_value = pipe
+
+    # when
+    cache.zadd("inference:srv-0:model/1", value={"foo": "bar"}, score=123.0, expire=120.0)
+
+    # then - ZADD and a real server-side EXPIRE are issued in one pipeline
+    pipe.zadd.assert_called_once()
+    assert pipe.zadd.call_args.args[0] == "inference:srv-0:model/1"
+    pipe.expire.assert_called_once_with("inference:srv-0:model/1", 120)
+    pipe.execute.assert_called_once()
+    # and the in-process bookkeeping is still recorded for fine-grained trimming
+    assert ("inference:srv-0:model/1", 123.0) in cache.zexpires
+
+
+def test_zadd_expire_is_floored_to_at_least_one_second():
+    # given - a sub-second expire must never become EXPIRE 0 (immediate delete)
+    cache, mock_client = _build_cache_with_mock_client()
+    pipe = MagicMock()
+    mock_client.pipeline.return_value.__enter__.return_value = pipe
+
+    # when
+    cache.zadd("k", value={"a": 1}, score=1.0, expire=0.4)
+
+    # then
+    pipe.expire.assert_called_once_with("k", 1)
+
+
+def test_zadd_without_expire_does_not_set_ttl_or_bookkeeping():
+    # given
+    cache, mock_client = _build_cache_with_mock_client()
+
+    # when
+    cache.zadd("k", value={"a": 1}, score=1.0)
+
+    # then - plain ZADD, no pipeline / EXPIRE, no zexpires entry
+    mock_client.zadd.assert_called_once()
+    mock_client.pipeline.assert_not_called()
+    assert cache.zexpires == {}

From 62463967e97a474976761b4501528dbddf12dcbb Mon Sep 17 00:00:00 2001
From: Damian Kosowski <damian@roboflow.com>
Date: Mon, 8 Jun 2026 18:24:58 +0200
Subject: [PATCH 74/76] Ignore rules

---
 .gitignore | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 66c046a0b5..470a6181e0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -217,4 +217,6 @@ inference_testing
 *.rrd
 
 openspec*/
-opsx/
\ No newline at end of file
+opsx/
+
+.cursor/
\ No newline at end of file

From 2a83816a6307d22170056b729e796f549bb51f26 Mon Sep 17 00:00:00 2001
From: Damian Kosowski <damian@roboflow.com>
Date: Mon, 8 Jun 2026 18:49:14 +0200
Subject: [PATCH 75/76] Add triton to jetson

---
 docker/dockerfiles/Dockerfile.onnx.jetson.6.0.0 | 7 ++++++-
 docker/dockerfiles/Dockerfile.onnx.jetson.6.2.0 | 5 +++++
 docker/dockerfiles/Dockerfile.onnx.jetson.7.1.0 | 7 ++++++-
 3 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/docker/dockerfiles/Dockerfile.onnx.jetson.6.0.0 b/docker/dockerfiles/Dockerfile.onnx.jetson.6.0.0
index 4c96726c6e..97becf42e4 100644
--- a/docker/dockerfiles/Dockerfile.onnx.jetson.6.0.0
+++ b/docker/dockerfiles/Dockerfile.onnx.jetson.6.0.0
@@ -222,6 +222,7 @@ RUN cd /usr/local/lib/python3.10/dist-packages && \
 FROM nvcr.io/nvidia/l4t-cuda:12.6.11-runtime
 
 ARG DEBIAN_FRONTEND=noninteractive
+ARG TRITON_VERSION=3.6.0
 ENV LANG=en_US.UTF-8
 
 WORKDIR /app
@@ -259,7 +260,11 @@ RUN apt-get update -y && \
     ffmpeg \
     && rm -rf /var/lib/apt/lists/*
 
-# Copy GDAL (skip headers - not needed in runtime)
+RUN python3 -m pip install --no-cache-dir \
+    --extra-index-url https://pypi.jetson-ai-lab.io/jp6/cu126 \
+    "triton==${TRITON_VERSION}"
+
+# Copy GDAL (skip headers - not needed in runtime)
 COPY --from=builder /usr/local/bin/gdal* /usr/local/bin/
 COPY --from=builder /usr/local/bin/ogr* /usr/local/bin/
 COPY --from=builder /usr/local/bin/gnm* /usr/local/bin/
diff --git a/docker/dockerfiles/Dockerfile.onnx.jetson.6.2.0 b/docker/dockerfiles/Dockerfile.onnx.jetson.6.2.0
index 1f2b9d3234..84f844e3b4 100644
--- a/docker/dockerfiles/Dockerfile.onnx.jetson.6.2.0
+++ b/docker/dockerfiles/Dockerfile.onnx.jetson.6.2.0
@@ -343,6 +343,7 @@ RUN cd /usr/local/lib/python3.10/dist-packages && \
 FROM nvcr.io/nvidia/l4t-cuda:12.6.11-runtime
 
 ARG DEBIAN_FRONTEND=noninteractive
+ARG TRITON_VERSION=3.6.0
 ENV LANG=en_US.UTF-8
 
 WORKDIR /app
@@ -391,6 +392,10 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-cache-jp62-ru
     gir1.2-gst-plugins-base-1.0 \
     libatlas3-base
 
+RUN python3 -m pip install --no-cache-dir \
+    --extra-index-url https://pypi.jetson-ai-lab.io/jp6/cu126 \
+    "triton==${TRITON_VERSION}"
+
 # Copy GDAL (skip headers - not needed in runtime)
 COPY --from=builder /usr/local/bin/gdal* /usr/local/bin/
 COPY --from=builder /usr/local/bin/ogr* /usr/local/bin/
diff --git a/docker/dockerfiles/Dockerfile.onnx.jetson.7.1.0 b/docker/dockerfiles/Dockerfile.onnx.jetson.7.1.0
index b0ae5b2cc4..e3c53e78ef 100644
--- a/docker/dockerfiles/Dockerfile.onnx.jetson.7.1.0
+++ b/docker/dockerfiles/Dockerfile.onnx.jetson.7.1.0
@@ -252,6 +252,7 @@ RUN cd /usr/local/lib/python3.12/dist-packages && \
 FROM ubuntu:24.04
 
 ARG DEBIAN_FRONTEND=noninteractive
+ARG TRITON_VERSION=3.6.0
 ENV LANG=en_US.UTF-8
 
 WORKDIR /app
@@ -313,7 +314,11 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-cache-jp71-ru
     libglib2.0-0 \
     libatomic1
 
-# Copy GDAL (skip headers - not needed in runtime)
+RUN python3 -m pip install --break-system-packages --no-cache-dir \
+    --extra-index-url https://pypi.jetson-ai-lab.io/sbsa/cu130 \
+    "triton==${TRITON_VERSION}"
+
+# Copy GDAL (skip headers - not needed in runtime)
 COPY --from=builder /usr/local/bin/gdal* /usr/local/bin/
 COPY --from=builder /usr/local/bin/ogr* /usr/local/bin/
 COPY --from=builder /usr/local/bin/gnm* /usr/local/bin/

From a0612bdc786406195a613897c8a34ba70c489235 Mon Sep 17 00:00:00 2001
From: Aseem Saxena <aseem.bits@gmail.com>
Date: Mon, 8 Jun 2026 11:39:08 -0700
Subject: [PATCH 76/76] Delete
 development/stream_interface/rfdetr_seg_trt_1080_benchmark.md

---
 .../rfdetr_seg_trt_1080_benchmark.md          | 70 -------------------
 1 file changed, 70 deletions(-)
 delete mode 100644 development/stream_interface/rfdetr_seg_trt_1080_benchmark.md

diff --git a/development/stream_interface/rfdetr_seg_trt_1080_benchmark.md b/development/stream_interface/rfdetr_seg_trt_1080_benchmark.md
deleted file mode 100644
index 17dcac8efa..0000000000
--- a/development/stream_interface/rfdetr_seg_trt_1080_benchmark.md
+++ /dev/null
@@ -1,70 +0,0 @@
-# RF-DETR Seg TensorRT 1080p Variant Benchmark
-
-This note records the June 4, 2026 check for the largest RF-DETR segmentation
-variant that can run the `vehicles_1080p.mp4` stream workflow at 30 FPS on the
-Jetson Orin NX 8GB target used for PR 2405.
-
-## Context
-
-The public non-nano RF-DETR segmentation TensorRT packages are built for L4/T4,
-so they are not directly loadable on Jetson Orin. For this benchmark, local Orin
-FP16 TensorRT packages were compiled from the public ONNX packages and wired into
-the workflow as untracked local directories.
-
-The Triton sparse RLE postprocess path previously rejected non-nano mask sizes
-because it scanned the source mask with one Triton vector and capped source mask
-area below the `small` model's 96x96 mask. The current patch adds a tiled source
-mask bounds pass and raises the supported sparse path shape limit to RF-DETR Seg
-2XLarge's 192x192 mask with 300 queries and COCO class logits.
-
-## Benchmark Command
-
-Use the stream workflow with the optimization flags enabled:
-
-```bash
-env \
-  PYTHONPATH=/app/helloworld/inference/inference_models:/app/helloworld/inference \
-  USE_INFERENCE_MODELS=True \
-  ALLOW_INFERENCE_MODELS_UNTRUSTED_PACKAGES=True \
-  ALLOW_INFERENCE_MODELS_DIRECTLY_ACCESS_LOCAL_PACKAGES=True \
-  INFERENCE_MODELS_RFDETR_TRITON_POSTPROC_ENABLED=true \
-  INFERENCE_MODELS_RFDETR_TRITON_PREPROC_ENABLED=true \
-  RFDETR_PIPELINE_DEPTH=2 \
-  ENABLE_AUTO_CUDA_GRAPHS_FOR_TRT_BACKEND=true \
-  python development/stream_interface/rfdetr_nano_seg_trt_workflow.py \
-    --video_reference vehicles_1080p.mp4 \
-    --model_id rfdetr-seg-large/1 \
-    --backend trt
-```
-
-Change `--model_id` to the local package alias for each variant. A depth-3
-sanity run was also performed for `xlarge`.
-
-## Results
-
-| Variant | Input size | Pipeline depth | FPS |
-| --- | ---: | ---: | ---: |
-| `rfdetr-seg-small/1` | 384 | 2 | 63.85 |
-| `rfdetr-seg-large/1` | 504 | 2 | 35.49 |
-| `rfdetr-seg-xlarge/1` | 624 | 2 | 20.94 |
-| `rfdetr-seg-xlarge/1` | 624 | 3 | 20.91 |
-| `rfdetr-seg-2xlarge/1` | 768 | 2 | 12.90 |
-
-`large` is the largest tested non-nano RF-DETR Seg variant that clears 30 FPS on
-this 1080p workload with all optimization flags enabled. `xlarge` remains below
-30 FPS even when increasing pipeline depth from 2 to 3.
-
-## Verification
-
-The focused postprocess test suite passed after the 2XLarge shape-limit patch:
-
-```bash
-PYTHONPATH=/app/helloworld/inference/inference_models:/app/helloworld/inference \
-  python -m pytest tests/unit_tests/models/rfdetr/test_triton_postprocess.py
-```
-
-Result:
-
-```text
-24 passed, 23 warnings
-```