aseembits93 · aseembits93 · May 22, 2026 · May 22, 2026 · May 22, 2026 · May 23, 2026
@@ -45,7 +45,7 @@ jobs:
         run: |
           python -m pip install --upgrade pip
           pip install --upgrade setuptools
-          pip install -r requirements/requirements.cli.txt -r requirements/requirements.test.unit.txt -r requirements/requirements.sdk.http.txt
+          pip install -r requirements/requirements.cli.txt -r requirements/_requirements.txt -r requirements/requirements.cpu.txt -r requirements/requirements.test.unit.txt -r requirements/requirements.sdk.http.txt
       - name: 🧪 Unit Tests of Inference CLI
         timeout-minutes: 30
         run: python -m pytest tests/inference_cli/unit_tests
@@ -28,6 +28,39 @@ RUN python -m pip install \
     "torchvision==0.19.1" \
     "transformers<=5.7.0"
 
+# Torch 2.4.1 on JP5.1 pairs with Triton 3.0.0.
+# Build the Triton 3.0.0 wheel from source and install it into the image:
+#   1. `set -eux` makes the whole chain abort on the first failing command.
+#   2. Install the apt build toolchain (compiler, cmake, ninja, git, dev headers).
+#   3. Shallow-clone triton-lang/triton at tag v3.0.0 and pull its submodules.
+#   4. Pre-install the Python build requirements, since the wheel is built with
+#      --no-build-isolation and pip will not fetch them on its own.
+#   5. Build the wheel from the repo's python/ directory with MAX_JOBS=4
+#      (presumably caps parallel compile jobs - confirm against Triton's build).
+#   6. `set -- <glob>` loads the matching wheel path into "$1"; when nothing
+#      matched, "$1" is the unexpanded pattern, the -e test fails and the build stops.
+#   7. Install the wheel, then drop the apt lists and the temporary build dirs.
+# NOTE(review): the apt lists are removed twice (after the apt install and again
+# at the end); nothing in between repopulates them, so one of the two is redundant.
+# NOTE(review): the apt build packages are not purged and remain in the image.
+RUN set -eux; \
+    apt-get update -y; \
+    apt-get install -y --no-install-recommends \
+        build-essential \
+        cmake \
+        git \
+        libxml2-dev \
+        ninja-build \
+        python3-dev \
+        zlib1g-dev; \
+    rm -rf /var/lib/apt/lists/*; \
+    rm -rf /tmp/triton-build /tmp/triton-wheels; \
+    mkdir -p /tmp/triton-wheels; \
+    git clone --depth 1 --branch v3.0.0 https://github.com/triton-lang/triton.git /tmp/triton-build/src; \
+    git -C /tmp/triton-build/src submodule update --init --recursive; \
+    python -m pip install --no-cache-dir \
+        "setuptools>=40.8.0" \
+        wheel \
+        "cmake>=3.18,<4.0" \
+        "ninja>=1.11.1"; \
+    MAX_JOBS=4 python -m pip wheel --no-cache-dir \
+        --no-build-isolation \
+        --no-deps \
+        /tmp/triton-build/src/python \
+        --wheel-dir /tmp/triton-wheels; \
+    set -- /tmp/triton-wheels/triton-3.0.0-*.whl; \
+    if [ ! -e "$1" ]; then \
+        echo "Expected source-built Triton wheel for 3.0.0 was not found" >&2; \
+        exit 1; \
+    fi; \
+    python -m pip install --no-cache-dir "$1"; \
+    rm -rf /var/lib/apt/lists/* /tmp/triton-build /tmp/triton-wheels
 
 WORKDIR /app/
 COPY inference inference

@@ -259,6 +259,41 @@ RUN apt-get update -y && \
     ffmpeg \
     && rm -rf /var/lib/apt/lists/*
 
+# Torch 2.6.0 on JP6.0 pairs with Triton 3.2.0.
+# Build the Triton 3.2.0 wheel from source and install it into the image:
+#   1. `set -eux` makes the whole chain abort on the first failing command.
+#   2. Install the apt build toolchain (compiler, cmake, ninja, git, dev headers).
+#   3. Shallow-clone triton-lang/triton at tag v3.2.0 and pull its submodules.
+#   4. Pre-install the Python build requirements (including pybind11), since the
+#      wheel is built with --no-build-isolation and pip will not fetch them.
+#   5. Build the wheel from the repo's python/ directory with MAX_JOBS=4
+#      (presumably caps parallel compile jobs - confirm against Triton's build).
+#   6. `set -- <glob>` loads the matching wheel path into "$1"; when nothing
+#      matched, "$1" is the unexpanded pattern, the -e test fails and the build stops.
+#   7. Install the wheel, then drop the apt lists and the temporary build dirs.
+# NOTE(review): the apt lists are removed twice (after the apt install and again
+# at the end); nothing in between repopulates them, so one of the two is redundant.
+# NOTE(review): the apt build packages are not purged and remain in the image.
+RUN set -eux; \
+    apt-get update -y; \
+    apt-get install -y --no-install-recommends \
+        build-essential \
+        cmake \
+        git \
+        libxml2-dev \
+        ninja-build \
+        python3-dev \
+        zlib1g-dev; \
+    rm -rf /var/lib/apt/lists/*; \
+    rm -rf /tmp/triton-build /tmp/triton-wheels; \
+    mkdir -p /tmp/triton-wheels; \
+    git clone --depth 1 --branch v3.2.0 https://github.com/triton-lang/triton.git /tmp/triton-build/src; \
+    git -C /tmp/triton-build/src submodule update --init --recursive; \
+    python3 -m pip install --no-cache-dir \
+        "setuptools>=40.8.0" \
+        wheel \
+        "cmake>=3.18,<4.0" \
+        "ninja>=1.11.1" \
+        "pybind11>=2.13.1"; \
+    MAX_JOBS=4 python3 -m pip wheel --no-cache-dir \
+        --no-build-isolation \
+        --no-deps \
+        /tmp/triton-build/src/python \
+        --wheel-dir /tmp/triton-wheels; \
+    set -- /tmp/triton-wheels/triton-3.2.0-*.whl; \
+    if [ ! -e "$1" ]; then \
+        echo "Expected source-built Triton wheel for 3.2.0 was not found" >&2; \
+        exit 1; \
+    fi; \
+    python3 -m pip install --no-cache-dir "$1"; \
+    rm -rf /var/lib/apt/lists/* /tmp/triton-build /tmp/triton-wheels
+
 # Copy GDAL (skip headers - not needed in runtime)
 COPY --from=builder /usr/local/bin/gdal* /usr/local/bin/
 COPY --from=builder /usr/local/bin/ogr* /usr/local/bin/

@@ -391,6 +391,41 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-cache-jp62-ru
     gir1.2-gst-plugins-base-1.0 \
     libatlas3-base
 
+# Torch 2.6.0 on JP6.2 pairs with Triton 3.2.0.
+# Build the Triton 3.2.0 wheel from source and install it into the image:
+#   1. `set -eux` makes the whole chain abort on the first failing command.
+#   2. Install the apt build toolchain (compiler, cmake, ninja, git, dev headers).
+#   3. Shallow-clone triton-lang/triton at tag v3.2.0 and pull its submodules.
+#   4. Pre-install the Python build requirements (including pybind11), since the
+#      wheel is built with --no-build-isolation and pip will not fetch them.
+#   5. Build the wheel from the repo's python/ directory with MAX_JOBS=4
+#      (presumably caps parallel compile jobs - confirm against Triton's build).
+#   6. `set -- <glob>` loads the matching wheel path into "$1"; when nothing
+#      matched, "$1" is the unexpanded pattern, the -e test fails and the build stops.
+#   7. Install the wheel, then drop the apt lists and the temporary build dirs.
+# NOTE(review): the apt lists are removed twice (after the apt install and again
+# at the end); nothing in between repopulates them, so one of the two is redundant.
+# NOTE(review): the apt build packages are not purged and remain in the image.
+RUN set -eux; \
+    apt-get update -y; \
+    apt-get install -y --no-install-recommends \
+        build-essential \
+        cmake \
+        git \
+        libxml2-dev \
+        ninja-build \
+        python3-dev \
+        zlib1g-dev; \
+    rm -rf /var/lib/apt/lists/*; \
+    rm -rf /tmp/triton-build /tmp/triton-wheels; \
+    mkdir -p /tmp/triton-wheels; \
+    git clone --depth 1 --branch v3.2.0 https://github.com/triton-lang/triton.git /tmp/triton-build/src; \
+    git -C /tmp/triton-build/src submodule update --init --recursive; \
+    python3 -m pip install --no-cache-dir \
+        "setuptools>=40.8.0" \
+        wheel \
+        "cmake>=3.18,<4.0" \
+        "ninja>=1.11.1" \
+        "pybind11>=2.13.1"; \
+    MAX_JOBS=4 python3 -m pip wheel --no-cache-dir \
+        --no-build-isolation \
+        --no-deps \
+        /tmp/triton-build/src/python \
+        --wheel-dir /tmp/triton-wheels; \
+    set -- /tmp/triton-wheels/triton-3.2.0-*.whl; \
+    if [ ! -e "$1" ]; then \
+        echo "Expected source-built Triton wheel for 3.2.0 was not found" >&2; \
+        exit 1; \
+    fi; \
+    python3 -m pip install --no-cache-dir "$1"; \
+    rm -rf /var/lib/apt/lists/* /tmp/triton-build /tmp/triton-wheels
+
 # Copy GDAL (skip headers - not needed in runtime)
 COPY --from=builder /usr/local/bin/gdal* /usr/local/bin/
 COPY --from=builder /usr/local/bin/ogr* /usr/local/bin/

@@ -313,6 +313,36 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,id=apt-cache-jp71-ru
     libglib2.0-0 \
     libatomic1
 
+# Torch 2.10.0 on JP7.1 pairs with Triton 3.6.0.
+# Build the Triton 3.6.0 wheel from source and install it into the image:
+#   1. `set -eux` makes the whole chain abort on the first failing command.
+#   2. Install the apt build toolchain (compiler, cmake, ninja, git, dev headers).
+#   3. Shallow-clone triton-lang/triton at tag v3.6.0 and pull its submodules.
+#   4. Install the build requirements from the clone's own python/requirements.txt,
+#      since the wheel is built with --no-build-isolation and pip will not fetch them.
+#   5. Build the wheel from the repository root (not python/) with MAX_JOBS=4
+#      (presumably caps parallel compile jobs - confirm against Triton's build).
+#   6. `set -- <glob>` loads the matching wheel path into "$1"; when nothing
+#      matched, "$1" is the unexpanded pattern, the -e test fails and the build stops.
+#   7. Install the wheel, then drop the apt lists and the temporary build dirs.
+# Both pip installs pass --break-system-packages, i.e. they write into the
+# system Python environment of this image.
+# NOTE(review): the apt build packages are not purged and remain in the image.
+RUN set -eux; \
+    apt-get update -y; \
+    apt-get install -y --no-install-recommends \
+        build-essential \
+        cmake \
+        git \
+        libxml2-dev \
+        ninja-build \
+        python3-dev \
+        zlib1g-dev; \
+    rm -rf /tmp/triton-build /tmp/triton-wheels; \
+    mkdir -p /tmp/triton-wheels; \
+    git clone --depth 1 --branch v3.6.0 https://github.com/triton-lang/triton.git /tmp/triton-build/src; \
+    git -C /tmp/triton-build/src submodule update --init --recursive; \
+    python3 -m pip install --break-system-packages --no-cache-dir \
+        -r /tmp/triton-build/src/python/requirements.txt; \
+    MAX_JOBS=4 python3 -m pip wheel --no-cache-dir \
+        --no-build-isolation \
+        --no-deps \
+        /tmp/triton-build/src \
+        --wheel-dir /tmp/triton-wheels; \
+    set -- /tmp/triton-wheels/triton-3.6.0*.whl; \
+    if [ ! -e "$1" ]; then \
+        echo "Expected source-built Triton wheel for 3.6.0 was not found" >&2; \
+        exit 1; \
+    fi; \
+    python3 -m pip install --break-system-packages --no-cache-dir "$1"; \
+    rm -rf /var/lib/apt/lists/* /tmp/triton-build /tmp/triton-wheels
+
 # Copy GDAL (skip headers - not needed in runtime)
 COPY --from=builder /usr/local/bin/gdal* /usr/local/bin/
 COPY --from=builder /usr/local/bin/ogr* /usr/local/bin/

@@ -3,8 +3,9 @@
 [Segment Anything 3 (SAM 3)](https://ai.meta.com/sam3) is a unified foundation model for promptable segmentation in images and videos. It builds upon SAM 2 by introducing the ability to exhaustively segment all instances of an open-vocabulary concept specified by a short text phrase or exemplars.
 
 SAM 3 can detect, segment, and track objects using:
-- **Text prompts** (e.g., "a person", "red car")
-- **Visual prompts** (boxes, points)
+- **Text prompts** (e.g., "a person", "red car") — segment every instance of a concept
+- **Exemplar box prompts** — box one example object and segment every similar instance, optionally combined with text and with negative exemplars to exclude lookalikes
+- **Interactive visual prompts** (points, boxes) — segment one specific object, SAM 2 style
 
 ## How to Use SAM 3 with Inference
 
@@ -43,10 +44,29 @@ model = SegmentAnything3(model_id="sam3/sam3_final")
 image_path = "path/to/your/image.jpg"
 
 # Define prompts
-# SAM 3 supports both text and visual prompts
+# SAM 3 supports text prompts, exemplar box prompts, and combinations of both
 prompts = [
+    # Text prompt: segment every instance of a concept
     Sam3Prompt(type="text", text="person"),
-    Sam3Prompt(type="text", text="car")
+    # Exemplar prompt: box one example object (absolute pixels, top-left
+    # anchored XYWH) and segment every similar instance in the image.
+    # box_labels is required: 1 = positive exemplar, 0 = negative exemplar.
+    Sam3Prompt(
+        type="visual",
+        boxes=[Sam3Prompt.Box(x=1409, y=705, width=112, height=183)],
+        box_labels=[1],
+    ),
+    # Combined prompt: text narrowed by exemplars. Here the negative
+    # exemplar suppresses instances similar to the second box.
+    Sam3Prompt(
+        type="visual",
+        text="car",
+        boxes=[
+            Sam3Prompt.BoxXYXY(x0=100, y0=200, x1=300, y1=400),
+            Sam3Prompt.BoxXYXY(x0=500, y0=200, x1=700, y1=400),
+        ],
+        box_labels=[1, 0],
+    ),
 ]
 
 # Run inference
@@ -106,9 +126,9 @@ SAM 3 exposes two main modes via API:
 docker run -it --rm -p 9001:9001 --gpus=all roboflow/inference-server:latest
 ```
 
-#### 2. Concept Segmentation (Text Prompts)
+#### 2. Concept Segmentation (Text and Exemplar Prompts)
 
-This is the most common usage for SAM 3, allowing you to segment objects by text description.
+This is the most common usage for SAM 3, allowing you to segment all instances of a concept. Concepts can be described by text:
 
 ```bash
 curl -X POST 'http://localhost:9001/sam3/concept_segment?api_key=<YOUR_API_KEY>' \
@@ -125,6 +145,30 @@ curl -X POST 'http://localhost:9001/sam3/concept_segment?api_key=<YOUR_API_KEY>'
   }'
 ```
 
+Concepts can also be described by exemplar boxes — box one example object and SAM 3 segments every similar instance. Boxes use absolute pixel coordinates, either top-left anchored XYWH (`{"x", "y", "width", "height"}`) or corner form (`{"x0", "y0", "x1", "y1"}`). `box_labels` is required alongside `boxes`: `1` marks a positive exemplar, `0` a negative exemplar to exclude lookalikes. Text and exemplars can be combined in one prompt:
+
+```bash
+curl -X POST 'http://localhost:9001/sam3/concept_segment?api_key=<YOUR_API_KEY>' \
+  -H 'Content-Type: application/json' \
+  -d '{
+    "image": {
+      "type": "url",
+      "value": "https://media.roboflow.com/inference/sample.jpg"
+    },
+    "prompts": [
+        {
+          "type": "visual",
+          "text": "dog",
+          "boxes": [
+            { "x": 100, "y": 200, "width": 150, "height": 120 },
+            { "x0": 400, "y0": 200, "x1": 550, "y1": 320 }
+          ],
+          "box_labels": [1, 0]
+        }
+    ]
+  }'
+```
+
 #### 3. Visual Segmentation (Points/Boxes)
 
 For interactive segmentation similar to SAM 2, you can use the visual segmentation endpoints.
@@ -143,19 +187,27 @@ curl -X POST 'http://localhost:9001/sam3/embed_image?api_key=<YOUR_API_KEY>' \
 # Returns an "image_id"
 ```
 
-**Step 2: Segment with Points**
+**Step 2: Segment with Points and/or a Box**
 
 ```bash
 curl -X POST 'http://localhost:9001/sam3/visual_segment?api_key=<YOUR_API_KEY>' \
   -H 'Content-Type: application/json' \
   -d '{
     "image_id": "<IMAGE_ID_FROM_STEP_1>",
     "prompts": [
-      { "points": [ { "x": 100, "y": 100, "positive": true } ] }
-    ]
+      {
+        "points": [ { "x": 100, "y": 100, "positive": true } ],
+        "box": { "x": 100, "y": 100, "width": 200, "height": 150 }
+      }
+    ],
+    "multimask_output": false
   }'
 ```
 
+A prompt can contain `points`, a `box`, or both. Positive points include the clicked region; negative points (`"positive": false`) exclude it — add points to iteratively refine the mask. Note that the visual segmentation `box` is **center-anchored** XYWH (`x`, `y` are the box center), unlike concept segmentation boxes, which are top-left anchored.
+
+The response contains the single highest-confidence mask for the prompt: `multimask_output` controls how many internal mask proposals the model generates (three when `true`), but the best proposal is always selected for the response. Send one prompt per request — multiple prompts in one request currently return only one prediction.
+
 ## Workflow Integration
 
 SAM 3 is fully integrated into [Inference Workflows](https://inference.roboflow.com/workflows/core_steps/). You can use the **SAM 3** block to add zero-shot instance segmentation to your pipeline.
@@ -253,7 +305,41 @@ cv.waitKey(0)
 cv.destroyAllWindows()
 ```
 
-### 2. SAM3 raw API
+### 2. SAM3 via the Inference SDK
+
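+The `inference-sdk` client wraps both SAM3 endpoints: concept segmentation (`sam3_concept_segment`) and interactive visual segmentation (`sam3_visual_segment`). Prompt dicts take the same shape as the HTTP payloads, so text, exemplar, and combined prompts all work: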
+
+```python
+from inference_sdk import InferenceHTTPClient
+
+client = InferenceHTTPClient(
+    api_url="https://serverless.roboflow.com",
+    api_key="<YOUR_ROBOFLOW_API_KEY>",
+)
+
+# Concept segmentation: text, exemplar boxes, or both per prompt
+result = client.sam3_concept_segment(
+    inference_input="https://media.roboflow.com/inference/people-walking.jpg",
+    prompts=[
+        {"type": "text", "text": "person"},
+        {
+            "type": "visual",
+            "boxes": [{"x": 1409, "y": 705, "width": 112, "height": 183}],
+            "box_labels": [1],
+        },
+    ],
+    output_prob_thresh=0.5,
+)
+
+# Interactive visual segmentation: points and/or a center-anchored box
+result = client.sam3_visual_segment(
+    inference_input="https://media.roboflow.com/inference/people-walking.jpg",
+    prompts=[{"points": [{"x": 1465, "y": 796, "positive": True}]}],
+    multimask_output=False,
+)
+```
+
+### 3. SAM3 raw API
 
 For direct API access to SAM3 without workflows, you can use Roboflow's serverless endpoint.
 This approach gives you raw segmentation results that you can process however you need.

@@ -16,9 +16,14 @@ class Sam3Prompt(BaseModel):
     """
 
     type: Optional[str] = Field(
-        default=None, description="Optional hint: 'text' or 'visual'"
+        default=None,
+        description="Optional hint: 'text' or 'visual'. 'visual' requires at least one box.",
+    )
+    text: Optional[str] = Field(
+        default=None,
+        description="Concept to segment as a short noun phrase (e.g. 'person'). "
+        "All matching instances are returned. Can be combined with exemplar boxes in the same prompt.",
     )
-    text: Optional[str] = Field(default=None)
 
     output_prob_thresh: Optional[float] = Field(
         default=None,
@@ -43,10 +48,18 @@ class BoxXYXY(BaseModel):
     # Single unified boxes field; each entry can be XYWH or XYXY
     boxes: Optional[List[Union[Box, BoxXYXY]]] = Field(
         default=None,
-        description="Absolute pixel boxes as either XYWH or XYXY entries",
+        description="Exemplar boxes in absolute pixels, as XYWH entries "
+        "({x, y, width, height}, top-left anchored) or XYXY entries ({x0, y0, x1, y1}). "
+        "Each box marks an example object; the model segments every instance matching "
+        "the exemplars (and text, if provided), not just the boxed objects. "
+        "Requires box_labels.",
     )
     box_labels: Optional[List[Union[int, bool]]] = Field(
-        default=None, description="List of 0/1 or booleans for boxes"
+        default=None,
+        description="Per-box exemplar labels, one per entry in boxes: "
+        "1/true marks a positive exemplar (segment objects like this), "
+        "0/false marks a negative exemplar (exclude objects like this). "
+        "Required when boxes is set.",
     )
 
     @validator("boxes", always=True)

@@ -1,4 +1,4 @@
-__version__ = "1.3.0"
+__version__ = "1.3.1"
 
 
 if __name__ == "__main__":