diff --git a/inference/core/entities/responses/inference.py b/inference/core/entities/responses/inference.py index 912dbd98f3..acc761eb8b 100644 --- a/inference/core/entities/responses/inference.py +++ b/inference/core/entities/responses/inference.py @@ -4,6 +4,12 @@ from pydantic import BaseModel, ConfigDict, Field, ValidationError, field_serializer +# Attr name the instance-segmentation fast path uses on response.__dict__ to +# hand a pre-built sv.Detections to the v3 workflow block without going through +# pydantic serialization. Pydantic v2 ignores extra __dict__ keys in +# model_dump/jsonable_encoder, so this never leaks into serialized output. +SV_DETECTIONS_FAST_ATTR = "_sv_detections_fast" + class ObjectDetectionPrediction(BaseModel): """Object Detection prediction. diff --git a/inference/core/models/inference_models_adapters.py b/inference/core/models/inference_models_adapters.py index 5620ff16c3..a7c9da4863 100644 --- a/inference/core/models/inference_models_adapters.py +++ b/inference/core/models/inference_models_adapters.py @@ -4,16 +4,20 @@ from time import perf_counter from typing import Any, List, Optional, Tuple, Union +import cv2 import numpy as np +import supervision as sv import torch from PIL import Image, ImageDraw, ImageFont from pycocotools import mask as mask_utils +from supervision.config import CLASS_NAME_DATA_FIELD from inference.core.entities.requests import ( ClassificationInferenceRequest, InferenceRequest, ) from inference.core.entities.responses.inference import ( + SV_DETECTIONS_FAST_ATTR, ClassificationInferenceResponse, InferenceResponse, InferenceResponseImage, @@ -313,6 +317,10 @@ def postprocess( predictions, preprocess_return_metadata, **mapped_kwargs ) + is_workflow = ( + kwargs.get("source") == "workflow-execution" and not return_in_rle + ) + responses: List[InstanceSegmentationInferenceResponse] = [] for preproc_metadata, det in zip(preprocess_return_metadata, detections_list): H = preproc_metadata.original_size.height @@ -320,6 +328,21 @@ def postprocess( xyxy = det.xyxy.detach().cpu().numpy() confs = det.confidence.detach().cpu().numpy() + class_ids = det.class_id.detach().cpu().numpy() + + if is_workflow and isinstance(det.mask, torch.Tensor): + masks_np = det.mask.detach().cpu().numpy() + response = self._build_workflow_fastpath_response( + xyxy=xyxy, + confs=confs, + class_ids=class_ids, + masks=masks_np, + width=W, + height=H, + ) + responses.append(response) + continue + if isinstance(det.mask, torch.Tensor): masks = det.mask.detach().cpu().numpy() if return_in_rle: @@ -333,7 +356,6 @@ def postprocess( polys_or_rles = det.mask.to_coco_rle_masks() else: polys_or_rles = rle_masks2poly(det.mask) - class_ids = det.class_id.detach().cpu().numpy() predictions: List[ Union[InstanceSegmentationPrediction, InstanceSegmentationRLEPrediction] @@ -399,6 +421,85 @@ def postprocess( ) return responses + def _build_workflow_fastpath_response( + self, + xyxy: np.ndarray, + confs: np.ndarray, + class_ids: np.ndarray, + masks: np.ndarray, + width: int, + height: int, + ) -> InstanceSegmentationInferenceResponse: + n = int(class_ids.shape[0]) if class_ids.ndim else 0 + class_names_map = self.class_names + n_classes = len(class_names_map) + + if n == 0: + sv_dets = sv.Detections.empty() + sv_dets.data = {CLASS_NAME_DATA_FIELD: np.empty(0, dtype=object)} + else: + # Reproduce the slow path's mask denoising: per mask, keep only + # the largest external contour (by vertex count) and refill it, + # which filters disconnected mask fragments AND fills interior + # holes (RETR_EXTERNAL ignores inner contours). Detections whose + # largest contour has fewer than 3 vertices are dropped, matching + # `filter_out_invalid_polygons` + the `>= 3` check in + # supervision's `process_roboflow_result`. + denoised = np.zeros_like(masks, dtype=np.uint8) + keep_mask = np.zeros(n, dtype=bool) + for i in range(n): + m = masks[i] + if m.dtype == np.bool_: + m = m.view(np.uint8) + elif m.dtype != np.uint8: + m = (m > 0).astype(np.uint8) + if not m.flags.c_contiguous: + m = np.ascontiguousarray(m) + contours = cv2.findContours( + m, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE + )[0] + if not contours: + continue + best = max(contours, key=len) + # Match supervision's `>= 3` threshold on polygon vertex count. + if len(best) < 3: + continue + cv2.fillPoly(denoised[i], [best.reshape(-1, 2)], color=1) + keep_mask[i] = True + + class_id_int = class_ids.astype(np.int64, copy=False) + class_name_arr = np.empty(n, dtype=object) + for i, cid in enumerate(class_id_int): + ci = int(cid) + class_name_arr[i] = ( + class_names_map[ci] if 0 <= ci < n_classes else str(ci) + ) + + if not keep_mask.all(): + xyxy = xyxy[keep_mask] + confs = confs[keep_mask] + class_id_int = class_id_int[keep_mask] + class_name_arr = class_name_arr[keep_mask] + denoised = denoised[keep_mask] + + mask_bool = denoised.astype(bool, copy=False) + sv_dets = sv.Detections( + xyxy=xyxy.astype(np.float32, copy=False), + confidence=confs.astype(np.float32, copy=False), + class_id=class_id_int, + mask=mask_bool if mask_bool.size else None, + data={CLASS_NAME_DATA_FIELD: class_name_arr}, + ) + + response = InstanceSegmentationInferenceResponse( + predictions=[], + image=InferenceResponseImage(width=width, height=height), + ) + # Pydantic v2 ignores extra __dict__ keys in model_dump and + # jsonable_encoder, so this never leaks into serialized output. + response.__dict__[SV_DETECTIONS_FAST_ATTR] = sv_dets + return response + def clear_cache(self, delete_from_disk: bool = True) -> None: """Clears any cache if necessary. TODO: Implement this to delete the cache from the experimental model. diff --git a/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v3.py b/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v3.py index 4e9e62eb16..3c1e6322ae 100644 --- a/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v3.py +++ b/inference/core/workflows/core_steps/models/roboflow/instance_segmentation/v3.py @@ -1,5 +1,8 @@ +import uuid from typing import List, Literal, Optional, Type, Union +import numpy as np +import supervision as sv from pydantic import ConfigDict, Field, PositiveInt, model_validator from inference.core.entities.requests.inference import ( @@ -12,6 +15,7 @@ WORKFLOWS_REMOTE_EXECUTION_MAX_STEP_BATCH_SIZE, WORKFLOWS_REMOTE_EXECUTION_MAX_STEP_CONCURRENT_REQUESTS, ) +from inference.core.entities.responses.inference import SV_DETECTIONS_FAST_ATTR from inference.core.managers.base import ModelManager from inference.core.workflows.core_steps.common.entities import StepExecutionMode from inference.core.workflows.core_steps.common.utils import ( @@ -20,7 +24,11 @@ convert_inference_detections_batch_to_sv_detections, filter_out_unwanted_classes_from_sv_detections_batch, ) -from inference.core.workflows.execution_engine.constants import INFERENCE_ID_KEY +from inference.core.workflows.execution_engine.constants import ( + DETECTION_ID_KEY, + IMAGE_DIMENSIONS_KEY, + INFERENCE_ID_KEY, +) from inference.core.workflows.execution_engine.entities.base import ( Batch, OutputDefinition, @@ -327,6 +335,16 @@ def run_locally( ) if not isinstance(predictions, list): predictions = [predictions] + sv_fast = [p.__dict__.get(SV_DETECTIONS_FAST_ATTR) for p in predictions] + if all(det is not None for det in sv_fast): + inference_ids = [p.inference_id for p in predictions] + return self._post_process_result_fast( + images=images, + sv_detections=sv_fast, + inference_ids=inference_ids, + class_filter=class_filter, + model_id=model_id, + ) predictions = [ e.model_dump(by_alias=True, exclude_none=True) for e in predictions ] @@ -422,3 +440,45 @@ def _post_process_result( } for inference_id, prediction in zip(inference_ids, predictions) ] + + def _post_process_result_fast( + self, + images: Batch[WorkflowImageData], + sv_detections: List[sv.Detections], + inference_ids: List[Optional[str]], + class_filter: Optional[List[str]], + model_id: str, + ) -> BlockResult: + augmented: List[sv.Detections] = [] + for image, detections, inference_id in zip( + images, sv_detections, inference_ids + ): + n = len(detections) + detections[DETECTION_ID_KEY] = np.array( + [str(uuid.uuid4()) for _ in range(n)] + ) + h, w = image.numpy_image.shape[:2] + detections[IMAGE_DIMENSIONS_KEY] = np.array([[h, w]] * n) + if inference_id is not None: + detections[INFERENCE_ID_KEY] = np.array([inference_id] * n) + augmented.append(detections) + augmented = attach_prediction_type_info_to_sv_detections_batch( + predictions=augmented, + prediction_type="instance-segmentation", + ) + augmented = filter_out_unwanted_classes_from_sv_detections_batch( + predictions=augmented, + classes_to_accept=class_filter, + ) + augmented = attach_parents_coordinates_to_batch_of_sv_detections( + images=images, + predictions=augmented, + ) + return [ + { + "inference_id": inference_id, + "predictions": prediction, + "model_id": model_id, + } + for inference_id, prediction in zip(inference_ids, augmented) + ]