Skip to content

Commit

Permalink
Add serialise_sv_detections to orjson_utils.py
Browse files Browse the repository at this point in the history
  • Loading branch information
grzegorz-roboflow committed May 20, 2024
1 parent 28abb8f commit f4967f5
Show file tree
Hide file tree
Showing 3 changed files with 236 additions and 10 deletions.
101 changes: 91 additions & 10 deletions inference/core/interfaces/http/orjson_utils.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,32 @@
import base64
from typing import Any, Dict, List, Optional, Union

import orjson
from fastapi.responses import ORJSONResponse
import numpy as np
import orjson
from pydantic import BaseModel
import supervision as sv

from inference.core.entities.responses.inference import InferenceResponse
from inference.core.utils.image_utils import ImageType, encode_image_to_jpeg_bytes
from inference.core.workflows.constants import (
CLASS_ID_KEY,
CLASS_NAME_KEY,
CONFIDENCE_KEY,
DETECTION_ID_KEY,
HEIGHT_KEY,
KEYPOINTS_CLASS_ID_KEY,
KEYPOINTS_CLASS_NAME_KEY,
KEYPOINTS_CONFIDENCE_KEY,
KEYPOINTS_KEY,
KEYPOINTS_XY_KEY,
POLYGON_KEY,
TRACKER_ID_KEY,
WIDTH_KEY,
PARENT_ID_KEY,
X_KEY,
Y_KEY,
)


class ORJSONResponseBytes(ORJSONResponse):
Expand All @@ -31,9 +51,9 @@ def orjson_response(
response: Union[List[InferenceResponse], InferenceResponse, BaseModel]
) -> ORJSONResponseBytes:
if isinstance(response, list):
content = [r.dict(by_alias=True, exclude_none=True) for r in response]
content = [r.model_dump(by_alias=True, exclude_none=True) for r in response]
else:
content = response.dict(by_alias=True, exclude_none=True)
content = response.model_dump(by_alias=True, exclude_none=True)
return ORJSONResponseBytes(content=content)


Expand All @@ -50,10 +70,12 @@ def serialise_workflow_result(
continue
if contains_image(element=value):
value = serialise_image(image=value)
elif issubclass(type(value), dict):
elif isinstance(value, dict):
value = serialise_dict(elements=value)
elif issubclass(type(value), list):
elif isinstance(value, list):
value = serialise_list(elements=value)
elif isinstance(value, sv.Detections):
value = serialise_sv_detections(detections=value)
serialised_result[key] = value
return serialised_result

Expand All @@ -63,10 +85,12 @@ def serialise_list(elements: List[Any]) -> List[Any]:
for element in elements:
if contains_image(element=element):
element = serialise_image(image=element)
elif issubclass(type(element), dict):
elif isinstance(element, dict):
element = serialise_dict(elements=element)
elif issubclass(type(element), list):
elif isinstance(element, list):
element = serialise_list(elements=element)
elif isinstance(element, sv.Detections):
element = serialise_sv_detections(detections=element)
result.append(element)
return result

Expand All @@ -76,17 +100,19 @@ def serialise_dict(elements: Dict[str, Any]) -> Dict[str, Any]:
for key, value in elements.items():
if contains_image(element=value):
value = serialise_image(image=value)
elif issubclass(type(value), dict):
elif isinstance(value, dict):
value = serialise_dict(elements=value)
elif issubclass(type(value), list):
elif isinstance(value, list):
value = serialise_list(elements=value)
elif isinstance(value, sv.Detections):
value = serialise_sv_detections(detections=value)
serialised_result[key] = value
return serialised_result


def contains_image(element: Any) -> bool:
    """Return True when *element* is a serialised numpy-image payload.

    An image payload is a dict whose ``"type"`` field equals
    ``ImageType.NUMPY_OBJECT.value``; anything else is not an image.
    """
    if not isinstance(element, dict):
        return False
    return element.get("type") == ImageType.NUMPY_OBJECT.value

Expand All @@ -97,3 +123,58 @@ def serialise_image(image: Dict[str, Any]) -> Dict[str, Any]:
encode_image_to_jpeg_bytes(image["value"])
).decode("ascii")
return image


def serialise_sv_detections(detections: sv.Detections) -> List[Dict[str, Any]]:
    """Convert ``sv.Detections`` into a list of JSON-serialisable dicts.

    Each detection becomes one dict holding the bounding box as a centre
    point plus size (``x``/``y``/``width``/``height``) and, when present,
    confidence, class id, mask polygon points, tracker id, class name,
    detection id, parent id and per-detection keypoints.
    """
    serialized_detections = []
    for xyxy, mask, confidence, class_id, tracker_id, data in detections:
        detection_dict = {}

        # Iterating sv.Detections yields xyxy as np.ndarray; convert to a
        # plain list of floats so the result is orjson-serialisable.
        if isinstance(xyxy, np.ndarray):
            xyxy = xyxy.astype(float).tolist()
        x1, y1, x2, y2 = xyxy
        # Box is serialised as centre point + size, not corner coordinates.
        detection_dict[WIDTH_KEY] = abs(x2 - x1)
        detection_dict[HEIGHT_KEY] = abs(y2 - y1)
        detection_dict[X_KEY] = x1 + detection_dict[WIDTH_KEY] / 2
        detection_dict[Y_KEY] = y1 + detection_dict[HEIGHT_KEY] / 2

        if confidence is not None:
            detection_dict[CONFIDENCE_KEY] = float(confidence)
        if class_id is not None:
            detection_dict[CLASS_ID_KEY] = int(class_id)
        if mask is not None:
            # NOTE(review): only the first polygon is serialised, so
            # multi-part masks lose their remaining regions — confirm this
            # is the intended contract. Guard against masks that yield no
            # polygons at all (previously an unguarded ``polygons[0]``
            # raised IndexError for an all-False mask).
            polygons = sv.mask_to_polygons(mask=mask)
            if len(polygons) > 0:
                detection_dict[POLYGON_KEY] = [
                    {X_KEY: float(x), Y_KEY: float(y)} for x, y in polygons[0]
                ]
        if tracker_id is not None:
            detection_dict[TRACKER_ID_KEY] = int(tracker_id)
        if "class_name" in data:
            detection_dict[CLASS_NAME_KEY] = str(data["class_name"])
        if DETECTION_ID_KEY in data:
            detection_dict[DETECTION_ID_KEY] = str(data[DETECTION_ID_KEY])
        if PARENT_ID_KEY in data:
            detection_dict[PARENT_ID_KEY] = str(data[PARENT_ID_KEY])
        keypoints = _serialise_keypoints(data=data)
        if keypoints is not None:
            detection_dict[KEYPOINTS_KEY] = keypoints
        serialized_detections.append(detection_dict)
    return serialized_detections


def _serialise_keypoints(data: Dict[str, Any]) -> Optional[List[Dict[str, Any]]]:
    """Serialise one detection's keypoints into a list of dicts.

    Returns ``None`` when any of the four parallel keypoint arrays
    (class id, class name, confidence, xy) is absent from ``data`` —
    keypoints are only emitted when the full set is available.
    """
    required_keys = (
        KEYPOINTS_CLASS_ID_KEY,
        KEYPOINTS_CLASS_NAME_KEY,
        KEYPOINTS_CONFIDENCE_KEY,
        KEYPOINTS_XY_KEY,
    )
    if any(key not in data for key in required_keys):
        return None
    return [
        {
            KEYPOINTS_CLASS_ID_KEY: int(keypoint_class_id),
            KEYPOINTS_CLASS_NAME_KEY: str(keypoint_class_name),
            KEYPOINTS_CONFIDENCE_KEY: float(keypoint_confidence),
            X_KEY: float(x),
            Y_KEY: float(y),
        }
        for keypoint_class_id, keypoint_class_name, keypoint_confidence, (x, y) in zip(
            data[KEYPOINTS_CLASS_ID_KEY],
            data[KEYPOINTS_CLASS_NAME_KEY],
            data[KEYPOINTS_CONFIDENCE_KEY],
            data[KEYPOINTS_XY_KEY],
        )
    ]
7 changes: 7 additions & 0 deletions inference/core/workflows/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,10 @@
# Field names used when serialising workflow detections to JSON dicts.
HEIGHT_KEY = "height"
DETECTION_ID_KEY = "detection_id"
PARENT_COORDINATES_SUFFIX = "_parent_coordinates"
X_KEY = "x"
Y_KEY = "y"
CONFIDENCE_KEY = "confidence"
CLASS_ID_KEY = "class_id"
# NOTE: the serialised field name is "class", not "class_name".
CLASS_NAME_KEY = "class"
# Mask polygons are serialised under the field name "points".
POLYGON_KEY = "points"
TRACKER_ID_KEY = "tracker_id"
138 changes: 138 additions & 0 deletions tests/inference/unit_tests/core/interfaces/http/test_orjson_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@
import cv2
import numpy as np
import pytest
import supervision as sv

from inference.core.interfaces.http.orjson_utils import (
contains_image,
serialise_image,
serialise_list,
serialise_workflow_result,
serialise_sv_detections,
)


Expand Down Expand Up @@ -168,3 +170,139 @@ def test_serialise_workflow_result() -> None:
assert (
result["sixth"][2][1]["type"] == "base64"
), "Second element of nested list to be serialised"


def test_serialise_sv_detections() -> None:
    """serialise_sv_detections emits one JSON-ready dict per detection,
    covering bbox (centre + size), confidence, class, mask polygon,
    tracker id, custom data fields and ragged per-detection keypoints.

    Fix vs previous revision: removed an unused ``np_image`` local that
    was allocated but never referenced.
    """
    # given
    detections = sv.Detections(
        xyxy=np.array([[1, 1, 2, 2], [3, 3, 4, 4]], dtype=np.float64),
        class_id=np.array([1, 2]),
        confidence=np.array([0.1, 0.9], dtype=np.float64),
        tracker_id=np.array([1, 2]),
        mask=np.array([
            sv.polygon_to_mask(np.array([[1, 1], [1, 10], [10, 10], [10, 1]]), resolution_wh=(15, 15)),
            sv.polygon_to_mask(np.array([[1, 1], [1, 10], [10, 10], [10, 1]]), resolution_wh=(15, 15)),
        ], dtype=bool),
        data={
            "class_name": np.array(["cat", "dog"]),
            "detection_id": np.array(["first", "second"]),
            "parent_id": np.array(["image", "image"]),
            # Keypoint arrays are ragged (3 points vs 4 points), hence
            # the object-dtype arrays of per-detection arrays.
            "keypoints_xy": np.array(
                [
                    np.array([[11, 11], [12, 13], [14, 15]], dtype=np.float64),
                    np.array([[16, 16], [17, 17], [18, 18], [19, 19]], dtype=np.float64),
                ], dtype="object"),
            "keypoints_class_id": np.array(
                [
                    np.array([1, 2, 3]),
                    np.array([1, 2, 3, 4]),
                ], dtype="object"),
            "keypoints_class_name": np.array(
                [
                    np.array(["nose", "ear", "eye"]),
                    np.array(["nose", "ear", "eye", "tail"]),
                ], dtype="object"),
            "keypoints_confidence": np.array(
                [
                    np.array([0.1, 0.2, 0.3], dtype=np.float64),
                    np.array([0.1, 0.2, 0.3, 0.4], dtype=np.float64),
                ], dtype="object")
        }
    )

    # when
    result = serialise_sv_detections(detections=detections)

    # then
    assert result == [
        {
            'width': 1.0,
            'height': 1.0,
            'x': 1.5,
            'y': 1.5,
            'confidence': 0.1,
            'class_id': 1,
            'points': [
                {'x': 1.0, 'y': 1.0},
                {'x': 1.0, 'y': 10.0},
                {'x': 10.0, 'y': 10.0},
                {'x': 10.0, 'y': 1.0}
            ],
            'tracker_id': 1,
            'class': 'cat',
            'detection_id': 'first',
            'parent_id': 'image',
            'keypoints': [
                {
                    'keypoints_class_id': 1,
                    'keypoints_class_name': 'nose',
                    'keypoints_confidence': 0.1,
                    'x': 11.0,
                    'y': 11.0
                },
                {
                    'keypoints_class_id': 2,
                    'keypoints_class_name': 'ear',
                    'keypoints_confidence': 0.2,
                    'x': 12.0,
                    'y': 13.0
                },
                {
                    'keypoints_class_id': 3,
                    'keypoints_class_name': 'eye',
                    'keypoints_confidence': 0.3,
                    'x': 14.0,
                    'y': 15.0
                }
            ]
        },
        {
            'width': 1.0,
            'height': 1.0,
            'x': 3.5,
            'y': 3.5,
            'confidence': 0.9,
            'class_id': 2,
            'points': [
                {'x': 1.0, 'y': 1.0},
                {'x': 1.0, 'y': 10.0},
                {'x': 10.0, 'y': 10.0},
                {'x': 10.0, 'y': 1.0}
            ],
            'tracker_id': 2,
            'class': 'dog',
            'detection_id': 'second',
            'parent_id': 'image',
            'keypoints': [
                {
                    'keypoints_class_id': 1,
                    'keypoints_class_name': 'nose',
                    'keypoints_confidence': 0.1,
                    'x': 16.0,
                    'y': 16.0
                },
                {
                    'keypoints_class_id': 2,
                    'keypoints_class_name': 'ear',
                    'keypoints_confidence': 0.2,
                    'x': 17.0,
                    'y': 17.0
                },
                {
                    'keypoints_class_id': 3,
                    'keypoints_class_name': 'eye',
                    'keypoints_confidence': 0.3,
                    'x': 18.0,
                    'y': 18.0
                },
                {
                    'keypoints_class_id': 4,
                    'keypoints_class_name': 'tail',
                    'keypoints_confidence': 0.4,
                    'x': 19.0,
                    'y': 19.0
                }
            ]
        }
    ]

0 comments on commit f4967f5

Please sign in to comment.