3416 Add RBLN(Rebellions) accelerator vendor #3417

Open · wants to merge 1 commit into base: master
1 change: 1 addition & 0 deletions docs/hardware_support/hardware_support.rst
@@ -6,3 +6,4 @@
linux_aarch64
nvidia_mps
Intel Extension for PyTorch <https://github.com/pytorch/serve/tree/master/examples/intel_extension_for_pytorch>
rbln_support
47 changes: 47 additions & 0 deletions docs/hardware_support/rbln_support.md
@@ -0,0 +1,47 @@
<font size="6" style="font-weight: bold;"> ⚠️ Notice: Limited Maintenance </font>

This project is no longer actively maintained. While existing releases remain available, there are no planned updates, bug fixes, new features, or security patches. Users should be aware that vulnerabilities may not be addressed.

# Rebellions Support

RBLN (Rebellions) NPUs are fully compatible with TorchServe. Rebellions provides documentation and tutorials to help you easily get started using the RBLN NPU with TorchServe.

- [Rebellions TorchServe support documentation](https://docs.rbln.ai/software/model_serving/torchserve/torchserve.html)

## Support Matrix
For details on supported features and compatibility of the RBLN NPU, see the `Support Matrix` below:
- [Support Matrix](https://docs.rbln.ai/supports/version_matrix.html)

## Installation
Please refer to the Installation Guide in the Rebellions documentation for instructions on installing the Driver and RBLN SDK.
- [Installation Guide](https://docs.rbln.ai/getting_started/installation_guide.html)

## TorchServe with RBLN NPUs

To get started, please refer to Rebellions' TorchServe tutorials.

Tutorials are available for various models including `Image Classification (ResNet50)`, `Object Detection (YOLOv8)`, and `LLM (Llama3-8B)`.

- [Tutorial - ResNet50](https://docs.rbln.ai/software/model_serving/torchserve/tutorial/resnet50.html)
- [Tutorial - YOLOv8](https://docs.rbln.ai/software/model_serving/torchserve/tutorial/yolov8.html)
- [Tutorial - Llama3-8B](https://docs.rbln.ai/software/model_serving/torchserve/tutorial/llama3-8B.html)
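
For orientation, the short client sketch below shows what a request to a served ResNet50 model looks like. It assumes a TorchServe instance is already running with a `resnet50` model registered (as in the test configuration added by this PR) and that a local `cat.jpg` test image exists; the `/predictions/<model_name>` endpoint and default port 8080 are standard TorchServe.

```python
# Hypothetical client call against a running TorchServe instance; the
# response shape matches this PR's example handler, which returns
# [{"result": "<category name>"}] per request.
import requests

with open("cat.jpg", "rb") as f:
    image_bytes = f.read()

resp = requests.post(
    "http://localhost:8080/predictions/resnet50",
    data=image_bytes,
    headers={"Content-Type": "application/octet-stream"},
)
print(resp.json())  # e.g. {"result": "tabby"}
```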

## Docker

Please refer to the `Docker Support` documentation.

- [Docker Support](https://docs.rbln.ai/software/system_management/docker.html)

## Multiple Devices

For monitoring NPU statistics and utilizing multiple RBLN NPUs, please refer to the `Device Management` documentation.

- [Device Management](https://docs.rbln.ai/software/system_management/device_management.html)
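
As a hedged sketch (not taken from the Rebellions docs): the `RblnUtil` class added in this PR reports `RBLN_DEVICES` as the device-selection environment variable, so limiting TorchServe to specific NPUs would plausibly look like the following. The comma-separated ID format is an assumption by analogy with `CUDA_VISIBLE_DEVICES`.

```python
# Assumption-labeled sketch: expose only NPUs 0 and 1 to TorchServe via the
# RBLN_DEVICES variable that RblnUtil.getGpuEnvVariableName() returns.
import os
import subprocess

env = dict(os.environ, RBLN_DEVICES="0,1")  # assumed comma-separated NPU IDs
subprocess.run(
    ["torchserve", "--start", "--model-store", "model_store", "--ncs"],
    env=env,
    check=True,
)
```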

## Contact

We are always interested in improving the utilization of the RBLN NPU and providing technical support.

Please contact us through the following page:

- [Contact Us](https://docs.rbln.ai/supports/contact_us.html)
org/pytorch/serve/device/AcceleratorVendor.java
@@ -5,5 +5,6 @@ public enum AcceleratorVendor {
NVIDIA,
INTEL,
APPLE,
RBLN,
UNKNOWN
}
@@ -63,6 +63,8 @@ private IAcceleratorUtility createAcceleratorUtility() {
return new XpuUtil();
case APPLE:
return new AppleUtil();
case RBLN:
return new RblnUtil();
default:
return null;
}
@@ -107,7 +109,9 @@ public static AcceleratorVendor detectVendorType() {
return AcceleratorVendor.INTEL;
} else if (isCommandAvailable("system_profiler")) {
return AcceleratorVendor.APPLE;
        } else if (isCommandAvailable("rbln-stat")) {
            return AcceleratorVendor.RBLN;
        } else {
return AcceleratorVendor.UNKNOWN;
}
}
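
For readers skimming the hunk above: vendor detection simply probes `PATH` for each vendor's management CLI in order, with `rbln-stat` as the new RBLN probe. A rough Python equivalent follows; the NVIDIA and Intel command names are assumptions, since they sit outside this hunk, while the `system_profiler` and `rbln-stat` probes are visible above.

```python
# Rough Python analogue of detectVendorType(): probe PATH for each vendor's
# management CLI, first match wins.
import shutil

def detect_vendor_type() -> str:
    probes = [
        ("nvidia-smi", "NVIDIA"),      # assumed command name
        ("xpu-smi", "INTEL"),          # assumed command name
        ("system_profiler", "APPLE"),  # shown in the hunk above
        ("rbln-stat", "RBLN"),         # shown in the hunk above
    ]
    for command, vendor in probes:
        if shutil.which(command) is not None:
            return vendor
    return "UNKNOWN"
```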
org/pytorch/serve/device/utils/RblnUtil.java
@@ -0,0 +1,108 @@
package org.pytorch.serve.device.utils;

import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.List;
import org.pytorch.serve.device.Accelerator;
import org.pytorch.serve.device.AcceleratorVendor;
import org.pytorch.serve.device.interfaces.IAcceleratorUtility;
import org.pytorch.serve.device.interfaces.IJsonSmiParser;

public class RblnUtil implements IAcceleratorUtility, IJsonSmiParser {

@Override
public String getGpuEnvVariableName() {
return "RBLN_DEVICES";
}

@Override
public String[] getUtilizationSmiCommand() {
return new String[] {
"rbln-stat", "-j"
};
}

@Override
public ArrayList<Accelerator> getAvailableAccelerators(
LinkedHashSet<Integer> availableAcceleratorIds) {
String jsonOutput = IAcceleratorUtility.callSMI(getUtilizationSmiCommand());
JsonObject rootObject = JsonParser.parseString(jsonOutput).getAsJsonObject();
return jsonOutputToAccelerators(rootObject, availableAcceleratorIds);
}

@Override
public ArrayList<Accelerator> smiOutputToUpdatedAccelerators(
String smiOutput, LinkedHashSet<Integer> parsedGpuIds) {
JsonObject rootObject = JsonParser.parseString(smiOutput).getAsJsonObject();
return jsonOutputToAccelerators(rootObject, parsedGpuIds);
}

@Override
public Accelerator jsonObjectToAccelerator(JsonObject gpuObject) {
String model = gpuObject.get("name").getAsString();
if (!model.startsWith("RBLN")) {
return null;
}
int npuId = gpuObject.get("npu").getAsInt();
float npuUtil = gpuObject.get("util").getAsFloat();
long memoryTotal = gpuObject.getAsJsonObject("memory").get("total").getAsLong();
long memoryUsed = gpuObject.getAsJsonObject("memory").get("used").getAsLong();

Accelerator accelerator = new Accelerator(model, AcceleratorVendor.RBLN, npuId);

// Set additional information
accelerator.setUsagePercentage(npuUtil);
        // Guard against total == 0 and report percent (0-100) to match the setter name
        accelerator.setMemoryUtilizationPercentage(
                (memoryTotal == 0) ? 0f : (memoryUsed / (float) memoryTotal) * 100f);
        accelerator.setMemoryUtilizationMegabytes((int) (memoryUsed / 1024 / 1024));

return accelerator;
}

@Override
public Integer extractAcceleratorId(JsonObject cardObject) {
Integer npuId = cardObject.get("npu").getAsInt();
return npuId;
}

@Override
public List<JsonObject> extractAccelerators(JsonElement rootObject) {
List<JsonObject> accelerators = new ArrayList<>();
JsonArray devicesArray =
rootObject
.getAsJsonObject()
.get("devices")
.getAsJsonArray();

        for (JsonElement elem : devicesArray) {
accelerators.add(elem.getAsJsonObject());
}

return accelerators;
}

public ArrayList<Accelerator> jsonOutputToAccelerators(
JsonObject rootObject, LinkedHashSet<Integer> parsedAcceleratorIds) {

ArrayList<Accelerator> accelerators = new ArrayList<>();
List<JsonObject> acceleratorObjects = extractAccelerators(rootObject);

        for (JsonObject acceleratorObject : acceleratorObjects) {
            Integer acceleratorId = extractAcceleratorId(acceleratorObject);
            if (acceleratorId != null
                    && (parsedAcceleratorIds.isEmpty()
                            || parsedAcceleratorIds.contains(acceleratorId))) {
                Accelerator accelerator = jsonObjectToAccelerator(acceleratorObject);
                // jsonObjectToAccelerator returns null for non-RBLN entries; skip those
                if (accelerator != null) {
                    accelerators.add(accelerator);
                }
            }
        }

return accelerators;
}
}
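
The parser above only touches a handful of keys, which implies `rbln-stat -j` output shaped roughly like the sample below. This is reconstructed from those reads, not taken from Rebellions documentation: everything beyond `devices[].npu/name/util/memory.total/memory.used` (including the model string and the values) is an assumption, though the name must start with `RBLN` for `jsonObjectToAccelerator` to accept it.

```python
# Reconstructed sample of the JSON shape RblnUtil expects; values are made up.
import json

sample = """
{
  "devices": [
    {
      "npu": 0,
      "name": "RBLN-CA12",
      "util": 42.0,
      "memory": {"total": 17179869184, "used": 1073741824}
    }
  ]
}
"""

for dev in json.loads(sample)["devices"]:
    used, total = dev["memory"]["used"], dev["memory"]["total"]
    # Mirrors jsonObjectToAccelerator: device id, model, utilization, memory
    print(dev["npu"], dev["name"], dev["util"], f"{100 * used / total:.1f}%")
```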
13 changes: 13 additions & 0 deletions test/pytest/test_data/rbln_compile/compile.py
@@ -0,0 +1,13 @@
import rebel
import torch
from torchvision.models import ResNet50_Weights, resnet50

weights = ResNet50_Weights.DEFAULT
model = resnet50(weights=weights)
model.eval()

compiled_model = rebel.compile_from_torch(
model,
[("input", [1, 3, 224, 224], torch.float32)],
)
compiled_model.save("resnet50.rbln")
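
After compilation, the artifact can be smoke-tested outside TorchServe. The sketch below mirrors the handler added later in this PR, where both `rebel.Runtime(..., tensor_type="pt")` and `.run(...)` appear; the printed output shape assumes the standard 1000-class ResNet50 head.

```python
# Minimal smoke test of the compiled artifact, mirroring the handler's usage.
import rebel
import torch

module = rebel.Runtime("resnet50.rbln", tensor_type="pt")
dummy = torch.zeros(1, 3, 224, 224, dtype=torch.float32)
output = module.run(dummy)  # same call the handler's inference() makes
print(output.shape)  # expected torch.Size([1, 1000]) for ResNet50
```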
10 changes: 10 additions & 0 deletions test/pytest/test_data/rbln_compile/config.properties
@@ -0,0 +1,10 @@
default_workers_per_model:1

models={\
"resnet50":{\
"1.0":{\
"marName": "resnet50.mar",\
"responseTimeout": 120\
}\
}\
}
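
The config registers `resnet50.mar`, which is not checked in; it has to be produced from the compiled model and the custom handler. A plausible packaging step using TorchServe's standard `torch-model-archiver` tool, where the paths assume `compile.py` has already been run in the working directory:

```python
# Package resnet50.rbln and the custom handler into the resnet50.mar archive
# that config.properties registers.
import os
import subprocess

os.makedirs("model_store", exist_ok=True)
subprocess.run(
    [
        "torch-model-archiver",
        "--model-name", "resnet50",
        "--version", "1.0",
        "--serialized-file", "resnet50.rbln",
        "--handler", "rbln_handler.py",
        "--export-path", "model_store",
    ],
    check=True,
)
```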
102 changes: 102 additions & 0 deletions test/pytest/test_data/rbln_compile/rbln_handler.py
@@ -0,0 +1,102 @@
# rbln_handler.py

import io
import os

import PIL.Image as Image
import rebel # RBLN Runtime
import torch
from torchvision.models import ResNet50_Weights

from ts.torch_handler.base_handler import BaseHandler


class Resnet50Handler(BaseHandler):
def __init__(self):
self._context = None
self.initialized = False
        self.module = None
self.weights = None

def initialize(self, context):
"""
Initialize model. This will be called during model loading time
:param context: Initial context contains model server system properties.
:return:
"""
self._context = context
        # Load the compiled RBLN model artifact from the model archive
model_dir = context.system_properties.get("model_dir")
serialized_file = context.manifest["model"].get("serializedFile")
model_path = os.path.join(model_dir, serialized_file)
if not os.path.isfile(model_path):
raise RuntimeError(
f"[RBLN ERROR] File not found at the specified model_path({model_path})."
)

self.module = rebel.Runtime(model_path, tensor_type="pt")
self.weights = ResNet50_Weights.DEFAULT
self.initialized = True

def preprocess(self, data):
"""
Transform raw input into model input data.
:param batch: list of raw requests, should match batch size
:return: list of preprocessed model input data
"""
input_data = data[0].get("data")
if input_data is None:
input_data = data[0].get("body")
        if input_data is None:
            raise ValueError("[RBLN][ERROR] Data not found in client request.")
if not isinstance(input_data, (bytes, bytearray)):
raise ValueError("[RBLN][ERROR] Preprocessed data is not binary data.")

try:
image = Image.open(io.BytesIO(input_data))
except Exception as e:
raise ValueError(f"[RBLN][ERROR]Invalid image data: {e}")
prep = self.weights.transforms()
batch = prep(image).unsqueeze(0)
preprocessed_data = batch.numpy()

return torch.from_numpy(preprocessed_data)

def inference(self, model_input):
"""
Internal inference methods
:param model_input: transformed model input data
:return: list of inference output in NDArray
"""

model_output = self.module.run(model_input)

return model_output

def postprocess(self, inference_output):
"""
Return inference result.
:param inference_output: list of inference output
:return: list of predict results
"""
score, class_id = torch.topk(inference_output, 1, dim=1)
category_name = self.weights.meta["categories"][class_id]
return category_name

def handle(self, data, context):
"""
        Invoked by TorchServe for a prediction request.
        Does preprocessing of the data, prediction using the model, and postprocessing of the prediction output.
:param data: Input data for prediction
:param context: Initial context contains model server system properties.
:return: prediction output
"""
model_input = self.preprocess(data)
model_output = self.inference(model_input)
category_name = self.postprocess(model_output)

print("[RBLN][INFO] Top1 category: ", category_name)

return [{"result": category_name}]