Gen2 mediapipe selfie segmentation #215

Draft: wants to merge 4 commits into master
7,966 changes: 7,966 additions & 0 deletions gen2-mediapipe-selfiesegmentation/conversion/selfie_segmentation_landscape.xml

Large diffs are not rendered by default.

Binary file not shown.
108 changes: 108 additions & 0 deletions gen2-mediapipe-selfiesegmentation/main.py
@@ -0,0 +1,108 @@
import cv2
import depthai as dai
import numpy as np
import argparse
import time

cam_options = ['rgb', 'left', 'right']

parser = argparse.ArgumentParser()
parser.add_argument("-cam", "--cam_input", help="select camera input source for inference", default='rgb', choices=cam_options)
parser.add_argument("-nn", "--nn_model", help="select model path for inference", default='models/selfie_segmentation_landscape_openvino_2021.4_6shave_RGB_interleaved.blob', type=str)

args = parser.parse_args()

cam_source = args.cam_input
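# NOTE: only the RGB camera is wired up below; the 'left'/'right' choices are not implemented in this draft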
nn_path = args.nn_model

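# Input resolution (width x height) expected by the landscape selfie-segmentation model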
NN_W, NN_H = 256, 144


# Start defining a pipeline
pipeline = dai.Pipeline()
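# Pin the pipeline to the OpenVINO version the blob was compiled for (2021.4, per the blob filename)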
pipeline.setOpenVINOVersion(version = dai.OpenVINO.Version.VERSION_2021_4)

# Define a neural network that will make predictions based on the source frames
detection_nn = pipeline.createNeuralNetwork()
detection_nn.setBlobPath(nn_path)

detection_nn.setNumPoolFrames(4)
detection_nn.input.setBlocking(False)
detection_nn.setNumInferenceThreads(2)

# Define a source - color camera
cam = pipeline.createColorCamera()
cam.setPreviewSize(NN_W,NN_H)
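# Interleaved RGB preview to match the expected input layout of the RGB_interleaved blob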
cam.setInterleaved(True)
cam.setColorOrder(dai.ColorCameraProperties.ColorOrder.RGB)
cam.preview.link(detection_nn.input)
cam.setFps(50)

# Create outputs
xout_rgb = pipeline.createXLinkOut()
xout_rgb.setStreamName("nn_input")
xout_rgb.input.setBlocking(False)

detection_nn.passthrough.link(xout_rgb.input)

xout_nn = pipeline.createXLinkOut()
xout_nn.setStreamName("nn")
xout_nn.input.setBlocking(False)

detection_nn.out.link(xout_nn.input)

# Pipeline defined, now the device is assigned and pipeline is started
device = dai.Device(pipeline)
device.startPipeline()

# Output queues will be used to get the rgb frames and nn data from the outputs defined above
q_nn_input = device.getOutputQueue(name="nn_input", maxSize=4, blocking=False)
q_nn = device.getOutputQueue(name="nn", maxSize=4, blocking=False)

start_time = time.time()
counter = 0
fps = 0

while True:
    # get() blocks until new data arrives on each queue
    in_nn_input = q_nn_input.get()
    in_nn = q_nn.get()

    frame = in_nn_input.getCvFrame()
    # Reshape the flat FP16 output into a per-pixel foreground-probability map (H x W)
    lay1 = in_nn.getFirstLayerFp16()
    pred = np.array(lay1, dtype=np.float16).reshape((NN_H, NN_W))

    # Threshold the probabilities and broadcast the mask across the 3 color channels
    condition = np.stack([pred > 0.15] * 3, axis=2)
    # Composite: keep camera pixels where the mask is foreground, white elsewhere
    bg_image = np.full(frame.shape, 255, dtype=np.uint8)
    output_image = np.where(condition, frame, bg_image).astype(np.uint8)

    # Draw the FPS label on a white box in the bottom-left corner
    color_black, color_white = (0, 0, 0), (255, 255, 255)
    label_fps = "Fps: {:.2f}".format(fps)
    (w1, h1), _ = cv2.getTextSize(label_fps, cv2.FONT_HERSHEY_TRIPLEX, 0.4, 1)
    cv2.rectangle(output_image, (0, output_image.shape[0] - h1 - 6), (w1 + 2, output_image.shape[0]), color_white, -1)
    cv2.putText(output_image, label_fps, (2, output_image.shape[0] - 4), cv2.FONT_HERSHEY_TRIPLEX, 0.4, color_black)

    # Note: the preview is RGB (for the NN), so these OpenCV windows show swapped channels
    cv2.imshow("nn_input", frame)
    cv2.imshow("result", output_image)

    # Update the FPS estimate once per second
    counter += 1
    if (time.time() - start_time) > 1:
        fps = counter / (time.time() - start_time)
        counter = 0
        start_time = time.time()

    if cv2.waitKey(1) == ord('q'):
        break
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
124 changes: 124 additions & 0 deletions gen2-selfie-segmentation/main.py
@@ -0,0 +1,124 @@
#!/usr/bin/env python3

from pathlib import Path
import cv2
import depthai as dai
import numpy as np
import argparse
import time
import sys

cam_options = ['rgb', 'left', 'right']

parser = argparse.ArgumentParser()
parser.add_argument("-cam", "--cam_input", help="select camera input source for inference", default='rgb', choices=cam_options)
parser.add_argument("-nn", "--nn_model", help="select model path for inference", default='models/model.blob', type=str)

args = parser.parse_args()

cam_source = args.cam_input
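# NOTE: as in the mediapipe demo, only the RGB camera is wired up in this draft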
nn_path = args.nn_model

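# Square input size (height = width = 256) expected by the segmentation model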
nn_shape = 256

def decode_deeplabv3p(output):
    # Map class index 0 (background) to black and 1 (person) to green
    class_colors = np.asarray([[0, 0, 0], [0, 255, 0]], dtype=np.uint8)
    return np.take(class_colors, output, axis=0)

def show_deeplabv3p(output_colors, frame, weight=0.2):
    # Blend the class-color overlay on top of the camera frame
    return cv2.addWeighted(frame, 1, output_colors, weight, 0)



# Start defining a pipeline
pipeline = dai.Pipeline()

# pipeline.setOpenVINOVersion(version = dai.OpenVINO.Version.VERSION_2021_2)

# Define a neural network that will make predictions based on the source frames
detection_nn = pipeline.createNeuralNetwork()
detection_nn.setBlobPath(nn_path)

detection_nn.setNumPoolFrames(4)
detection_nn.input.setBlocking(False)
detection_nn.setNumInferenceThreads(2)

# Define a source - color camera
cam = pipeline.createColorCamera()
cam.setPreviewSize(nn_shape,nn_shape)
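# FP16 planar preview frames feed the NN directly; the display loop below converts them back to uint8 HWC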
cam.setFp16(True)
cam.setInterleaved(False)
cam.preview.link(detection_nn.input)
cam.setFps(50)

# Create outputs
xout_rgb = pipeline.createXLinkOut()
xout_rgb.setStreamName("nn_input")
xout_rgb.input.setBlocking(False)

detection_nn.passthrough.link(xout_rgb.input)

xout_nn = pipeline.createXLinkOut()
xout_nn.setStreamName("nn")
xout_nn.input.setBlocking(False)

detection_nn.out.link(xout_nn.input)

# Pipeline defined, now the device is assigned and pipeline is started
device = dai.Device(pipeline)
device.startPipeline()

# Output queues will be used to get the rgb frames and nn data from the outputs defined above
q_nn_input = device.getOutputQueue(name="nn_input", maxSize=4, blocking=False)
q_nn = device.getOutputQueue(name="nn", maxSize=4, blocking=False)

start_time = time.time()
counter = 0
fps = 0
frame = None  # overlay is skipped until the first camera frame arrives

while True:
    # get() blocks until new data arrives on each queue
    in_nn_input = q_nn_input.get()
    in_nn = q_nn.get()

    if in_nn_input is not None:
        # The preview is planar FP16 (because of cam.setFp16(True)): view the raw bytes
        # as float16, reshape to CHW, then convert to a HWC uint8 frame for OpenCV
        shape = (3, in_nn_input.getHeight(), in_nn_input.getWidth())
        frame = in_nn_input.getData().view(np.float16).reshape(shape).transpose(1, 2, 0).astype(np.uint8)
        cv2.imshow("rgb", frame)

    if in_nn is not None:
        # Reshape the flat FP16 output into a 2D class map
        lay1 = np.asarray(in_nn.getFirstLayerFp16(), dtype=np.float16).reshape((nn_shape, nn_shape))
        # Cast to integer class indices (0 = background, 1 = person) for the color lookup
        class_map = np.array(lay1, dtype=np.int32)
        output_colors = decode_deeplabv3p(class_map)
        if frame is not None:
            cv2.imshow("selfie", show_deeplabv3p(output_colors, frame, 1.0))
            frame = show_deeplabv3p(output_colors, frame)
            cv2.putText(frame, "NN fps: {:.2f}".format(fps), (20, 20), cv2.FONT_HERSHEY_TRIPLEX, 0.4, (255, 0, 0))
            cv2.imshow("nn_input", frame)

    # Update the FPS estimate once per second
    counter += 1
    if (time.time() - start_time) > 1:
        fps = counter / (time.time() - start_time)
        counter = 0
        start_time = time.time()

    if cv2.waitKey(1) == ord('q'):
        break
Binary file not shown.
2 changes: 2 additions & 0 deletions gen2-selfie-segmentation/requirements.txt
@@ -0,0 +1,2 @@
opencv-python
depthai==2.0.0.1