From ea2f1b0346f7d557cf8d4408b4f84f4cb4920203 Mon Sep 17 00:00:00 2001
From: richard-xx <99704599+richard-xx@users.noreply.github.com>
Date: Thu, 7 Dec 2023 17:33:31 +0800
Subject: [PATCH] Update yolo example for stereo/no-stereo - Humble (#447)

---
 depthai_examples/CMakeLists.txt               |   3 +
 .../launch/yolov4_publisher.launch.py         |  54 ++++--
 depthai_examples/src/yolov4_publisher.cpp     | 141 +++++++++++++++
 .../src/yolov4_spatial_publisher.cpp          | 160 +++++++-----------
 4 files changed, 246 insertions(+), 112 deletions(-)
 create mode 100644 depthai_examples/src/yolov4_publisher.cpp

diff --git a/depthai_examples/CMakeLists.txt b/depthai_examples/CMakeLists.txt
index 61273f8b..fd2e230f 100644
--- a/depthai_examples/CMakeLists.txt
+++ b/depthai_examples/CMakeLists.txt
@@ -101,9 +101,11 @@ dai_add_node_ros2(stereo_inertial_node src/stereo_inertial_publisher.cpp)
 dai_add_node_ros2(feature_tracker src/feature_tracker_publisher.cpp)
 dai_add_node_ros2(stereo_node src/stereo_publisher.cpp)
 dai_add_node_ros2(yolov4_spatial_node src/yolov4_spatial_publisher.cpp)
+dai_add_node_ros2(yolov4_node src/yolov4_publisher.cpp)
 
 target_compile_definitions(mobilenet_node PRIVATE BLOB_NAME="${mobilenet_blob_name}")
 target_compile_definitions(yolov4_spatial_node PRIVATE BLOB_NAME="${tiny_yolo_v4_blob_name}")
+target_compile_definitions(yolov4_node PRIVATE BLOB_NAME="${tiny_yolo_v4_blob_name}")
 target_compile_definitions(stereo_inertial_node PRIVATE BLOB_NAME="${tiny_yolo_v4_blob_name}")
 
 if($ENV{ROS_DISTRO} STREQUAL "galactic")
@@ -124,6 +126,7 @@ install(TARGETS
         stereo_inertial_node
         stereo_node
         yolov4_spatial_node
+        yolov4_node
         feature_tracker
         DESTINATION lib/${PROJECT_NAME})
 
diff --git a/depthai_examples/launch/yolov4_publisher.launch.py b/depthai_examples/launch/yolov4_publisher.launch.py
index 683379ba..79a3e773 100644
--- a/depthai_examples/launch/yolov4_publisher.launch.py
+++ b/depthai_examples/launch/yolov4_publisher.launch.py
@@ -4,6 +4,7 @@ from launch import LaunchDescription, launch_description_sources
 from launch.actions import IncludeLaunchDescription
 from launch.actions import DeclareLaunchArgument
+from launch.conditions import IfCondition, UnlessCondition
 from launch.substitutions import LaunchConfiguration
 import launch_ros.actions
 import launch_ros.descriptions
@@ -131,7 +132,14 @@ def generate_launch_description():
         'monoResolution',
         default_value=monoResolution,
         description='Contains the resolution of the Mono Cameras. 
Available resolutions are 800p, 720p & 400p for OAK-D & 480p for OAK-D-Lite.')
-    
+
+    # declare_spatial_camera_cmd = DeclareLaunchArgument(
+    #     'spatial_camera',
+    #     default_value="true",
+    #     description='Enable spatial camera'
+    # )
+
+
     urdf_launch = IncludeLaunchDescription(
                             launch_description_sources.PythonLaunchDescriptionSource(
                                     os.path.join(urdf_launch_dir, 'urdf_launch.py')),
                             launch_arguments={'tf_prefix'   : tf_prefix,
                                               'camera_model': camera_model,
                                               'base_frame'  : base_frame,
                                               'parent_frame': parent_frame,
                                               'cam_pos_x'   : cam_pos_x,
                                               'cam_pos_y'   : cam_pos_y,
                                               'cam_pos_z'   : cam_pos_z,
                                               'cam_roll'    : cam_roll,
                                               'cam_pitch'   : cam_pitch,
                                               'cam_yaw'     : cam_yaw}.items())
 
     yolov4_spatial_node = launch_ros.actions.Node(
-            package='depthai_examples', executable='yolov4_spatial_node',
-            output='screen',
-            parameters=[{'tf_prefix': tf_prefix},
-                        {'camera_param_uri': camera_param_uri},
-                        {'sync_nn': sync_nn},
-                        {'nnName': nnName},
-                        {'resourceBaseFolder': resourceBaseFolder},
-                        {'monoResolution': monoResolution},
-                        {'spatial_camera': spatial_camera}])
+            package='depthai_examples', executable='yolov4_spatial_node',
+            output='screen',
+            parameters=[{'tf_prefix': tf_prefix},
+                        {'camera_param_uri': camera_param_uri},
+                        {'sync_nn': sync_nn},
+                        {'nnName': nnName},
+                        {'resourceBaseFolder': resourceBaseFolder},
+                        {'monoResolution': monoResolution},
+                        {'subpixel': subpixel},
+                        {'lrCheckTresh': lrCheckTresh},
+                        {'confidence': confidence},
+                        ],
+            condition=IfCondition(LaunchConfiguration('spatial_camera'))
+            )
+
+    yolov4_node = launch_ros.actions.Node(
+            package='depthai_examples', executable='yolov4_node',
+            output='screen',
+            parameters=[{'tf_prefix': tf_prefix},
+                        {'camera_param_uri': camera_param_uri},
+                        {'sync_nn': sync_nn},
+                        {'nnName': nnName},
+                        {'resourceBaseFolder': resourceBaseFolder},
+                        ],
+            condition=UnlessCondition(LaunchConfiguration('spatial_camera'))
+            )
 
     rviz_node = launch_ros.actions.Node(
             package='rviz2', executable='rviz2', output='screen',
@@ -181,11 +206,12 @@
     ld.add_action(declare_sync_nn_cmd)
 
     ld.add_action(urdf_launch)
-    if spatial_camera == True:
-        ld.add_action(declare_subpixel_cmd)
-        ld.add_action(declare_lrCheckTresh_cmd)
-        ld.add_action(declare_monoResolution_cmd)
+    ld.add_action(declare_subpixel_cmd)
+    ld.add_action(declare_lrCheckTresh_cmd)
+    ld.add_action(declare_monoResolution_cmd)
+
     ld.add_action(yolov4_spatial_node)
+    ld.add_action(yolov4_node)
 
     return ld
 
diff --git a/depthai_examples/src/yolov4_publisher.cpp b/depthai_examples/src/yolov4_publisher.cpp
new file mode 100644
index 00000000..2147c521
--- /dev/null
+++ b/depthai_examples/src/yolov4_publisher.cpp
@@ -0,0 +1,141 @@
+
+#include <cstdio>
+#include <iostream>
+
+#include "camera_info_manager/camera_info_manager.hpp"
+#include "depthai_bridge/BridgePublisher.hpp"
+#include "depthai_bridge/ImageConverter.hpp"
+#include "depthai_bridge/ImgDetectionConverter.hpp"
+#include "rclcpp/executors.hpp"
+#include "rclcpp/node.hpp"
+#include "sensor_msgs/msg/image.hpp"
+#include "vision_msgs/msg/detection2_d_array.hpp"
+
+// Includes common necessary includes for development using depthai library
+#include "depthai/device/DataQueue.hpp"
+#include "depthai/device/Device.hpp"
+#include "depthai/pipeline/Pipeline.hpp"
+#include "depthai/pipeline/node/ColorCamera.hpp"
+#include "depthai/pipeline/node/DetectionNetwork.hpp"
+#include "depthai/pipeline/node/XLinkOut.hpp"
+
+const std::vector<std::string> label_map = {
+    "person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat",
+    "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse",
+    "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag",
+    "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove",
+    "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon",
+    "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza",
+    "donut", "cake", "chair", "sofa", "pottedplant", "bed", "diningtable", "toilet", "tvmonitor",
+    "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink",
+    "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"};
+
+dai::Pipeline createPipeline(bool syncNN, std::string nnPath) {
+    dai::Pipeline pipeline;
+    auto colorCam = pipeline.create<dai::node::ColorCamera>();
+    auto detectionNetwork = pipeline.create<dai::node::YoloDetectionNetwork>();
+
+    // create xlink connections
+    auto xoutRgb = pipeline.create<dai::node::XLinkOut>();
+    auto xoutNN = pipeline.create<dai::node::XLinkOut>();
+
+    xoutRgb->setStreamName("preview");
+    xoutNN->setStreamName("detections");
+
+    // Properties
+    colorCam->setPreviewSize(416, 416);
+    colorCam->setResolution(dai::ColorCameraProperties::SensorResolution::THE_1080_P);
+    colorCam->setInterleaved(false);
+    colorCam->setColorOrder(dai::ColorCameraProperties::ColorOrder::BGR);
+    colorCam->setFps(40);
+
+    // Network specific settings
+    detectionNetwork->setConfidenceThreshold(0.5f);
+    detectionNetwork->setNumClasses(80);
+    detectionNetwork->setCoordinateSize(4);
+    detectionNetwork->setAnchors({10, 14, 23, 27, 37, 58, 81, 82, 135, 169, 344, 319});
+    detectionNetwork->setAnchorMasks({{"side26", {1, 2, 3}}, {"side13", {3, 4, 5}}});
+    detectionNetwork->setIouThreshold(0.5f);
+    detectionNetwork->setBlobPath(nnPath);
+    detectionNetwork->setNumInferenceThreads(2);
+    detectionNetwork->input.setBlocking(false);
+
+    // Linking
+    colorCam->preview.link(detectionNetwork->input);
+    if(syncNN)
+        detectionNetwork->passthrough.link(xoutRgb->input);
+    else
+        colorCam->preview.link(xoutRgb->input);
+
+    detectionNetwork->out.link(xoutNN->input);
+    return pipeline;
+}
+
+int main(int argc, char** argv) {
+    rclcpp::init(argc, argv);
+    auto node = rclcpp::Node::make_shared("yolov4_node");
+
+    std::string tfPrefix, resourceBaseFolder, nnPath;
+    std::string camera_param_uri;
+    std::string nnName(BLOB_NAME);  // Set your blob name for the model here
+    bool syncNN;
+    std::string monoResolution = "400p";
+
+    node->declare_parameter("tf_prefix", "oak");
+    node->declare_parameter("camera_param_uri", camera_param_uri);
+    node->declare_parameter("sync_nn", true);
+    node->declare_parameter("nnName", "");
+    node->declare_parameter("resourceBaseFolder", "");
+
+    node->get_parameter("tf_prefix", tfPrefix);
+    node->get_parameter("camera_param_uri", camera_param_uri);
+    node->get_parameter("sync_nn", syncNN);
+    node->get_parameter("resourceBaseFolder", resourceBaseFolder);
+
+    if(resourceBaseFolder.empty()) {
+        throw std::runtime_error("Send the path to the resource folder containing NNBlob in \'resourceBaseFolder\' ");
+    }
+
+    std::string nnParam;
+    node->get_parameter("nnName", nnParam);
+    if(nnParam != "x") {
+        node->get_parameter("nnName", nnName);
+    }
+
+    nnPath = resourceBaseFolder + "/" + nnName;
+    dai::Pipeline pipeline = createPipeline(syncNN, nnPath);
+    dai::Device device(pipeline);
+
+    auto colorQueue = device.getOutputQueue("preview", 30, false);
+    auto detectionQueue = device.getOutputQueue("detections", 30, false);
+    auto calibrationHandler = device.readCalibration();
+
+    dai::rosBridge::ImageConverter rgbConverter(tfPrefix + "_rgb_camera_optical_frame", false);
+    auto rgbCameraInfo = rgbConverter.calibrationToCameraInfo(calibrationHandler, dai::CameraBoardSocket::CAM_A, -1, -1);
+    dai::rosBridge::BridgePublisher<sensor_msgs::msg::Image, dai::ImgFrame> rgbPublish(
+        colorQueue,
+        node,
+        std::string("color/image"),
+        std::bind(&dai::rosBridge::ImageConverter::toRosMsg,
+                  &rgbConverter,  // the converter uses the same frame name and image type, so it can be reused
+                  std::placeholders::_1,
+                  std::placeholders::_2),
+        30,
+        rgbCameraInfo,
+        "color");
+
+    dai::rosBridge::ImgDetectionConverter detConverter(tfPrefix + "_rgb_camera_optical_frame", 416, 416, false);
+    dai::rosBridge::BridgePublisher<vision_msgs::msg::Detection2DArray, dai::ImgDetections> detectionPublish(
+        detectionQueue,
+        node,
+        std::string("color/yolov4_detections"),
+        std::bind(&dai::rosBridge::ImgDetectionConverter::toRosMsg, &detConverter, std::placeholders::_1, std::placeholders::_2),
+        30);
+
+    detectionPublish.addPublisherCallback();
+    rgbPublish.addPublisherCallback();  // addPublisherCallback works only when the dataqueue is non blocking.
+
+    rclcpp::spin(node);
+
+    return 0;
+}
diff --git a/depthai_examples/src/yolov4_spatial_publisher.cpp b/depthai_examples/src/yolov4_spatial_publisher.cpp
index 1d7df58e..fe07bb9f 100644
--- a/depthai_examples/src/yolov4_spatial_publisher.cpp
+++ b/depthai_examples/src/yolov4_spatial_publisher.cpp
@@ -34,13 +34,11 @@ const std::vector<std::string> label_map = {
     "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink",
     "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"};
 
-dai::Pipeline createPipeline(bool spatial_camera, bool syncNN, bool subpixel, std::string nnPath, int confidence, int LRchecktresh, std::string resolution) {
+dai::Pipeline createPipeline(bool syncNN, bool subpixel, std::string nnPath, int confidence, int LRchecktresh, std::string resolution) {
     dai::Pipeline pipeline;
     dai::node::MonoCamera::Properties::SensorResolution monoResolution;
     auto colorCam = pipeline.create<dai::node::ColorCamera>();
-    auto camRgb = pipeline.create<dai::node::ColorCamera>();  // non spatial add in
     auto spatialDetectionNetwork = pipeline.create<dai::node::YoloSpatialDetectionNetwork>();
-    auto detectionNetwork = pipeline.create<dai::node::YoloDetectionNetwork>();
     auto monoLeft = pipeline.create<dai::node::MonoCamera>();
     auto monoRight = pipeline.create<dai::node::MonoCamera>();
     auto stereo = pipeline.create<dai::node::StereoDepth>();
@@ -49,104 +47,71 @@
     auto xoutRgb = pipeline.create<dai::node::XLinkOut>();
     auto xoutDepth = pipeline.create<dai::node::XLinkOut>();
     auto xoutNN = pipeline.create<dai::node::XLinkOut>();
-    auto nnOut = pipeline.create<dai::node::XLinkOut>();  // non spatial add in
 
     xoutRgb->setStreamName("preview");
     xoutNN->setStreamName("detections");
-    if(spatial_camera == true) {
-        xoutDepth->setStreamName("depth");
-
-        colorCam->setPreviewSize(416, 416);
-        colorCam->setResolution(dai::ColorCameraProperties::SensorResolution::THE_1080_P);
-        colorCam->setInterleaved(false);
-        colorCam->setColorOrder(dai::ColorCameraProperties::ColorOrder::BGR);
-
-        if(resolution == "720p") {
-            monoResolution = dai::node::MonoCamera::Properties::SensorResolution::THE_720_P;
-        } else if(resolution == "400p") {
-            monoResolution = dai::node::MonoCamera::Properties::SensorResolution::THE_400_P;
-        } else if(resolution == "800p") {
-            monoResolution = dai::node::MonoCamera::Properties::SensorResolution::THE_800_P;
-        } else if(resolution == "480p") {
-            monoResolution = dai::node::MonoCamera::Properties::SensorResolution::THE_480_P;
-        } else {
-            RCLCPP_ERROR(rclcpp::get_logger("rclcpp"), "Invalid parameter. 
-> monoResolution: %s", resolution.c_str()); - throw std::runtime_error("Invalid mono camera resolution."); - } - - monoLeft->setResolution(monoResolution); - monoLeft->setBoardSocket(dai::CameraBoardSocket::CAM_B); - monoRight->setResolution(monoResolution); - monoRight->setBoardSocket(dai::CameraBoardSocket::CAM_C); - - /// setting node configs - stereo->initialConfig.setConfidenceThreshold(confidence); - stereo->setRectifyEdgeFillColor(0); // black, to better see the cutout - stereo->initialConfig.setLeftRightCheckThreshold(LRchecktresh); - stereo->setSubpixel(subpixel); - stereo->setDepthAlign(dai::CameraBoardSocket::CAM_A); - - spatialDetectionNetwork->setBlobPath(nnPath); - spatialDetectionNetwork->setConfidenceThreshold(0.5f); - spatialDetectionNetwork->input.setBlocking(false); - spatialDetectionNetwork->setBoundingBoxScaleFactor(0.5); - spatialDetectionNetwork->setDepthLowerThreshold(100); - spatialDetectionNetwork->setDepthUpperThreshold(5000); - - // yolo specific parameters - spatialDetectionNetwork->setNumClasses(80); - spatialDetectionNetwork->setCoordinateSize(4); - spatialDetectionNetwork->setAnchors({10, 14, 23, 27, 37, 58, 81, 82, 135, 169, 344, 319}); - spatialDetectionNetwork->setAnchorMasks({{"side13", {3, 4, 5}}, {"side26", {1, 2, 3}}}); - spatialDetectionNetwork->setIouThreshold(0.5f); - - // Link plugins CAM -> STEREO -> XLINK - monoLeft->out.link(stereo->left); - monoRight->out.link(stereo->right); - - // Link plugins CAM -> NN -> XLINK - colorCam->preview.link(spatialDetectionNetwork->input); - if(syncNN) - spatialDetectionNetwork->passthrough.link(xoutRgb->input); - else - colorCam->preview.link(xoutRgb->input); - - spatialDetectionNetwork->out.link(xoutNN->input); - - stereo->depth.link(spatialDetectionNetwork->inputDepth); - spatialDetectionNetwork->passthroughDepth.link(xoutDepth->input); + xoutDepth->setStreamName("depth"); + + colorCam->setPreviewSize(416, 416); + colorCam->setResolution(dai::ColorCameraProperties::SensorResolution::THE_1080_P); + colorCam->setInterleaved(false); + colorCam->setColorOrder(dai::ColorCameraProperties::ColorOrder::BGR); + + if(resolution == "720p") { + monoResolution = dai::node::MonoCamera::Properties::SensorResolution::THE_720_P; + } else if(resolution == "400p") { + monoResolution = dai::node::MonoCamera::Properties::SensorResolution::THE_400_P; + } else if(resolution == "800p") { + monoResolution = dai::node::MonoCamera::Properties::SensorResolution::THE_800_P; + } else if(resolution == "480p") { + monoResolution = dai::node::MonoCamera::Properties::SensorResolution::THE_480_P; } else { - xoutRgb->setStreamName("rgb"); - nnOut->setStreamName("detections"); - - // Properties - camRgb->setPreviewSize(416, 416); - camRgb->setResolution(dai::ColorCameraProperties::SensorResolution::THE_1080_P); - camRgb->setInterleaved(false); - camRgb->setColorOrder(dai::ColorCameraProperties::ColorOrder::BGR); - camRgb->setFps(40); - - // Network specific settings - detectionNetwork->setConfidenceThreshold(0.5f); - detectionNetwork->setNumClasses(80); - detectionNetwork->setCoordinateSize(4); - detectionNetwork->setAnchors({10, 14, 23, 27, 37, 58, 81, 82, 135, 169, 344, 319}); - detectionNetwork->setAnchorMasks({{"side26", {1, 2, 3}}, {"side13", {3, 4, 5}}}); - detectionNetwork->setIouThreshold(0.5f); - detectionNetwork->setBlobPath(nnPath); - detectionNetwork->setNumInferenceThreads(2); - detectionNetwork->input.setBlocking(false); - - // Linking - camRgb->preview.link(detectionNetwork->input); - if(syncNN) - 
detectionNetwork->passthrough.link(xoutRgb->input);
-        else
-            camRgb->preview.link(xoutRgb->input);
-
-        detectionNetwork->out.link(nnOut->input);
+        RCLCPP_ERROR(rclcpp::get_logger("rclcpp"), "Invalid parameter. -> monoResolution: %s", resolution.c_str());
+        throw std::runtime_error("Invalid mono camera resolution.");
     }
 
+    monoLeft->setResolution(monoResolution);
+    monoLeft->setBoardSocket(dai::CameraBoardSocket::CAM_B);
+    monoRight->setResolution(monoResolution);
+    monoRight->setBoardSocket(dai::CameraBoardSocket::CAM_C);
+
+    /// setting node configs
+    stereo->initialConfig.setConfidenceThreshold(confidence);
+    stereo->setRectifyEdgeFillColor(0);  // black, to better see the cutout
+    stereo->initialConfig.setLeftRightCheckThreshold(LRchecktresh);
+    stereo->setSubpixel(subpixel);
+    stereo->setDepthAlign(dai::CameraBoardSocket::CAM_A);
+
+    spatialDetectionNetwork->setBlobPath(nnPath);
+    spatialDetectionNetwork->setConfidenceThreshold(0.5f);
+    spatialDetectionNetwork->input.setBlocking(false);
+    spatialDetectionNetwork->setBoundingBoxScaleFactor(0.5);
+    spatialDetectionNetwork->setDepthLowerThreshold(100);
+    spatialDetectionNetwork->setDepthUpperThreshold(5000);
+
+    // yolo specific parameters
+    spatialDetectionNetwork->setNumClasses(80);
+    spatialDetectionNetwork->setCoordinateSize(4);
+    spatialDetectionNetwork->setAnchors({10, 14, 23, 27, 37, 58, 81, 82, 135, 169, 344, 319});
+    spatialDetectionNetwork->setAnchorMasks({{"side13", {3, 4, 5}}, {"side26", {1, 2, 3}}});
+    spatialDetectionNetwork->setIouThreshold(0.5f);
+
+    // Link plugins CAM -> STEREO -> XLINK
+    monoLeft->out.link(stereo->left);
+    monoRight->out.link(stereo->right);
+
+    // Link plugins CAM -> NN -> XLINK
+    colorCam->preview.link(spatialDetectionNetwork->input);
+    if(syncNN)
+        spatialDetectionNetwork->passthrough.link(xoutRgb->input);
+    else
+        colorCam->preview.link(xoutRgb->input);
+
+    spatialDetectionNetwork->out.link(xoutNN->input);
+
+    stereo->depth.link(spatialDetectionNetwork->inputDepth);
+    spatialDetectionNetwork->passthroughDepth.link(xoutDepth->input);
+
     return pipeline;
 }
@@ -157,7 +122,7 @@ int main(int argc, char** argv) {
     std::string tfPrefix, resourceBaseFolder, nnPath;
     std::string camera_param_uri;
     std::string nnName(BLOB_NAME);  // Set your blob name for the model here
-    bool syncNN, subpixel, spatial_camera;
+    bool syncNN, subpixel;
     int confidence = 200, LRchecktresh = 5;
     std::string monoResolution = "400p";
 
@@ -179,7 +144,6 @@ int main(int argc, char** argv) {
     node->get_parameter("LRchecktresh", LRchecktresh);
     node->get_parameter("monoResolution", monoResolution);
     node->get_parameter("resourceBaseFolder", resourceBaseFolder);
-    node->get_parameter("spatial_camera", spatial_camera);
 
     if(resourceBaseFolder.empty()) {
         throw std::runtime_error("Send the path to the resource folder containing NNBlob in \'resourceBaseFolder\' ");
@@ -192,7 +156,7 @@ int main(int argc, char** argv) {
     }
 
     nnPath = resourceBaseFolder + "/" + nnName;
-    dai::Pipeline pipeline = createPipeline(spatial_camera, syncNN, subpixel, nnPath, confidence, LRchecktresh, monoResolution);
+    dai::Pipeline pipeline = createPipeline(syncNN, subpixel, nnPath, confidence, LRchecktresh, monoResolution);
     dai::Device device(pipeline);
 
     auto colorQueue = device.getOutputQueue("preview", 30, false);
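
Usage sketch, assuming the 'spatial_camera' launch configuration defaults to
"true" elsewhere in yolov4_publisher.launch.py (its DeclareLaunchArgument is
left commented out in this patch); all other names are taken from the diff
above:

    # stereo/spatial pipeline: IfCondition starts yolov4_spatial_node
    ros2 launch depthai_examples yolov4_publisher.launch.py

    # RGB-only pipeline: UnlessCondition starts the new yolov4_node
    ros2 launch depthai_examples yolov4_publisher.launch.py spatial_camera:=false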