diff --git a/README.md b/README.md index 6d5180f..906050e 100644 --- a/README.md +++ b/README.md @@ -39,18 +39,18 @@ Current features: - Detect over 80 categories of objects, using an efficient model ([EdgeYOLO](https://github.com/LSH9832/edgeyolo)) - 3 Model sizes: Small, Medium and Large +- Face detection model, fast and efficient ([YuNet](https://github.com/opencv/opencv_zoo/tree/main/models/face_detection_yunet)) - Load custom ONNX detection models from disk -- Control detection threshold -- Select object category filter (e.g. find only "Person") -- Masking: Blur, Solid color, Transparent, output binary mask (combine with other plugins!) -- Tracking: Single object / All objects, Zoom factor, smooth transition +- Filter by: minimum detection confidence, object category (e.g. only "Person"), minimum object size +- Masking: Blur, Pixelate, Solid color, Transparent, output binary mask (combine with other plugins!) +- Tracking: Single object / Biggest / Oldest / All objects, Zoom factor, smooth transition +- SORT algorithm for tracking smoothness and continuity +- Save detections to file in real time, for integrations, e.g. with Streamer.bot Roadmap features: - Precise object mask, beyond bounding box -- Implement SORT tracking for smoothness - Multiple object category selection (e.g. 
Dog + Cat + Duck) - Make available detection information for other plugins through settings -- More real-time models choices ## Train and use a custom detection model diff --git a/data/effects/pixelate.effect b/data/effects/pixelate.effect new file mode 100644 index 0000000..d371eda --- /dev/null +++ b/data/effects/pixelate.effect @@ -0,0 +1,51 @@ +uniform float4x4 ViewProj; +uniform texture2d image; +uniform texture2d focalmask; + +uniform float pixel_size; // Size of the pixelation +uniform float2 tex_size; // Size of the texture in pixels + +sampler_state textureSampler { + Filter = Linear; + AddressU = Clamp; + AddressV = Clamp; +}; + +struct VertDataIn { + float4 pos : POSITION; + float2 uv : TEXCOORD0; +}; + +struct VertDataOut { + float4 pos : POSITION; + float2 uv : TEXCOORD0; +}; + +VertDataOut VSDefault(VertDataIn v_in) +{ + VertDataOut vert_out; + vert_out.pos = mul(float4(v_in.pos.xyz, 1.0), ViewProj); + vert_out.uv = v_in.uv; + return vert_out; +} + +float4 PSPixelate(VertDataOut v_in) : TARGET +{ + if (focalmask.Sample(textureSampler, v_in.uv).r == 0) { + // No mask - return the original image value without any pixelation + return image.Sample(textureSampler, v_in.uv); + } + + float2 pixelUV = v_in.uv * tex_size; // Convert to pixel coordinates + float2 pixelatedUV = floor(pixelUV / pixel_size) * pixel_size / tex_size; + return image.Sample(textureSampler, pixelatedUV); +} + +technique Draw +{ + pass + { + vertex_shader = VSDefault(v_in); + pixel_shader = PSPixelate(v_in); + } +} diff --git a/data/locale/en-US.ini b/data/locale/en-US.ini index 9f7433e..c574b17 100644 --- a/data/locale/en-US.ini +++ b/data/locale/en-US.ini @@ -7,7 +7,7 @@ GPUTensorRT="GPU (TensorRT)" GPUDirectML="GPU (DirectML)" CoreML="CoreML" NumThreads="Number of Threads" -ModelSize="Model Size" +ModelSize="Model" SmallFast="Small (Fast)" Medium="Medium" LargeSlow="Large (Accurate)" @@ -22,7 +22,7 @@ Blur="Blur" OutputMask="Output Mask" Transparent="Transparent" MaskingColor="Masking 
Color" -MaskingBlurRadius="Masking Blur Radius" +MaskingBlurRadius="Blur / Pixelate Size" TrackingZoomFollowGroup="Tracking (Zoom, Follow) Options" ZoomFactor="Zoom Factor" ZoomObject="Zoom Object" @@ -40,3 +40,9 @@ CropLeft="Left" CropTop="Top" CropRight="Right" CropBottom="Bottom" +Pixelate="Pixelate" +DilationIterations="Dilation" +Biggest="Biggest" +Oldest="Oldest" +FaceDetect="Face Detection" +MinSizeThreshold="Min. Object Area" diff --git a/src/FilterData.h b/src/FilterData.h index ab0aa4e..25f5640 100644 --- a/src/FilterData.h +++ b/src/FilterData.h @@ -17,11 +17,13 @@ struct filter_data { float conf_threshold; std::string modelSize; + int minAreaThreshold; int objectCategory; bool maskingEnabled; std::string maskingType; int maskingColor; int maskingBlurRadius; + int maskingDilateIterations; bool trackingEnabled; float zoomFactor; float zoomSpeedFactor; @@ -46,6 +48,7 @@ struct filter_data { gs_stagesurf_t *stagesurface; gs_effect_t *kawaseBlurEffect; gs_effect_t *maskingEffect; + gs_effect_t *pixelateEffect; cv::Mat inputBGRA; cv::Mat outputPreviewBGRA; diff --git a/src/consts.h b/src/consts.h index 7d9b5b0..f1d927b 100644 --- a/src/consts.h +++ b/src/consts.h @@ -9,6 +9,7 @@ const char *const USEGPU_COREML = "coreml"; const char *const KAWASE_BLUR_EFFECT_PATH = "effects/kawase_blur.effect"; const char *const MASKING_EFFECT_PATH = "effects/masking.effect"; +const char *const PIXELATE_EFFECT_PATH = "effects/pixelate.effect"; const char *const PLUGIN_INFO_TEMPLATE = "Detect Plugin (%1) by " diff --git a/src/detect-filter.cpp b/src/detect-filter.cpp index f20ebd6..09fce84 100644 --- a/src/detect-filter.cpp +++ b/src/detect-filter.cpp @@ -58,7 +58,8 @@ static bool enable_advanced_settings(obs_properties_t *ppts, obs_property_t *p, for (const char *prop_name : {"threshold", "useGPU", "numThreads", "model_size", "detected_object", "sort_tracking", - "max_unseen_frames", "show_unseen_objects", "save_detections_path", "crop_group"}) { + "max_unseen_frames", 
"show_unseen_objects", "save_detections_path", "crop_group", + "min_size_threshold"}) { p = obs_properties_get(ppts, prop_name); obs_property_set_visible(p, enabled); } @@ -159,14 +160,17 @@ obs_properties_t *detect_filter_properties(void *data) obs_property_t *masking_color = obs_properties_get(props_, "masking_color"); obs_property_t *masking_blur_radius = obs_properties_get(props_, "masking_blur_radius"); + obs_property_t *masking_dilation = + obs_properties_get(props_, "dilation_iterations"); obs_property_set_visible(prop, enabled); obs_property_set_visible(masking_color, false); obs_property_set_visible(masking_blur_radius, false); - const char *masking_type_value = obs_data_get_string(settings, "masking_type"); - if (strcmp(masking_type_value, "solid_color") == 0) { + obs_property_set_visible(masking_dilation, enabled); + std::string masking_type_value = obs_data_get_string(settings, "masking_type"); + if (masking_type_value == "solid_color") { obs_property_set_visible(masking_color, enabled); - } else if (strcmp(masking_type_value, "blur") == 0) { + } else if (masking_type_value == "blur" || masking_type_value == "pixelate") { obs_property_set_visible(masking_blur_radius, enabled); } return true; @@ -181,6 +185,7 @@ obs_properties_t *detect_filter_properties(void *data) obs_property_list_add_string(masking_type, obs_module_text("SolidColor"), "solid_color"); obs_property_list_add_string(masking_type, obs_module_text("OutputMask"), "output_mask"); obs_property_list_add_string(masking_type, obs_module_text("Blur"), "blur"); + obs_property_list_add_string(masking_type, obs_module_text("Pixelate"), "pixelate"); obs_property_list_add_string(masking_type, obs_module_text("Transparent"), "transparent"); // add color picker for solid color masking @@ -191,26 +196,31 @@ obs_properties_t *detect_filter_properties(void *data) obs_module_text("MaskingBlurRadius"), 1, 30, 1); // add callback to show/hide blur radius and color picker - obs_property_set_modified_callback( 
- masking_type, [](obs_properties_t *props_, obs_property_t *, obs_data_t *settings) { - const bool masking_enabled = obs_data_get_bool(settings, "masking_group"); - const char *masking_type_value = - obs_data_get_string(settings, "masking_type"); - obs_property_t *masking_color = obs_properties_get(props_, "masking_color"); - obs_property_t *masking_blur_radius = - obs_properties_get(props_, "masking_blur_radius"); - obs_property_set_visible(masking_color, false); - obs_property_set_visible(masking_blur_radius, false); - - if (masking_enabled) { - if (strcmp(masking_type_value, "solid_color") == 0) { - obs_property_set_visible(masking_color, true); - } else if (strcmp(masking_type_value, "blur") == 0) { - obs_property_set_visible(masking_blur_radius, true); - } - } - return true; - }); + obs_property_set_modified_callback(masking_type, [](obs_properties_t *props_, + obs_property_t *, + obs_data_t *settings) { + std::string masking_type_value = obs_data_get_string(settings, "masking_type"); + obs_property_t *masking_color = obs_properties_get(props_, "masking_color"); + obs_property_t *masking_blur_radius = + obs_properties_get(props_, "masking_blur_radius"); + obs_property_t *masking_dilation = + obs_properties_get(props_, "dilation_iterations"); + obs_property_set_visible(masking_color, false); + obs_property_set_visible(masking_blur_radius, false); + const bool masking_enabled = obs_data_get_bool(settings, "masking_group"); + obs_property_set_visible(masking_dilation, masking_enabled); + + if (masking_type_value == "solid_color") { + obs_property_set_visible(masking_color, masking_enabled); + } else if (masking_type_value == "blur" || masking_type_value == "pixelate") { + obs_property_set_visible(masking_blur_radius, masking_enabled); + } + return true; + }); + + // add slider for dilation iterations + obs_properties_add_int_slider(masking_group, "dilation_iterations", + obs_module_text("DilationIterations"), 0, 20, 1); // add options group for tracking and 
zoom-follow options obs_properties_t *tracking_group_props = obs_properties_create(); @@ -243,6 +253,8 @@ obs_properties_t *detect_filter_properties(void *data) OBS_COMBO_TYPE_LIST, OBS_COMBO_FORMAT_STRING); obs_property_list_add_string(zoom_object, obs_module_text("SingleFirst"), "single"); + obs_property_list_add_string(zoom_object, obs_module_text("Biggest"), "biggest"); + obs_property_list_add_string(zoom_object, obs_module_text("Oldest"), "oldest"); obs_property_list_add_string(zoom_object, obs_module_text("All"), "all"); obs_property_t *advanced = @@ -288,6 +300,10 @@ obs_properties_t *detect_filter_properties(void *data) obs_properties_add_float_slider(props, "threshold", obs_module_text("ConfThreshold"), 0.0, 1.0, 0.025); + // add minimal size threshold slider + obs_properties_add_int_slider(props, "min_size_threshold", + obs_module_text("MinSizeThreshold"), 0, 10000, 1); + // add SORT tracking enabled checkbox obs_properties_add_bool(props, "sort_tracking", obs_module_text("SORTTracking")); @@ -344,16 +360,24 @@ obs_properties_t *detect_filter_properties(void *data) [](void *data_, obs_properties_t *props_, obs_property_t *p, obs_data_t *settings) { UNUSED_PARAMETER(p); struct detect_filter *tf_ = reinterpret_cast(data_); - const char *model_size_value = obs_data_get_string(settings, "model_size"); - bool is_external = strcmp(model_size_value, EXTERNAL_MODEL_SIZE) == 0; + std::string model_size_value = obs_data_get_string(settings, "model_size"); + bool is_external = model_size_value == EXTERNAL_MODEL_SIZE; obs_property_t *prop = obs_properties_get(props_, "external_model_file"); obs_property_set_visible(prop, is_external); if (!is_external) { - // reset the class names to COCO classes for default models - set_class_names_on_object_category( - obs_properties_get(props_, "object_category"), - edgeyolo_cpp::COCO_CLASSES); - tf_->classNames = edgeyolo_cpp::COCO_CLASSES; + if (model_size_value == FACE_DETECT_MODEL_SIZE) { + // set the class names to the face 
classes for face detection model + set_class_names_on_object_category( + obs_properties_get(props_, "object_category"), + yunet::FACE_CLASSES); + tf_->classNames = yunet::FACE_CLASSES; + } else { + // reset the class names to COCO classes for default models + set_class_names_on_object_category( + obs_properties_get(props_, "object_category"), + edgeyolo_cpp::COCO_CLASSES); + tf_->classNames = edgeyolo_cpp::COCO_CLASSES; + } } else { // if the model path is already set - update the class names const char *model_file = @@ -416,6 +440,7 @@ void detect_filter_defaults(obs_data_t *settings) obs_data_set_default_string(settings, "masking_type", "none"); obs_data_set_default_string(settings, "masking_color", "#000000"); obs_data_set_default_int(settings, "masking_blur_radius", 0); + obs_data_set_default_int(settings, "dilation_iterations", 0); obs_data_set_default_bool(settings, "tracking_group", false); obs_data_set_default_double(settings, "zoom_factor", 0.0); obs_data_set_default_double(settings, "zoom_speed_factor", 0.05); @@ -443,6 +468,7 @@ void detect_filter_update(void *data, obs_data_t *settings) tf->maskingType = obs_data_get_string(settings, "masking_type"); tf->maskingColor = (int)obs_data_get_int(settings, "masking_color"); tf->maskingBlurRadius = (int)obs_data_get_int(settings, "masking_blur_radius"); + tf->maskingDilateIterations = (int)obs_data_get_int(settings, "dilation_iterations"); bool newTrackingEnabled = obs_data_get_bool(settings, "tracking_group"); tf->zoomFactor = (float)obs_data_get_double(settings, "zoom_factor"); tf->zoomSpeedFactor = (float)obs_data_get_double(settings, "zoom_speed_factor"); @@ -459,6 +485,7 @@ void detect_filter_update(void *data, obs_data_t *settings) tf->crop_right = (int)obs_data_get_int(settings, "crop_right"); tf->crop_top = (int)obs_data_get_int(settings, "crop_top"); tf->crop_bottom = (int)obs_data_get_int(settings, "crop_bottom"); + tf->minAreaThreshold = (int)obs_data_get_int(settings, "min_size_threshold"); // 
check if tracking state has changed if (tf->trackingEnabled != newTrackingEnabled) { @@ -701,37 +728,29 @@ void *detect_filter_create(obs_data_t *settings, obs_source_t *source) tf->texrender = gs_texrender_create(GS_BGRA, GS_ZS_NONE); tf->lastDetectedObjectId = -1; - char *kawaseBlurEffectPath = obs_module_file(KAWASE_BLUR_EFFECT_PATH); - if (!kawaseBlurEffectPath) { - obs_log(LOG_ERROR, "Failed to get Kawase Blur effect path"); - tf->isDisabled = true; - return tf; - } - char *maskingEffectPath = obs_module_file(MASKING_EFFECT_PATH); - if (!maskingEffectPath) { - obs_log(LOG_ERROR, "Failed to get masking effect path"); - tf->isDisabled = true; - bfree(kawaseBlurEffectPath); - return tf; - } + std::vector> effects = { + {KAWASE_BLUR_EFFECT_PATH, &tf->kawaseBlurEffect}, + {MASKING_EFFECT_PATH, &tf->maskingEffect}, + {PIXELATE_EFFECT_PATH, &tf->pixelateEffect}, + }; - obs_enter_graphics(); - gs_effect_destroy(tf->kawaseBlurEffect); - tf->kawaseBlurEffect = nullptr; - char *error = nullptr; - tf->kawaseBlurEffect = gs_effect_create_from_file(kawaseBlurEffectPath, &error); - bfree(kawaseBlurEffectPath); - if (!tf->kawaseBlurEffect || error) { - obs_log(LOG_ERROR, "Failed to load Kawase Blur effect: %s", error); - } - gs_effect_destroy(tf->maskingEffect); - tf->maskingEffect = nullptr; - tf->maskingEffect = gs_effect_create_from_file(maskingEffectPath, &error); - bfree(maskingEffectPath); - if (!tf->maskingEffect || error) { - obs_log(LOG_ERROR, "Failed to load masking effect: %s", error); + for (auto [effectPath, effect] : effects) { + char *effectPathPtr = obs_module_file(effectPath); + if (!effectPathPtr) { + obs_log(LOG_ERROR, "Failed to get effect path: %s", effectPath); + tf->isDisabled = true; + return tf; + } + obs_enter_graphics(); + *effect = gs_effect_create_from_file(effectPathPtr, nullptr); + bfree(effectPathPtr); + obs_leave_graphics(); // balance obs_enter_graphics() before the failure check below can return + if (!*effect) { + obs_log(LOG_ERROR, "Failed to load effect: %s", effectPath); + tf->isDisabled = true; + return tf; + }
} - obs_leave_graphics(); detect_filter_update(tf, settings); @@ -841,6 +860,16 @@ void detect_filter_video_tick(void *data, float seconds) } } + if (tf->minAreaThreshold > 0) { + std::vector filtered_objects; + for (const Object &obj : objects) { + if (obj.rect.area() > (float)tf->minAreaThreshold) { + filtered_objects.push_back(obj); + } + } + objects = filtered_objects; + } + if (tf->objectCategory != -1) { std::vector filtered_objects; for (const Object &obj : objects) { @@ -903,6 +932,13 @@ void detect_filter_video_tick(void *data, float seconds) } std::lock_guard lock(tf->outputLock); mask.copyTo(tf->outputMask); + + if (tf->maskingDilateIterations > 0) { + cv::Mat dilatedMask; + cv::dilate(tf->outputMask, dilatedMask, cv::Mat(), + cv::Point(-1, -1), tf->maskingDilateIterations); + dilatedMask.copyTo(tf->outputMask); + } } std::lock_guard lock(tf->outputLock); @@ -917,13 +953,45 @@ void detect_filter_video_tick(void *data, float seconds) // get location of the objects if (tf->zoomObject == "single") { if (objects.size() > 0) { - boundingBox = objects[0].rect; + // find first visible object + for (const Object &obj : objects) { + if (obj.unseenFrames == 0) { + boundingBox = obj.rect; + break; + } + } + } + } else if (tf->zoomObject == "biggest") { + // get the bounding box of the biggest object + if (objects.size() > 0) { + float maxArea = 0; + for (const Object &obj : objects) { + const float area = obj.rect.width * obj.rect.height; + if (area > maxArea) { + maxArea = area; + boundingBox = obj.rect; + } + } + } + } else if (tf->zoomObject == "oldest") { + // get the object with the oldest id that's visible currently + if (objects.size() > 0) { + uint64_t oldestId = UINT64_MAX; + for (const Object &obj : objects) { + if (obj.unseenFrames == 0 && obj.id < oldestId) { + oldestId = obj.id; + boundingBox = obj.rect; + } + } } } else { // get the bounding box of all objects if (objects.size() > 0) { boundingBox = objects[0].rect; for (const 
Object &obj : objects) { + if (obj.unseenFrames > 0) { + continue; + } boundingBox |= obj.rect; } } @@ -1042,6 +1110,10 @@ void detect_filter_video_render(void *data, gs_effect_t *_effect) } else if (tf->maskingType == "blur") { gs_texture_destroy(tex); tex = blur_image(tf, width, height, maskTexture); + } else if (tf->maskingType == "pixelate") { + gs_texture_destroy(tex); + tex = pixelate_image(tf, width, height, maskTexture, + (float)tf->maskingBlurRadius); } else if (tf->maskingType == "transparent") { technique_name = "DrawSolidColor"; gs_effect_set_color(maskColorParam, 0); diff --git a/src/obs-utils/obs-utils.cpp b/src/obs-utils/obs-utils.cpp index 7de9eb5..14b4bf3 100644 --- a/src/obs-utils/obs-utils.cpp +++ b/src/obs-utils/obs-utils.cpp @@ -114,3 +114,53 @@ gs_texture_t *blur_image(struct filter_data *tf, uint32_t width, uint32_t height } return blurredTexture; } + +// Render the current tf->texrender contents through the pixelate effect and return the result as a new texture. +// alphaTexture is bound as the effect's "focalmask"; pixelateRadius is passed as "pixel_size" and clamped to >= 1 +// because the effect divides by it (the masking_blur_radius setting defaults to 0). +gs_texture_t *pixelate_image(struct filter_data *tf, uint32_t width, uint32_t height, + gs_texture_t *alphaTexture, float pixelateRadius) +{ + gs_texture_t *blurredTexture = gs_texture_create(width, height, GS_BGRA, 1, nullptr, 0); + gs_copy_texture(blurredTexture, gs_texrender_get_texture(tf->texrender)); + if (tf->pixelateEffect == nullptr) { + obs_log(LOG_ERROR, "tf->pixelateEffect is null"); + return blurredTexture; + } + gs_eparam_t *image = gs_effect_get_param_by_name(tf->pixelateEffect, "image"); + gs_eparam_t *mask = gs_effect_get_param_by_name(tf->pixelateEffect, "focalmask"); + gs_eparam_t *pixel_size = gs_effect_get_param_by_name(tf->pixelateEffect, "pixel_size"); + gs_eparam_t *tex_size = gs_effect_get_param_by_name(tf->pixelateEffect, "tex_size"); + + gs_texrender_reset(tf->texrender); + if (!gs_texrender_begin(tf->texrender, width, height)) { + obs_log(LOG_INFO, "Could not open pixelate texrender!"); + return blurredTexture; + } + + gs_effect_set_texture(image, blurredTexture); + if (alphaTexture != nullptr) { + gs_effect_set_texture(mask, alphaTexture); + }
+ gs_effect_set_float(pixel_size, pixelateRadius < 1.0f ? 1.0f : pixelateRadius); // never hand the shader a zero divisor + vec2 texsize_vec; + vec2_set(&texsize_vec, (float)width, (float)height); + gs_effect_set_vec2(tex_size, &texsize_vec); + + struct vec4 background; + vec4_zero(&background); + gs_clear(GS_CLEAR_COLOR, &background, 0.0f, 0); + gs_ortho(0.0f, static_cast<float>(width), 0.0f, static_cast<float>(height), -100.0f, + 100.0f); + gs_blend_state_push(); + gs_blend_function(GS_BLEND_ONE, GS_BLEND_ZERO); + + while (gs_effect_loop(tf->pixelateEffect, "Draw")) { + gs_draw_sprite(blurredTexture, 0, width, height); + } + gs_blend_state_pop(); + gs_texrender_end(tf->texrender); + gs_copy_texture(blurredTexture, gs_texrender_get_texture(tf->texrender)); + + return blurredTexture; +} diff --git a/src/obs-utils/obs-utils.h b/src/obs-utils/obs-utils.h index ff7539e..d7e13c1 100644 --- a/src/obs-utils/obs-utils.h +++ b/src/obs-utils/obs-utils.h @@ -8,4 +8,7 @@ bool getRGBAFromStageSurface(filter_data *tf, uint32_t &width, uint32_t &height) gs_texture_t *blur_image(struct filter_data *tf, uint32_t width, uint32_t height, gs_texture_t *alphaTexture = nullptr); +gs_texture_t *pixelate_image(struct filter_data *tf, uint32_t width, uint32_t height, + gs_texture_t *alphaTexture, float pixelateRadius); + #endif /* OBS_UTILS_H */