diff --git a/data/locale/en-US.ini b/data/locale/en-US.ini index 37ca8ed6..80479ab6 100644 --- a/data/locale/en-US.ini +++ b/data/locale/en-US.ini @@ -221,7 +221,7 @@ Filter.DynamicMask.Channel.Input="%s Input Value" Filter.DynamicMask.Channel.Input.Description="The input value for channel %s.\nSets 'value[%s][%s]' in the calculation 'mask[%s] = (base[%s] + value[%s][Red] * source[Red] + value[%s][Green] * source[Green] + value[%s][Blue] * source[Blue] + value[%s][Alpha] * source[Alpha]) * multiplier[%s]'." # Filter - Nvidia Face Tracking -Filter.Nvidia.FaceTracking="Nvidia Face Tracking" +Filter.Nvidia.FaceTracking="NVIDIA Face Tracking" Filter.Nvidia.FaceTracking.ROI="Region of Interest" Filter.Nvidia.FaceTracking.ROI.Zoom="Zoom" Filter.Nvidia.FaceTracking.ROI.Zoom.Description="Restrict the maximum zoom level based on the current maximum and minimum zoom level.\nValues above 100% zoom into the face, while values below 100% will keep their distance from the face." diff --git a/source/filters/filter-nv-face-tracking.cpp b/source/filters/filter-nv-face-tracking.cpp index a813786a..68fe8bce 100644 --- a/source/filters/filter-nv-face-tracking.cpp +++ b/source/filters/filter-nv-face-tracking.cpp @@ -50,35 +50,17 @@ face_tracking_instance::face_tracking_instance(obs_data_t* settings, obs_source_ _rt_is_fresh(false), _rt(), - _cfg_roi_zoom(1.0), _cfg_roi_offset({0., 0.}), _cfg_roi_stability(1.0), + _cfg_zoom(1.0), _cfg_offset({0., 0.}), _cfg_stability(1.0), - _roi_center(), _roi_size(), _roi_geom(), + _geometry(), _filters(), _values(), _cuda(face_tracking_factory::get()->get_cuda()), _cuda_ctx(face_tracking_factory::get()->get_cuda_context()), _cuda_stream(), - _ar_library(face_tracking_factory::get()->get_ar()), _ar_loaded(false), _ar_feature(), _ar_tracked(true), - _ar_bboxes_data(), _ar_bboxes(), _ar_bboxes_confidence(), - - _ar_texture(), _ar_texture_cuda_fresh(false), _ar_texture_cuda(), _ar_texture_cuda_mem(), _ar_image(), - _ar_image_bgr(), _ar_image_temp() + _ar_library(face_tracking_factory::get()->get_ar()), _ar_loaded(false), _ar_feature(), _ar_is_tracking(false), + _ar_bboxes_confidence(), _ar_bboxes_data(), _ar_bboxes(), _ar_texture(), _ar_texture_cuda_fresh(false), + _ar_texture_cuda(), _ar_texture_cuda_mem(), _ar_image(), _ar_image_bgr(), _ar_image_temp() { - // Create Graphics resources for everything. - { - auto gctx = gs::context{}; - _rt = std::make_shared(GS_RGBA, GS_ZS_NONE); - _roi_geom = std::make_shared(4, 1); - } - - // Initialize everything. - { - auto cctx = std::make_shared<::nvidia::cuda::context_stack>(_cuda, _cuda_ctx); - std::int32_t minPrio, maxPrio; - _cuda->cuCtxGetStreamPriorityRange(&minPrio, &maxPrio); - _cuda_stream = std::make_shared<::nvidia::cuda::stream>(_cuda, ::nvidia::cuda::cu_stream_flags::NON_BLOCKING, - minPrio + ((maxPrio - minPrio) / 2)); - } - #ifdef ENABLE_PROFILING // Profiling _profile_capture = util::profiler::create(); @@ -91,12 +73,34 @@ face_tracking_instance::face_tracking_instance(obs_data_t* settings, obs_source_ _profile_ar_calc = util::profiler::create(); #endif - // Asynchronously load Face Tracking. - async_initialize(nullptr); + { // Create render target, vertex buffer, and CUDA stream. + auto gctx = gs::context{}; + _rt = std::make_shared(GS_RGBA, GS_ZS_NONE); + _geometry = std::make_shared(4, 1); + auto cctx = std::make_shared<::nvidia::cuda::context_stack>(_cuda, _cuda_ctx); + _cuda_stream = + std::make_shared<::nvidia::cuda::stream>(_cuda, ::nvidia::cuda::cu_stream_flags::NON_BLOCKING, 0); + } + + { // Asynchronously load Face Tracking. + async_initialize(); + } + + { // Set up initial tracking data. + _values.center[0] = _values.center[1] = .5; + _values.size[0] = _values.size[1] = 1.; + refresh_region_of_interest(); + } } face_tracking_instance::~face_tracking_instance() { + // Kill pending tasks. + streamfx::threadpool()->pop(_async_initialize); + streamfx::threadpool()->pop(_async_track); + + _ar_loaded.store(false); + std::unique_lock alk{_ar_lock}; _ar_library->image_dealloc(&_ar_image_temp); _ar_library->image_dealloc(&_ar_image_bgr); } @@ -114,21 +118,23 @@ void face_tracking_instance::async_initialize(std::shared_ptr ptr) data->source = std::shared_ptr(obs_source_get_weak_source(_self), obs::obs_weak_source_deleter); - std::filesystem::path models_path = _ar_library->get_ar_sdk_path(); - models_path = models_path.append("models"); - models_path = std::filesystem::absolute(models_path); - models_path.concat("\\"); - data->models_path = models_path.string(); + { + std::filesystem::path models_path = _ar_library->get_ar_sdk_path(); + models_path = models_path.append("models"); + models_path = std::filesystem::absolute(models_path); + models_path.concat("\\"); + data->models_path = models_path.string(); + } - streamfx::threadpool()->push(std::bind(&face_tracking_instance::async_initialize, this, std::placeholders::_1), - data); + _async_initialize = streamfx::threadpool()->push( + std::bind(&face_tracking_instance::async_initialize, this, std::placeholders::_1), data); } else { std::shared_ptr data = std::static_pointer_cast(ptr); // Try and acquire a strong source reference. - std::shared_ptr ref = + std::shared_ptr remote_work = std::shared_ptr(obs_weak_source_get_source(data->source.get()), obs::obs_source_deleter); - if (!ref) { // If that failed, the source we are working for was deleted - abort now. + if (!remote_work) { // If that failed, the source we are working for was deleted - abort now. return; } @@ -162,7 +168,7 @@ void face_tracking_instance::async_initialize(std::shared_ptr ptr) // Finally enable Temporal tracking if possible. if (NvCV_Status res = _ar_library->set_uint32(_ar_feature.get(), NvAR_Parameter_Config(Temporal), 1); res != NVCV_SUCCESS) { - LOG_WARNING("<%s> Unable to enable Temporal tracking mode.", obs_source_get_name(ref.get())); + LOG_WARNING("<%s> Unable to enable Temporal tracking mode.", obs_source_get_name(remote_work.get())); } // Create Bounding Boxes Data @@ -191,55 +197,32 @@ void face_tracking_instance::async_initialize(std::shared_ptr ptr) } else { _ar_loaded = true; } + + _async_initialize.reset(); } } -void face_tracking_instance::refresh_geometry() -{ // Update Region of Interest Geometry. - std::unique_lock lock(_roi_lock); - - auto v0 = _roi_geom->at(0); - auto v1 = _roi_geom->at(1); - auto v2 = _roi_geom->at(2); - auto v3 = _roi_geom->at(3); - - *v0.color = 0xFFFFFFFF; - *v1.color = 0xFFFFFFFF; - *v2.color = 0xFFFFFFFF; - *v3.color = 0xFFFFFFFF; - - vec3_set(v3.position, static_cast(_size.first), static_cast(_size.second), 0.); - vec3_set(v2.position, v3.position->x, 0., 0.); - vec3_set(v1.position, 0., v3.position->y, 0.); - vec3_set(v0.position, 0., 0., 0.); - - vec4_set(v0.uv[0], - static_cast((_roi_center.first - _roi_size.first / 2.) / static_cast(_size.first)), - static_cast((_roi_center.second - _roi_size.second / 2.) / static_cast(_size.second)), - 0., 0.); - vec4_set(v1.uv[0], - static_cast((_roi_center.first - _roi_size.first / 2.) / static_cast(_size.first)), - static_cast((_roi_center.second + _roi_size.second / 2.) / static_cast(_size.second)), - 0., 0.); - vec4_set(v2.uv[0], - static_cast((_roi_center.first + _roi_size.first / 2.) / static_cast(_size.first)), - static_cast((_roi_center.second - _roi_size.second / 2.) / static_cast(_size.second)), - 0., 0.); - vec4_set(v3.uv[0], - static_cast((_roi_center.first + _roi_size.first / 2.) / static_cast(_size.first)), - static_cast((_roi_center.second + _roi_size.second / 2.) / static_cast(_size.second)), - 0., 0.); - - _roi_geom->update(); -} - void face_tracking_instance::async_track(std::shared_ptr ptr) { struct async_data { std::shared_ptr source; }; + if (!_ar_loaded) + return; + if (!ptr) { + // Check if we can track. + if (_ar_is_tracking) + return; // Can't track a new frame right now. + +#ifdef ENABLE_PROFILING + gs::debug_marker gdm{gs::debug_color_convert, "Start Asynchronous Tracking"}; +#endif + + // Don't push additional tracking frames while processing one. + _ar_is_tracking = true; + // Spawn the work for the threadpool. std::shared_ptr data = std::make_shared(); data->source = @@ -251,7 +234,6 @@ void face_tracking_instance::async_track(std::shared_ptr ptr) auto prof = _profile_capture_realloc->track(); gs::debug_marker marker{gs::debug_color_allocate, "Reallocate GPU Buffer"}; #endif - _ar_texture = std::make_shared(_size.first, _size.second, GS_RGBA, 1, nullptr, gs::texture::flags::None); _ar_texture_cuda_fresh = false; @@ -262,25 +244,28 @@ void face_tracking_instance::async_track(std::shared_ptr ptr) auto prof = _profile_capture_copy->track(); gs::debug_marker marker{gs::debug_color_copy, "Copy Capture", obs_source_get_name(_self)}; #endif - gs_copy_texture(_ar_texture->get_object(), _rt->get_texture()->get_object()); } // Push work - streamfx::threadpool()->push(std::bind(&face_tracking_instance::async_track, this, std::placeholders::_1), - data); + _async_track = streamfx::threadpool()->push( + std::bind(&face_tracking_instance::async_track, this, std::placeholders::_1), data); } else { - std::shared_ptr data = std::static_pointer_cast(ptr); + // Prevent conflicts. + std::unique_lock alk{_ar_lock}; + if (!_ar_loaded) + return; // Try and acquire a strong source reference. - std::shared_ptr ref = + std::shared_ptr data = std::static_pointer_cast(ptr); + std::shared_ptr remote_work = std::shared_ptr(obs_weak_source_get_source(data->source.get()), obs::obs_source_deleter); - if (!ref) { // If that failed, the source we are working for was deleted - abort now. + if (!remote_work) { // If that failed, the source we are working for was deleted - abort now. return; } // Acquire GS context. - gs::context gctx; + gs::context gctx{}; // Update the current CUDA context for working. auto cctx = std::make_shared<::nvidia::cuda::context_stack>(_cuda, _cuda_ctx); @@ -292,22 +277,23 @@ void face_tracking_instance::async_track(std::shared_ptr ptr) gs::debug_marker marker{gs::debug_color_allocate, "%s: Reallocate CUDA Buffers", obs_source_get_name(_self)}; #endif - // Assign new texture and allocate new memory. - std::size_t pitch = _size.first * 4ul; + std::size_t pitch = _ar_texture->get_width() * 4ul; _ar_texture_cuda = std::make_shared<::nvidia::cuda::gstexture>(_cuda, _ar_texture); - _ar_texture_cuda_mem = std::make_shared<::nvidia::cuda::memory>(_cuda, pitch * _size.second); - _ar_library->image_init(&_ar_image, static_cast(_size.first), - static_cast(_size.second), static_cast(pitch), + _ar_texture_cuda_mem = std::make_shared<::nvidia::cuda::memory>(_cuda, pitch * _ar_texture->get_height()); + _ar_library->image_init(&_ar_image, static_cast(_ar_texture->get_width()), + static_cast(_ar_texture->get_height()), static_cast(pitch), reinterpret_cast(_ar_texture_cuda_mem->get()), NVCV_RGBA, NVCV_U8, NVCV_INTERLEAVED, NVCV_CUDA); // Reallocate transposed buffer. - _ar_library->image_dealloc(&_ar_image_bgr); - _ar_library->image_alloc(&_ar_image_bgr, static_cast(_size.first), - static_cast(_size.second), NVCV_BGR, NVCV_U8, NVCV_INTERLEAVED, - NVCV_CUDA, 0); _ar_library->image_dealloc(&_ar_image_temp); + _ar_library->image_dealloc(&_ar_image_bgr); + _ar_library->image_alloc(&_ar_image_bgr, _ar_image.width, _ar_image.height, NVCV_BGR, NVCV_U8, + NVCV_INTERLEAVED, NVCV_CUDA, 0); + + // Synchronize Streams. + _cuda->cuStreamSynchronize(_cuda_stream->get()); // Finally set the input object. if (NvCV_Status res = _ar_library->set_object(_ar_feature.get(), NvAR_Parameter_Input(Image), @@ -325,7 +311,6 @@ void face_tracking_instance::async_track(std::shared_ptr ptr) #ifdef ENABLE_PROFILING auto prof = _profile_ar_copy->track(); #endif - ::nvidia::cuda::cu_memcpy2d_t mc; mc.src_x_in_bytes = 0; mc.src_y = 0; @@ -344,7 +329,8 @@ void face_tracking_instance::async_track(std::shared_ptr ptr) mc.width_in_bytes = static_cast(_ar_image.pitch); mc.height = _ar_image.height; - if (::nvidia::cuda::cu_result res = _cuda->cuMemcpy2D(&mc); res != ::nvidia::cuda::cu_result::SUCCESS) { + if (::nvidia::cuda::cu_result res = _cuda->cuMemcpy2DAsync(&mc, _cuda_stream->get()); + res != ::nvidia::cuda::cu_result::SUCCESS) { LOG_ERROR("<%s> Failed to prepare buffers for tracking.", obs_source_get_name(_self)); return; } @@ -354,7 +340,6 @@ void face_tracking_instance::async_track(std::shared_ptr ptr) #ifdef ENABLE_PROFILING auto prof = _profile_ar_transfer->track(); #endif - if (NvCV_Status res = _ar_library->image_transfer(&_ar_image, &_ar_image_bgr, 1.0, reinterpret_cast(_cuda_stream->get()), &_ar_image_temp); @@ -362,90 +347,128 @@ void face_tracking_instance::async_track(std::shared_ptr ptr) LOG_ERROR("<%s> Failed to convert from RGBX 32-bit to BGR 24-bit.", obs_source_get_name(_self)); return; } + + // Synchronize Streams. + _cuda->cuStreamSynchronize(_cuda_stream->get()); + _cuda->cuCtxSynchronize(); } { // Track any faces. #ifdef ENABLE_PROFILING auto prof = _profile_ar_run->track(); #endif - if (NvCV_Status res = _ar_library->run(_ar_feature.get()); res != NVCV_SUCCESS) { LOG_ERROR("<%s> Failed to run tracking.", obs_source_get_name(_self)); return; } } - if ((_ar_bboxes.num_boxes == 0) || (_ar_bboxes_confidence.at(0) < 0.5)) { - // Not confident enough or not tracking anything, return to full frame after a bit. + // Are we tracking anything, and confident enough in the tracking? + if ((_ar_bboxes.num_boxes == 0) || (_ar_bboxes_confidence.at(0) < 0.3333)) { + // If not, just return to full frame. + std::unique_lock tlk{_values.lock}; + _values.center[0] = .5; + _values.center[1] = .5; + _values.size[0] = 1.; + _values.size[1] = 1.; + _values.velocity[0] = 0; + _values.velocity[1] = 0; } else { + // If yes, begin tracking. #ifdef ENABLE_PROFILING auto prof = _profile_ar_calc->track(); #endif - double_t aspect = double_t(_size.first) / double_t(_size.second); + double_t sx = static_cast(_ar_image_bgr.width); + double_t sy = static_cast(_ar_image_bgr.height); + double_t aspect = double_t(sx) / double_t(sy); + double_t fps = 0.; + + { + obs_video_info ovi; + obs_get_video_info(&ovi); + fps = static_cast(ovi.fps_num) / static_cast(ovi.fps_den); + } // Store values and center. - double_t bbox_w = _ar_bboxes.boxes[0].width; - double_t bbox_h = _ar_bboxes.boxes[0].height; - double_t bbox_cx = _ar_bboxes.boxes[0].x + bbox_w / 2.0; - double_t bbox_cy = _ar_bboxes.boxes[0].y + bbox_h / 2.0; + double_t bsx = _ar_bboxes.boxes[0].width; + double_t bsy = _ar_bboxes.boxes[0].height; + double_t bcx = _ar_bboxes.boxes[0].x + bsx / 2.0; + double_t bcy = _ar_bboxes.boxes[0].y + bsy / 2.0; // Zoom, Aspect Ratio, Offset - bbox_h = util::math::lerp(_size.second, bbox_h, _cfg_roi_zoom); - bbox_h = std::clamp(bbox_h, 10 * aspect, static_cast(_size.second)); - bbox_w = bbox_h * aspect; - bbox_cx += _ar_bboxes.boxes[0].width * _cfg_roi_offset.first; - bbox_cy += _ar_bboxes.boxes[0].height * _cfg_roi_offset.second; + bsy = util::math::lerp(sy, bsy, _cfg_zoom); + bsy = std::clamp(bsy, 10 * aspect, static_cast(_size.second)); + bsx = bsy * aspect; + bcx += _ar_bboxes.boxes[0].width * _cfg_offset.first; + bcy += _ar_bboxes.boxes[0].height * _cfg_offset.second; // Fit back into the frame // - Above code guarantees that height is never bigger than the height of the frame. // - Which also guarantees that width is never bigger than the width of the frame. // Only cx and cy need to be adjusted now to always be in the frame. - bbox_cx = std::clamp(bbox_cx, (bbox_w / 2.), static_cast(_size.first) - (bbox_w / 2.)); - bbox_cy = std::clamp(bbox_cy, (bbox_h / 2.), static_cast(_size.second) - (bbox_h / 2.)); + bcx = std::clamp(bcx, (bsx / 2.), sx - (bsx / 2.)); + bcy = std::clamp(bcy, (bsy / 2.), sy - (bsy / 2.)); - // Filter values - auto size_w = _roi_filters[2].filter(bbox_w); - auto size_h = _roi_filters[3].filter(bbox_h); - auto center_x = _roi_filters[0].filter(bbox_cx); - auto center_y = _roi_filters[1].filter(bbox_cy); - - // Fix NaN/Infinity - if (std::isfinite(size_w) && std::isfinite(size_h) && std::isfinite(center_x) && std::isfinite(center_y)) { - std::unique_lock lock(_roi_lock); - _roi_center.first = center_x; - _roi_center.second = center_y; - _roi_size.first = size_w; - _roi_size.second = size_h; - } else { - std::unique_lock lock(_roi_lock); - roi_refresh(); + { // Update target values. + std::unique_lock tlk{_values.lock}; + _values.velocity[0] = -_values.center[0]; + _values.velocity[1] = -_values.center[1]; + _values.center[0] = bcx / sx; + _values.center[1] = bcy / sy; + _values.velocity[0] += _values.center[0]; + _values.velocity[1] += _values.center[1]; + _values.velocity[0] *= fps; + _values.velocity[1] *= fps; + _values.size[0] = bsx / sx; + _values.size[1] = bsy / sy; } } - _ar_tracked = true; + _async_track.reset(); + + // Allow new frames to be queued again. + _ar_is_tracking = false; } } -void face_tracking_instance::roi_refresh() -{ - double_t kalman_q = util::math::lerp(1.0, 1e-6, _cfg_roi_stability); - double_t kalman_r = util::math::lerp(std::numeric_limits::epsilon(), 1e+2, _cfg_roi_stability); +void face_tracking_instance::refresh_geometry() +{ // Update Region of Interest Geometry. + auto v0 = _geometry->at(0); + auto v1 = _geometry->at(1); + auto v2 = _geometry->at(2); + auto v3 = _geometry->at(3); - _roi_filters[0] = util::math::kalman1D{kalman_q, kalman_r, 1.0, _roi_center.first}; - _roi_filters[1] = util::math::kalman1D{kalman_q, kalman_r, 1.0, _roi_center.second}; - _roi_filters[2] = util::math::kalman1D{kalman_q, kalman_r, 1.0, _roi_size.first}; - _roi_filters[3] = util::math::kalman1D{kalman_q, kalman_r, 1.0, _roi_size.second}; + vec3_set(v3.position, static_cast(_size.first), static_cast(_size.second), 0.); + vec3_set(v2.position, v3.position->x, 0., 0.); + vec3_set(v1.position, 0., v3.position->y, 0.); + vec3_set(v0.position, 0., 0., 0.); + + float_t hsx = static_cast(_filters.size[0].get() / 2.); + float_t hsy = static_cast(_filters.size[1].get() / 2.); + vec4_set(v0.uv[0], static_cast(_filters.center[0].get() - hsx), + static_cast(_filters.center[1].get() - hsy), 0., 0.); + vec4_set(v1.uv[0], static_cast(_filters.center[0].get() - hsx), + static_cast(_filters.center[1].get() + hsy), 0., 0.); + vec4_set(v2.uv[0], static_cast(_filters.center[0].get() + hsx), + static_cast(_filters.center[1].get() - hsy), 0., 0.); + vec4_set(v3.uv[0], static_cast(_filters.center[0].get() + hsx), + static_cast(_filters.center[1].get() + hsy), 0., 0.); + + _geometry->update(true); } -void face_tracking_instance::roi_reset() +void face_tracking_instance::refresh_region_of_interest() { - _roi_center.first = static_cast(_size.first / 2); - _roi_center.second = static_cast(_size.second / 2); - _roi_size.first = static_cast(_size.first); - _roi_size.second = static_cast(_size.second); + std::unique_lock tlk(_values.lock); - roi_refresh(); + double_t kalman_q = util::math::lerp(1.0, 1e-6, _cfg_stability); + double_t kalman_r = util::math::lerp(std::numeric_limits::epsilon(), 1e+2, _cfg_stability); + + _filters.center[0] = util::math::kalman1D{kalman_q, kalman_r, 1., _values.center[0]}; + _filters.center[1] = util::math::kalman1D{kalman_q, kalman_r, 1., _values.center[1]}; + _filters.size[0] = util::math::kalman1D{kalman_q, kalman_r, 1., _values.size[0]}; + _filters.size[1] = util::math::kalman1D{kalman_q, kalman_r, 1., _values.size[1]}; } void face_tracking_instance::load(obs_data_t* data) @@ -457,14 +480,13 @@ void face_tracking_instance::migrate(obs_data_t* data, std::uint64_t version) {} void face_tracking_instance::update(obs_data_t* data) { - _cfg_roi_zoom = obs_data_get_double(data, SK_ROI_ZOOM) / 100.0; - _cfg_roi_offset.first = obs_data_get_double(data, SK_ROI_OFFSET_X) / 100.0; - _cfg_roi_offset.second = obs_data_get_double(data, SK_ROI_OFFSET_Y) / 100.0; - _cfg_roi_stability = obs_data_get_double(data, SK_ROI_STABILITY) / 100.0; + _cfg_zoom = obs_data_get_double(data, SK_ROI_ZOOM) / 100.0; + _cfg_offset.first = obs_data_get_double(data, SK_ROI_OFFSET_X) / 100.0; + _cfg_offset.second = obs_data_get_double(data, SK_ROI_OFFSET_Y) / 100.0; + _cfg_stability = obs_data_get_double(data, SK_ROI_STABILITY) / 100.0; // Refresh the Region Of Interest - std::unique_lock lock(_roi_lock); - roi_refresh(); + refresh_region_of_interest(); } void face_tracking_instance::video_tick(float_t seconds) @@ -474,11 +496,24 @@ void face_tracking_instance::video_tick(float_t seconds) return; } - if (obs_source_t* target = obs_filter_get_target(_self); target != nullptr) { - _size.first = obs_source_get_width(target); - _size.second = obs_source_get_height(target); + // Update the input size. + if (obs_source_t* src = obs_filter_get_target(_self); src != nullptr) { + _size.first = obs_source_get_base_width(src); + _size.second = obs_source_get_base_height(src); } + // Update filters and geometry + { + std::unique_lock tlk(_values.lock); + _filters.center[0].filter(_values.center[0]); + _filters.center[1].filter(_values.center[1]); + _filters.size[0].filter(_values.size[0]); + _filters.size[1].filter(_values.size[1]); + _values.center[0] += _values.velocity[0] * seconds; + _values.center[1] += _values.velocity[1] * seconds; + } + refresh_geometry(); + _rt_is_fresh = false; } @@ -494,8 +529,8 @@ void face_tracking_instance::video_render(gs_effect_t* effect) } #ifdef ENABLE_PROFILING - gs::debug_marker gdmp{gs::debug_color_source, "Nvidia Face Tracking '%s' on '%s'", obs_source_get_name(_self), - obs_source_get_name(obs_filter_get_parent(_self))}; + gs::debug_marker gdmp{gs::debug_color_source, "NVIDIA Face Tracking '%s'...", obs_source_get_name(_self)}; + gs::debug_marker gdmp2{gs::debug_color_source, "... on '%s'", obs_source_get_name(obs_filter_get_parent(_self))}; #endif if (!_rt_is_fresh) { // Capture the filter stack "below" us. @@ -512,24 +547,20 @@ void face_tracking_instance::video_render(gs_effect_t* effect) auto op = _rt->render(_size.first, _size.second); vec4 clr = {0., 0., 0., 0.}; - gs_ortho(0., static_cast(_size.first), 0., static_cast(_size.second), 0., 1.); - gs_clear(GS_CLEAR_COLOR, &clr, 0., 0.); + gs_ortho(0., 1., 0., 1., -1., 1.); + gs_clear(GS_CLEAR_COLOR, &clr, 0., 0); + gs_enable_color(true, true, true, true); + gs_enable_blending(false); - obs_source_process_filter_tech_end(_self, default_effect, _size.first, _size.second, "Draw"); + obs_source_process_filter_tech_end(_self, default_effect, 1, 1, "Draw"); } else { obs_source_skip_video_filter(_self); return; } } - if (_ar_tracked) { -#ifdef ENABLE_PROFILING - gs::debug_marker gdm{gs::debug_color_convert, "Trigger Async Tracking Task"}; -#endif - - async_track(nullptr); - refresh_geometry(); - } + // Probably spawn new work. + async_track(nullptr); _rt_is_fresh = true; } @@ -541,9 +572,9 @@ void face_tracking_instance::video_render(gs_effect_t* effect) gs_effect_set_texture(gs_effect_get_param_by_name(effect ? effect : default_effect, "image"), _rt->get_texture()->get_object()); - gs_load_vertexbuffer(_roi_geom->update()); + gs_load_vertexbuffer(_geometry->update(false)); while (gs_effect_loop(effect ? effect : default_effect, "Draw")) { - gs_draw(gs_draw_mode::GS_TRISTRIP, 0, _roi_geom->size()); + gs_draw(gs_draw_mode::GS_TRISTRIP, 0, 0); } gs_load_vertexbuffer(nullptr); } @@ -690,7 +721,7 @@ void streamfx::filter::nvidia::face_tracking_factory::initialize() try { _filter_nvidia_face_tracking_factory_instance = std::make_shared(); } catch (const std::exception& ex) { - LOG_ERROR(" %s", ex.what()); + LOG_ERROR(" %s", ex.what()); } } diff --git a/source/filters/filter-nv-face-tracking.hpp b/source/filters/filter-nv-face-tracking.hpp index 2dbffbbb..f43328c0 100644 --- a/source/filters/filter-nv-face-tracking.hpp +++ b/source/filters/filter-nv-face-tracking.hpp @@ -37,21 +37,29 @@ namespace streamfx::filter::nvidia { class face_tracking_instance : public obs::source_instance { // Filter Cache + std::pair _size; bool _rt_is_fresh; std::shared_ptr _rt; - std::pair _size; + + std::mutex _delete_protection; // Settings - double_t _cfg_roi_zoom; - std::pair _cfg_roi_offset; - double_t _cfg_roi_stability; + double_t _cfg_zoom; + std::pair _cfg_offset; + double_t _cfg_stability; - // Region of Interest - util::math::kalman1D _roi_filters[4]; - std::mutex _roi_lock; - std::pair _roi_center; - std::pair _roi_size; - std::shared_ptr _roi_geom; + // Operational Data + std::shared_ptr _geometry; + struct { + util::math::kalman1D center[2]; + util::math::kalman1D size[2]; + } _filters; + struct { + std::mutex lock; + double_t center[2]; + double_t size[2]; + double_t velocity[2]; + } _values; // Nvidia CUDA interop std::shared_ptr<::nvidia::cuda::cuda> _cuda; @@ -62,7 +70,8 @@ namespace streamfx::filter::nvidia { std::shared_ptr<::nvidia::ar::ar> _ar_library; std::atomic_bool _ar_loaded; std::shared_ptr _ar_feature; - std::atomic_bool _ar_tracked; + std::atomic_bool _ar_is_tracking; + std::mutex _ar_lock; std::vector _ar_bboxes_confidence; std::vector _ar_bboxes_data; NvAR_BBoxes _ar_bboxes; @@ -74,6 +83,10 @@ namespace streamfx::filter::nvidia { NvCVImage _ar_image_bgr; NvCVImage _ar_image_temp; + // Tasks + std::shared_ptr<::util::threadpool::task> _async_initialize; + std::shared_ptr<::util::threadpool::task> _async_track; + #ifdef ENABLE_PROFILING // Profiling std::shared_ptr _profile_capture; @@ -91,18 +104,13 @@ namespace streamfx::filter::nvidia { virtual ~face_tracking_instance() override; // Tasks - void async_initialize(std::shared_ptr); + void async_initialize(std::shared_ptr = nullptr); + + void async_track(std::shared_ptr = nullptr); void refresh_geometry(); - void async_track(std::shared_ptr); - - // Create image buffer. - //void create_image_buffer(std::size_t width, std::size_t height); - - void roi_refresh(); - - void roi_reset(); + void refresh_region_of_interest(); virtual void load(obs_data_t* data) override;