mirror of
https://github.com/Xaymar/obs-StreamFX
synced 2024-11-14 07:45:06 +00:00
5a65cf3525
While the long descriptions were useful, keeping the updated and translated is pretty much impossible. Technology moves fast and not everyone that translates the project knows a lot about technology. Therefore the long descriptions have now been replaced with a button that opens the wiki page for the feature instead. This should drastically reduce the number of help cases, and improve the translation coverage at the same time.
731 lines
25 KiB
C++
731 lines
25 KiB
C++
/*
|
|
* Modern effects for a modern Streamer
|
|
* Copyright (C) 2017 Michael Fabian Dirks
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
|
|
*/
|
|
|
|
#include "filter-nv-face-tracking.hpp"
|
|
#include <algorithm>
|
|
#include <filesystem>
|
|
#include <util/platform.h>
|
|
#include "nvidia/cuda/nvidia-cuda-context-stack.hpp"
|
|
#include "obs/gs/gs-helper.hpp"
|
|
#include "obs/obs-tools.hpp"
|
|
|
|
#define ST "Filter.Nvidia.FaceTracking"
|
|
#define ST_ROI "Filter.Nvidia.FaceTracking.ROI"
|
|
#define ST_ROI_ZOOM "Filter.Nvidia.FaceTracking.ROI.Zoom"
|
|
#define SK_ROI_ZOOM "ROI.Zoom"
|
|
#define ST_ROI_OFFSET "Filter.Nvidia.FaceTracking.ROI.Offset"
|
|
#define ST_ROI_OFFSET_X "Filter.Nvidia.FaceTracking.ROI.Offset.X"
|
|
#define SK_ROI_OFFSET_X "ROI.Offset.X"
|
|
#define ST_ROI_OFFSET_Y "Filter.Nvidia.FaceTracking.ROI.Offset.Y"
|
|
#define SK_ROI_OFFSET_Y "ROI.Offset.Y"
|
|
#define ST_ROI_STABILITY "Filter.Nvidia.FaceTracking.ROI.Stability"
|
|
#define SK_ROI_STABILITY "ROI.Stability"
|
|
|
|
using namespace streamfx::filter::nvidia;
|
|
|
|
void ar_feature_deleter(NvAR_FeatureHandle v)
|
|
{
|
|
face_tracking_factory::get()->get_ar()->destroy(v);
|
|
}
|
|
|
|
face_tracking_instance::face_tracking_instance(obs_data_t* settings, obs_source_t* self)
|
|
: obs::source_instance(settings, self),
|
|
|
|
_rt_is_fresh(false), _rt(),
|
|
|
|
_cfg_zoom(1.0), _cfg_offset({0., 0.}), _cfg_stability(1.0),
|
|
|
|
_geometry(), _filters(), _values(),
|
|
|
|
_cuda(face_tracking_factory::get()->get_cuda()), _cuda_ctx(face_tracking_factory::get()->get_cuda_context()),
|
|
_cuda_stream(),
|
|
|
|
_ar_library(face_tracking_factory::get()->get_ar()), _ar_loaded(false), _ar_feature(), _ar_is_tracking(false),
|
|
_ar_bboxes_confidence(), _ar_bboxes_data(), _ar_bboxes(), _ar_texture(), _ar_texture_cuda_fresh(false),
|
|
_ar_texture_cuda(), _ar_texture_cuda_mem(), _ar_image(), _ar_image_bgr(), _ar_image_temp()
|
|
{
|
|
#ifdef ENABLE_PROFILING
|
|
// Profiling
|
|
_profile_capture = util::profiler::create();
|
|
_profile_capture_realloc = util::profiler::create();
|
|
_profile_capture_copy = util::profiler::create();
|
|
_profile_ar_realloc = util::profiler::create();
|
|
_profile_ar_copy = util::profiler::create();
|
|
_profile_ar_transfer = util::profiler::create();
|
|
_profile_ar_run = util::profiler::create();
|
|
_profile_ar_calc = util::profiler::create();
|
|
#endif
|
|
|
|
{ // Create render target, vertex buffer, and CUDA stream.
|
|
auto gctx = gs::context{};
|
|
_rt = std::make_shared<gs::rendertarget>(GS_RGBA, GS_ZS_NONE);
|
|
_geometry = std::make_shared<gs::vertex_buffer>(uint32_t(4), uint8_t(1));
|
|
auto cctx = std::make_shared<::nvidia::cuda::context_stack>(_cuda, _cuda_ctx);
|
|
_cuda_stream = std::make_shared<::nvidia::cuda::stream>(_cuda, ::nvidia::cuda::stream_flags::NON_BLOCKING, 0);
|
|
}
|
|
|
|
{ // Asynchronously load Face Tracking.
|
|
async_initialize();
|
|
}
|
|
|
|
{ // Set up initial tracking data.
|
|
_values.center[0] = _values.center[1] = .5;
|
|
_values.size[0] = _values.size[1] = 1.;
|
|
refresh_region_of_interest();
|
|
}
|
|
}
|
|
|
|
face_tracking_instance::~face_tracking_instance()
|
|
{
|
|
// Kill pending tasks.
|
|
streamfx::threadpool()->pop(_async_initialize);
|
|
streamfx::threadpool()->pop(_async_track);
|
|
|
|
_ar_loaded.store(false);
|
|
std::unique_lock<std::mutex> alk{_ar_lock};
|
|
_ar_library->image_dealloc(&_ar_image_temp);
|
|
_ar_library->image_dealloc(&_ar_image_bgr);
|
|
}
|
|
|
|
void face_tracking_instance::async_initialize(std::shared_ptr<void> ptr)
|
|
{
|
|
struct async_data {
|
|
std::shared_ptr<obs_weak_source_t> source;
|
|
std::string models_path;
|
|
};
|
|
|
|
if (!ptr) {
|
|
// Spawn the work for the threadpool.
|
|
std::shared_ptr<async_data> data = std::make_shared<async_data>();
|
|
data->source =
|
|
std::shared_ptr<obs_weak_source_t>(obs_source_get_weak_source(_self), obs::obs_weak_source_deleter);
|
|
|
|
{
|
|
std::filesystem::path models_path = _ar_library->get_ar_sdk_path();
|
|
models_path = models_path.append("models");
|
|
models_path = std::filesystem::absolute(models_path);
|
|
models_path.concat("\\");
|
|
data->models_path = models_path.string();
|
|
}
|
|
|
|
_async_initialize = streamfx::threadpool()->push(
|
|
std::bind(&face_tracking_instance::async_initialize, this, std::placeholders::_1), data);
|
|
} else {
|
|
std::shared_ptr<async_data> data = std::static_pointer_cast<async_data>(ptr);
|
|
|
|
// Try and acquire a strong source reference.
|
|
std::shared_ptr<obs_source_t> remote_work =
|
|
std::shared_ptr<obs_source_t>(obs_weak_source_get_source(data->source.get()), obs::obs_source_deleter);
|
|
if (!remote_work) { // If that failed, the source we are working for was deleted - abort now.
|
|
return;
|
|
}
|
|
|
|
// Update the current CUDA context for working.
|
|
gs::context gctx;
|
|
auto cctx = std::make_shared<::nvidia::cuda::context_stack>(_cuda, _cuda_ctx);
|
|
|
|
// Create Face Detection feature.
|
|
{
|
|
NvAR_FeatureHandle fd_inst;
|
|
if (NvCV_Status res = _ar_library->create(NvAR_Feature_FaceDetection, &fd_inst); res != NVCV_SUCCESS) {
|
|
throw std::runtime_error("Failed to create Face Detection feature.");
|
|
}
|
|
_ar_feature = std::shared_ptr<nvAR_Feature>{fd_inst, ar_feature_deleter};
|
|
}
|
|
|
|
// Set the correct CUDA stream for processing.
|
|
if (NvCV_Status res = _ar_library->set_cuda_stream(_ar_feature.get(), NvAR_Parameter_Config(CUDAStream),
|
|
reinterpret_cast<CUstream>(_cuda_stream->get()));
|
|
res != NVCV_SUCCESS) {
|
|
throw std::runtime_error("Failed to set CUDA stream.");
|
|
}
|
|
|
|
// Set the correct models path.
|
|
if (NvCV_Status res =
|
|
_ar_library->set_string(_ar_feature.get(), NvAR_Parameter_Config(ModelDir), data->models_path.c_str());
|
|
res != NVCV_SUCCESS) {
|
|
throw std::runtime_error("Unable to set model path.");
|
|
}
|
|
|
|
// Finally enable Temporal tracking if possible.
|
|
if (NvCV_Status res = _ar_library->set_uint32(_ar_feature.get(), NvAR_Parameter_Config(Temporal), 1);
|
|
res != NVCV_SUCCESS) {
|
|
DLOG_WARNING("<%s> Unable to enable Temporal tracking mode.", obs_source_get_name(remote_work.get()));
|
|
}
|
|
|
|
// Create Bounding Boxes Data
|
|
_ar_bboxes_data.assign(1, {0., 0., 0., 0.});
|
|
_ar_bboxes.boxes = _ar_bboxes_data.data();
|
|
_ar_bboxes.max_boxes = std::clamp<uint8_t>(static_cast<uint8_t>(_ar_bboxes_data.size()), 0, 255);
|
|
_ar_bboxes.num_boxes = 0;
|
|
_ar_bboxes_confidence.resize(_ar_bboxes_data.size());
|
|
if (NvCV_Status res = _ar_library->set_object(_ar_feature.get(), NvAR_Parameter_Output(BoundingBoxes),
|
|
&_ar_bboxes, sizeof(NvAR_BBoxes));
|
|
res != NVCV_SUCCESS) {
|
|
throw std::runtime_error("Failed to set BoundingBoxes for Face Tracking feature.");
|
|
}
|
|
if (NvCV_Status res = _ar_library->set_float32_array(
|
|
_ar_feature.get(), NvAR_Parameter_Output(BoundingBoxesConfidence), _ar_bboxes_confidence.data(),
|
|
static_cast<int>(_ar_bboxes_confidence.size()));
|
|
res != NVCV_SUCCESS) {
|
|
throw std::runtime_error("Failed to set BoundingBoxesConfidence for Face Tracking feature.");
|
|
}
|
|
|
|
// And finally, load the feature (takes long).
|
|
if (NvCV_Status res = _ar_library->load(_ar_feature.get()); res != NVCV_SUCCESS) {
|
|
DLOG_ERROR("<%s> Failed to load Face Tracking feature.", obs_source_get_name(_self));
|
|
_ar_loaded = false;
|
|
return;
|
|
} else {
|
|
_ar_loaded = true;
|
|
}
|
|
|
|
_async_initialize.reset();
|
|
}
|
|
}
|
|
|
|
void face_tracking_instance::async_track(std::shared_ptr<void> ptr)
|
|
{
|
|
struct async_data {
|
|
std::shared_ptr<obs_weak_source_t> source;
|
|
};
|
|
|
|
if (!_ar_loaded)
|
|
return;
|
|
|
|
if (!ptr) {
|
|
// Check if we can track.
|
|
if (_ar_is_tracking)
|
|
return; // Can't track a new frame right now.
|
|
|
|
#ifdef ENABLE_PROFILING
|
|
gs::debug_marker gdm{gs::debug_color_convert, "Start Asynchronous Tracking"};
|
|
#endif
|
|
|
|
// Don't push additional tracking frames while processing one.
|
|
_ar_is_tracking = true;
|
|
|
|
// Spawn the work for the threadpool.
|
|
std::shared_ptr<async_data> data = std::make_shared<async_data>();
|
|
data->source =
|
|
std::shared_ptr<obs_weak_source_t>(obs_source_get_weak_source(_self), obs::obs_weak_source_deleter);
|
|
|
|
// Check if things exist as planned.
|
|
if (!_ar_texture || (_ar_texture->get_width() != _size.first) || (_ar_texture->get_height() != _size.second)) {
|
|
#ifdef ENABLE_PROFILING
|
|
auto prof = _profile_capture_realloc->track();
|
|
gs::debug_marker marker{gs::debug_color_allocate, "Reallocate GPU Buffer"};
|
|
#endif
|
|
_ar_texture = std::make_shared<gs::texture>(_size.first, _size.second, GS_RGBA, uint32_t(1), nullptr,
|
|
gs::texture::flags::None);
|
|
_ar_texture_cuda_fresh = false;
|
|
}
|
|
|
|
{ // Copy texture
|
|
#ifdef ENABLE_PROFILING
|
|
auto prof = _profile_capture_copy->track();
|
|
gs::debug_marker marker{gs::debug_color_copy, "Copy Capture", obs_source_get_name(_self)};
|
|
#endif
|
|
gs_copy_texture(_ar_texture->get_object(), _rt->get_texture()->get_object());
|
|
}
|
|
|
|
// Push work
|
|
_async_track = streamfx::threadpool()->push(
|
|
std::bind(&face_tracking_instance::async_track, this, std::placeholders::_1), data);
|
|
} else {
|
|
// Prevent conflicts.
|
|
std::unique_lock<std::mutex> alk{_ar_lock};
|
|
if (!_ar_loaded)
|
|
return;
|
|
|
|
// Try and acquire a strong source reference.
|
|
std::shared_ptr<async_data> data = std::static_pointer_cast<async_data>(ptr);
|
|
std::shared_ptr<obs_source_t> remote_work =
|
|
std::shared_ptr<obs_source_t>(obs_weak_source_get_source(data->source.get()), obs::obs_source_deleter);
|
|
if (!remote_work) { // If that failed, the source we are working for was deleted - abort now.
|
|
return;
|
|
}
|
|
|
|
// Acquire GS context.
|
|
gs::context gctx{};
|
|
|
|
// Update the current CUDA context for working.
|
|
auto cctx = std::make_shared<::nvidia::cuda::context_stack>(_cuda, _cuda_ctx);
|
|
|
|
// Refresh any now broken buffers.
|
|
if (!_ar_texture_cuda_fresh) {
|
|
#ifdef ENABLE_PROFILING
|
|
auto prof = _profile_ar_realloc->track();
|
|
gs::debug_marker marker{gs::debug_color_allocate, "%s: Reallocate CUDA Buffers",
|
|
obs_source_get_name(_self)};
|
|
#endif
|
|
// Assign new texture and allocate new memory.
|
|
std::size_t pitch = _ar_texture->get_width() * 4ul;
|
|
_ar_texture_cuda = std::make_shared<::nvidia::cuda::gstexture>(_cuda, _ar_texture);
|
|
_ar_texture_cuda_mem = std::make_shared<::nvidia::cuda::memory>(_cuda, pitch * _ar_texture->get_height());
|
|
_ar_library->image_init(&_ar_image, static_cast<unsigned int>(_ar_texture->get_width()),
|
|
static_cast<unsigned int>(_ar_texture->get_height()), static_cast<int>(pitch),
|
|
reinterpret_cast<void*>(_ar_texture_cuda_mem->get()), NVCV_RGBA, NVCV_U8,
|
|
NVCV_INTERLEAVED, NVCV_CUDA);
|
|
|
|
// Reallocate transposed buffer.
|
|
_ar_library->image_dealloc(&_ar_image_temp);
|
|
_ar_library->image_dealloc(&_ar_image_bgr);
|
|
_ar_library->image_alloc(&_ar_image_bgr, _ar_image.width, _ar_image.height, NVCV_BGR, NVCV_U8,
|
|
NVCV_INTERLEAVED, NVCV_CUDA, 0);
|
|
|
|
// Synchronize Streams.
|
|
_cuda->cuStreamSynchronize(_cuda_stream->get());
|
|
|
|
// Finally set the input object.
|
|
if (NvCV_Status res = _ar_library->set_object(_ar_feature.get(), NvAR_Parameter_Input(Image),
|
|
&_ar_image_bgr, sizeof(NvCVImage));
|
|
res != NVCV_SUCCESS) {
|
|
DLOG_ERROR("<%s> Failed to update input image for tracking.", obs_source_get_name(_self));
|
|
return;
|
|
}
|
|
|
|
// And mark the new texture as fresh.
|
|
_ar_texture_cuda_fresh = true;
|
|
}
|
|
|
|
{ // Copy from CUDA array to CUDA device memory.
|
|
#ifdef ENABLE_PROFILING
|
|
auto prof = _profile_ar_copy->track();
|
|
#endif
|
|
::nvidia::cuda::memcpy2d_t mc;
|
|
mc.src_x_in_bytes = 0;
|
|
mc.src_y = 0;
|
|
mc.src_memory_type = ::nvidia::cuda::memory_type::ARRAY;
|
|
mc.src_host = nullptr;
|
|
mc.src_device = 0;
|
|
mc.src_array = _ar_texture_cuda->map(_cuda_stream);
|
|
mc.src_pitch = static_cast<size_t>(_ar_image.pitch);
|
|
mc.dst_x_in_bytes = 0;
|
|
mc.dst_y = 0;
|
|
mc.dst_memory_type = ::nvidia::cuda::memory_type::DEVICE;
|
|
mc.dst_host = 0;
|
|
mc.dst_device = reinterpret_cast<::nvidia::cuda::device_ptr_t>(_ar_image.pixels);
|
|
mc.dst_array = 0;
|
|
mc.dst_pitch = static_cast<size_t>(_ar_image.pitch);
|
|
mc.width_in_bytes = static_cast<size_t>(_ar_image.pitch);
|
|
mc.height = _ar_image.height;
|
|
|
|
if (::nvidia::cuda::result res = _cuda->cuMemcpy2DAsync(&mc, _cuda_stream->get());
|
|
res != ::nvidia::cuda::result::SUCCESS) {
|
|
DLOG_ERROR("<%s> Failed to prepare buffers for tracking.", obs_source_get_name(_self));
|
|
return;
|
|
}
|
|
}
|
|
|
|
{ // Convert from RGBA 32-bit to BGR 24-bit.
|
|
#ifdef ENABLE_PROFILING
|
|
auto prof = _profile_ar_transfer->track();
|
|
#endif
|
|
if (NvCV_Status res =
|
|
_ar_library->image_transfer(&_ar_image, &_ar_image_bgr, 1.0,
|
|
reinterpret_cast<CUstream_st*>(_cuda_stream->get()), &_ar_image_temp);
|
|
res != NVCV_SUCCESS) {
|
|
DLOG_ERROR("<%s> Failed to convert from RGBX 32-bit to BGR 24-bit.", obs_source_get_name(_self));
|
|
return;
|
|
}
|
|
|
|
// Synchronize Streams.
|
|
_cuda->cuStreamSynchronize(_cuda_stream->get());
|
|
_cuda->cuCtxSynchronize();
|
|
}
|
|
|
|
{ // Track any faces.
|
|
#ifdef ENABLE_PROFILING
|
|
auto prof = _profile_ar_run->track();
|
|
#endif
|
|
if (NvCV_Status res = _ar_library->run(_ar_feature.get()); res != NVCV_SUCCESS) {
|
|
DLOG_ERROR("<%s> Failed to run tracking.", obs_source_get_name(_self));
|
|
return;
|
|
}
|
|
}
|
|
|
|
// Are we tracking anything, and confident enough in the tracking?
|
|
if ((_ar_bboxes.num_boxes == 0) || (_ar_bboxes_confidence.at(0) < 0.3333)) {
|
|
// If not, just return to full frame.
|
|
std::unique_lock<std::mutex> tlk{_values.lock};
|
|
_values.center[0] = .5;
|
|
_values.center[1] = .5;
|
|
_values.size[0] = 1.;
|
|
_values.size[1] = 1.;
|
|
_values.velocity[0] = 0;
|
|
_values.velocity[1] = 0;
|
|
} else {
|
|
// If yes, begin tracking.
|
|
#ifdef ENABLE_PROFILING
|
|
auto prof = _profile_ar_calc->track();
|
|
#endif
|
|
|
|
double_t sx = static_cast<double_t>(_ar_image_bgr.width);
|
|
double_t sy = static_cast<double_t>(_ar_image_bgr.height);
|
|
double_t aspect = double_t(sx) / double_t(sy);
|
|
double_t fps = 0.;
|
|
|
|
{
|
|
obs_video_info ovi;
|
|
obs_get_video_info(&ovi);
|
|
fps = static_cast<double_t>(ovi.fps_num) / static_cast<double_t>(ovi.fps_den);
|
|
}
|
|
|
|
// Store values and center.
|
|
double_t bsx = _ar_bboxes.boxes[0].width;
|
|
double_t bsy = _ar_bboxes.boxes[0].height;
|
|
double_t bcx = _ar_bboxes.boxes[0].x + bsx / 2.0;
|
|
double_t bcy = _ar_bboxes.boxes[0].y + bsy / 2.0;
|
|
|
|
// Zoom, Aspect Ratio, Offset
|
|
bsy = util::math::lerp<double_t>(sy, bsy, _cfg_zoom);
|
|
bsy = std::clamp(bsy, 10 * aspect, static_cast<double_t>(_size.second));
|
|
bsx = bsy * aspect;
|
|
bcx += _ar_bboxes.boxes[0].width * _cfg_offset.first;
|
|
bcy += _ar_bboxes.boxes[0].height * _cfg_offset.second;
|
|
|
|
// Fit back into the frame
|
|
// - Above code guarantees that height is never bigger than the height of the frame.
|
|
// - Which also guarantees that width is never bigger than the width of the frame.
|
|
// Only cx and cy need to be adjusted now to always be in the frame.
|
|
bcx = std::clamp(bcx, (bsx / 2.), sx - (bsx / 2.));
|
|
bcy = std::clamp(bcy, (bsy / 2.), sy - (bsy / 2.));
|
|
|
|
{ // Update target values.
|
|
std::unique_lock<std::mutex> tlk{_values.lock};
|
|
_values.velocity[0] = -_values.center[0];
|
|
_values.velocity[1] = -_values.center[1];
|
|
_values.center[0] = bcx / sx;
|
|
_values.center[1] = bcy / sy;
|
|
_values.velocity[0] += _values.center[0];
|
|
_values.velocity[1] += _values.center[1];
|
|
_values.velocity[0] *= fps;
|
|
_values.velocity[1] *= fps;
|
|
_values.size[0] = bsx / sx;
|
|
_values.size[1] = bsy / sy;
|
|
}
|
|
}
|
|
|
|
_async_track.reset();
|
|
|
|
// Allow new frames to be queued again.
|
|
_ar_is_tracking = false;
|
|
}
|
|
}
|
|
|
|
void face_tracking_instance::refresh_geometry()
|
|
{ // Update Region of Interest Geometry.
|
|
auto v0 = _geometry->at(0);
|
|
auto v1 = _geometry->at(1);
|
|
auto v2 = _geometry->at(2);
|
|
auto v3 = _geometry->at(3);
|
|
|
|
vec3_set(v3.position, static_cast<float_t>(_size.first), static_cast<float_t>(_size.second), 0.);
|
|
vec3_set(v2.position, v3.position->x, 0., 0.);
|
|
vec3_set(v1.position, 0., v3.position->y, 0.);
|
|
vec3_set(v0.position, 0., 0., 0.);
|
|
|
|
float_t hsx = static_cast<float_t>(_filters.size[0].get() / 2.);
|
|
float_t hsy = static_cast<float_t>(_filters.size[1].get() / 2.);
|
|
vec4_set(v0.uv[0], static_cast<float_t>(_filters.center[0].get() - hsx),
|
|
static_cast<float_t>(_filters.center[1].get() - hsy), 0., 0.);
|
|
vec4_set(v1.uv[0], static_cast<float_t>(_filters.center[0].get() - hsx),
|
|
static_cast<float_t>(_filters.center[1].get() + hsy), 0., 0.);
|
|
vec4_set(v2.uv[0], static_cast<float_t>(_filters.center[0].get() + hsx),
|
|
static_cast<float_t>(_filters.center[1].get() - hsy), 0., 0.);
|
|
vec4_set(v3.uv[0], static_cast<float_t>(_filters.center[0].get() + hsx),
|
|
static_cast<float_t>(_filters.center[1].get() + hsy), 0., 0.);
|
|
|
|
_geometry->update(true);
|
|
}
|
|
|
|
void face_tracking_instance::refresh_region_of_interest()
|
|
{
|
|
std::unique_lock<std::mutex> tlk(_values.lock);
|
|
|
|
double_t kalman_q = util::math::lerp<double_t>(1.0, 1e-6, _cfg_stability);
|
|
double_t kalman_r = util::math::lerp<double_t>(std::numeric_limits<double_t>::epsilon(), 1e+2, _cfg_stability);
|
|
|
|
_filters.center[0] = util::math::kalman1D<double_t>{kalman_q, kalman_r, 1., _values.center[0]};
|
|
_filters.center[1] = util::math::kalman1D<double_t>{kalman_q, kalman_r, 1., _values.center[1]};
|
|
_filters.size[0] = util::math::kalman1D<double_t>{kalman_q, kalman_r, 1., _values.size[0]};
|
|
_filters.size[1] = util::math::kalman1D<double_t>{kalman_q, kalman_r, 1., _values.size[1]};
|
|
}
|
|
|
|
void face_tracking_instance::load(obs_data_t* data)
|
|
{
|
|
update(data);
|
|
}
|
|
|
|
void face_tracking_instance::migrate(obs_data_t* data, uint64_t version) {}
|
|
|
|
void face_tracking_instance::update(obs_data_t* data)
|
|
{
|
|
_cfg_zoom = obs_data_get_double(data, SK_ROI_ZOOM) / 100.0;
|
|
_cfg_offset.first = obs_data_get_double(data, SK_ROI_OFFSET_X) / 100.0;
|
|
_cfg_offset.second = obs_data_get_double(data, SK_ROI_OFFSET_Y) / 100.0;
|
|
_cfg_stability = obs_data_get_double(data, SK_ROI_STABILITY) / 100.0;
|
|
|
|
// Refresh the Region Of Interest
|
|
refresh_region_of_interest();
|
|
}
|
|
|
|
void face_tracking_instance::video_tick(float_t seconds)
|
|
{
|
|
// If we aren't yet ready to do work, abort for now.
|
|
if (!_ar_loaded) {
|
|
return;
|
|
}
|
|
|
|
// Update the input size.
|
|
if (obs_source_t* src = obs_filter_get_target(_self); src != nullptr) {
|
|
_size.first = obs_source_get_base_width(src);
|
|
_size.second = obs_source_get_base_height(src);
|
|
}
|
|
|
|
// Update filters and geometry
|
|
{
|
|
std::unique_lock<std::mutex> tlk(_values.lock);
|
|
_filters.center[0].filter(_values.center[0]);
|
|
_filters.center[1].filter(_values.center[1]);
|
|
_filters.size[0].filter(_values.size[0]);
|
|
_filters.size[1].filter(_values.size[1]);
|
|
_values.center[0] += _values.velocity[0] * seconds;
|
|
_values.center[1] += _values.velocity[1] * seconds;
|
|
}
|
|
refresh_geometry();
|
|
|
|
_rt_is_fresh = false;
|
|
}
|
|
|
|
void face_tracking_instance::video_render(gs_effect_t* effect)
|
|
{
|
|
obs_source_t* filter_parent = obs_filter_get_parent(_self);
|
|
obs_source_t* filter_target = obs_filter_get_target(_self);
|
|
gs_effect_t* default_effect = obs_get_base_effect(OBS_EFFECT_DEFAULT);
|
|
|
|
if (!filter_parent || !filter_target || !_size.first || !_size.second || !_ar_loaded) {
|
|
obs_source_skip_video_filter(_self);
|
|
return;
|
|
}
|
|
|
|
#ifdef ENABLE_PROFILING
|
|
gs::debug_marker gdmp{gs::debug_color_source, "NVIDIA Face Tracking '%s'...", obs_source_get_name(_self)};
|
|
gs::debug_marker gdmp2{gs::debug_color_source, "... on '%s'", obs_source_get_name(obs_filter_get_parent(_self))};
|
|
#endif
|
|
|
|
if (!_rt_is_fresh) { // Capture the filter stack "below" us.
|
|
#ifdef ENABLE_PROFILING
|
|
auto prof = _profile_capture->track();
|
|
#endif
|
|
|
|
{
|
|
#ifdef ENABLE_PROFILING
|
|
gs::debug_marker gdm{gs::debug_color_cache, "Cache"};
|
|
#endif
|
|
|
|
if (obs_source_process_filter_begin(_self, _rt->get_color_format(), OBS_ALLOW_DIRECT_RENDERING)) {
|
|
auto op = _rt->render(_size.first, _size.second);
|
|
vec4 clr = {0., 0., 0., 0.};
|
|
|
|
gs_ortho(0., 1., 0., 1., -1., 1.);
|
|
gs_clear(GS_CLEAR_COLOR, &clr, 0., 0);
|
|
gs_enable_color(true, true, true, true);
|
|
gs_enable_blending(false);
|
|
|
|
obs_source_process_filter_tech_end(_self, default_effect, 1, 1, "Draw");
|
|
} else {
|
|
obs_source_skip_video_filter(_self);
|
|
return;
|
|
}
|
|
}
|
|
|
|
// Probably spawn new work.
|
|
async_track(nullptr);
|
|
|
|
_rt_is_fresh = true;
|
|
}
|
|
|
|
{ // Draw Texture
|
|
#ifdef ENABLE_PROFILING
|
|
gs::debug_marker gdm{gs::debug_color_render, "Render"};
|
|
#endif
|
|
|
|
gs_effect_set_texture(gs_effect_get_param_by_name(effect ? effect : default_effect, "image"),
|
|
_rt->get_texture()->get_object());
|
|
gs_load_vertexbuffer(_geometry->update(false));
|
|
while (gs_effect_loop(effect ? effect : default_effect, "Draw")) {
|
|
gs_draw(gs_draw_mode::GS_TRISTRIP, 0, 0);
|
|
}
|
|
gs_load_vertexbuffer(nullptr);
|
|
}
|
|
}
|
|
|
|
#ifdef ENABLE_PROFILING
|
|
bool face_tracking_instance::button_profile(obs_properties_t* props, obs_property_t* property)
|
|
{
|
|
DLOG_INFO("%-22s: %-10s %-10s %-10s %-10s %-10s", "Task", "Total", "Count", "Average", "99.9%ile", "95.0%ile");
|
|
|
|
std::pair<std::string, std::shared_ptr<util::profiler>> profilers[]{
|
|
{"Capture", _profile_capture}, {"Reallocate", _profile_capture_realloc},
|
|
{"Copy", _profile_capture_copy}, {"AR Reallocate", _profile_ar_realloc},
|
|
{"AR Copy", _profile_ar_copy}, {"AR Convert", _profile_ar_transfer},
|
|
{"AR Run", _profile_ar_run}, {"AR Calculate", _profile_ar_calc},
|
|
};
|
|
for (auto& kv : profilers) {
|
|
DLOG_INFO(" %-20s: %8lldµs %10lld %8lldµs %8lldµs %8lldµs", kv.first.c_str(),
|
|
std::chrono::duration_cast<std::chrono::microseconds>(kv.second->total_duration()).count(),
|
|
kv.second->count(), static_cast<int64_t>(kv.second->average_duration() / 1000.0),
|
|
std::chrono::duration_cast<std::chrono::microseconds>(kv.second->percentile(0.999)).count(),
|
|
std::chrono::duration_cast<std::chrono::microseconds>(kv.second->percentile(0.95)).count());
|
|
}
|
|
|
|
return false;
|
|
}
|
|
#endif
|
|
|
|
face_tracking_factory::face_tracking_factory()
|
|
{
|
|
// Try and load CUDA.
|
|
_cuda = ::nvidia::cuda::cuda::get();
|
|
|
|
// Try and load AR.
|
|
_ar = std::make_shared<::nvidia::ar::ar>();
|
|
|
|
// Initialize CUDA
|
|
{
|
|
auto gctx = gs::context{};
|
|
#ifdef WIN32
|
|
if (gs_get_device_type() == GS_DEVICE_DIRECT3D_11) {
|
|
_cuda_ctx =
|
|
std::make_shared<::nvidia::cuda::context>(_cuda, reinterpret_cast<ID3D11Device*>(gs_get_device_obj()));
|
|
}
|
|
#endif
|
|
if (gs_get_device_type() == GS_DEVICE_OPENGL) {
|
|
throw std::runtime_error("OpenGL not supported.");
|
|
}
|
|
}
|
|
|
|
// Info
|
|
_info.id = PREFIX "filter-nvidia-face-tracking";
|
|
_info.type = OBS_SOURCE_TYPE_FILTER;
|
|
_info.output_flags = OBS_SOURCE_VIDEO;
|
|
|
|
set_resolution_enabled(false);
|
|
finish_setup();
|
|
register_proxy("streamfx-nvidia-face-tracking");
|
|
}
|
|
|
|
face_tracking_factory::~face_tracking_factory() {}
|
|
|
|
const char* face_tracking_factory::get_name()
|
|
{
|
|
return D_TRANSLATE(ST);
|
|
}
|
|
|
|
void face_tracking_factory::get_defaults2(obs_data_t* data)
|
|
{
|
|
obs_data_set_default_double(data, SK_ROI_ZOOM, 50.0);
|
|
obs_data_set_default_double(data, SK_ROI_OFFSET_X, 0.0);
|
|
obs_data_set_default_double(data, SK_ROI_OFFSET_Y, -15.0);
|
|
obs_data_set_default_double(data, SK_ROI_STABILITY, 50.0);
|
|
}
|
|
|
|
obs_properties_t* face_tracking_factory::get_properties2(face_tracking_instance* data)
|
|
{
|
|
obs_properties_t* pr = obs_properties_create();
|
|
|
|
{
|
|
auto grp = obs_properties_create();
|
|
obs_properties_add_group(pr, ST_ROI, D_TRANSLATE(ST_ROI), OBS_GROUP_NORMAL, grp);
|
|
{
|
|
auto p =
|
|
obs_properties_add_float_slider(grp, SK_ROI_STABILITY, D_TRANSLATE(ST_ROI_STABILITY), 0, 100.0, 0.01);
|
|
obs_property_float_set_suffix(p, " %");
|
|
}
|
|
{
|
|
auto p = obs_properties_add_float_slider(grp, SK_ROI_ZOOM, D_TRANSLATE(ST_ROI_ZOOM), 0, 200.0, 0.01);
|
|
obs_property_float_set_suffix(p, " %");
|
|
}
|
|
{
|
|
auto grp2 = obs_properties_create();
|
|
obs_properties_add_group(grp, ST_ROI_OFFSET, D_TRANSLATE(ST_ROI_OFFSET), OBS_GROUP_NORMAL, grp2);
|
|
|
|
{
|
|
auto p = obs_properties_add_float_slider(grp2, SK_ROI_OFFSET_X, D_TRANSLATE(ST_ROI_OFFSET_X), -50.0,
|
|
50.0, 0.01);
|
|
obs_property_float_set_suffix(p, " %");
|
|
}
|
|
{
|
|
auto p = obs_properties_add_float_slider(grp2, SK_ROI_OFFSET_Y, D_TRANSLATE(ST_ROI_OFFSET_Y), -50.0,
|
|
50.0, 0.01);
|
|
obs_property_float_set_suffix(p, " %");
|
|
}
|
|
}
|
|
}
|
|
#ifdef ENABLE_PROFILING
|
|
{
|
|
obs_properties_add_button2(
|
|
pr, "Profile", "Profile",
|
|
[](obs_properties_t* props, obs_property_t* property, void* data) {
|
|
return reinterpret_cast<face_tracking_instance*>(data)->button_profile(props, property);
|
|
},
|
|
data);
|
|
}
|
|
#endif
|
|
|
|
return pr;
|
|
}
|
|
|
|
std::shared_ptr<::nvidia::cuda::cuda> face_tracking_factory::get_cuda()
|
|
{
|
|
return _cuda;
|
|
}
|
|
|
|
std::shared_ptr<::nvidia::cuda::context> face_tracking_factory::get_cuda_context()
|
|
{
|
|
return _cuda_ctx;
|
|
}
|
|
|
|
std::shared_ptr<::nvidia::ar::ar> face_tracking_factory::get_ar()
|
|
{
|
|
return _ar;
|
|
}
|
|
|
|
std::shared_ptr<face_tracking_factory> _filter_nvidia_face_tracking_factory_instance = nullptr;
|
|
|
|
void streamfx::filter::nvidia::face_tracking_factory::initialize()
|
|
{
|
|
try {
|
|
_filter_nvidia_face_tracking_factory_instance = std::make_shared<filter::nvidia::face_tracking_factory>();
|
|
} catch (const std::exception& ex) {
|
|
DLOG_ERROR("<NVIDIA Face Tracking Filter> %s", ex.what());
|
|
}
|
|
}
|
|
|
|
void streamfx::filter::nvidia::face_tracking_factory::finalize()
|
|
{
|
|
_filter_nvidia_face_tracking_factory_instance.reset();
|
|
}
|
|
|
|
std::shared_ptr<face_tracking_factory> streamfx::filter::nvidia::face_tracking_factory::get()
|
|
{
|
|
return _filter_nvidia_face_tracking_factory_instance;
|
|
}
|