filter-nv-face-tracking: Nvidia exclusive Face Tracking filter

Ever wished you had a professional camera operator to highlight and follow the action, ensuring the audience never misses a beat? Thanks to NVIDIA, you can now do this at home for free! The new NVIDIA AR SDK unlocks augmented reality features, including motion tracking for faces.

This allows me to provide you with an automated zoom and cropping solution for your video camera to transform your streams into a slick, polished broadcast, where you’ll always be the star of the show. Don’t forget - everything is customizable so the possibilities are endless. You can even recreate that Futurama squinting meme if you wanted to (with some scripting)!

The filter requires compatible Nvidia RTX hardware and the Nvidia AR SDK Runtime to be installed ahead of time. This filter is considered "stable" and shouldn't change much from version to version.
This commit is contained in:
Michael Fabian 'Xaymar' Dirks 2020-03-25 18:31:22 +01:00 committed by Michael Fabian Dirks
parent 88213e81f6
commit 43f1dcf205
13 changed files with 1408 additions and 4 deletions

3
.gitmodules vendored
View File

@ -1,3 +1,6 @@
[submodule "cmake/clang"]
path = cmake/clang
url = https://github.com/Xaymar/cmake-clang.git
[submodule "third-party/nvidia-arsdk"]
path = third-party/nvidia-arsdk
url = https://github.com/NVIDIA/BROADCAST-AR-SDK.git

View File

@ -200,6 +200,7 @@ set(${PropertyPrefix}ENABLE_FILTER_BLUR TRUE CACHE BOOL "Enable Blur Filter")
set(${PropertyPrefix}ENABLE_FILTER_COLOR_GRADE TRUE CACHE BOOL "Enable Color Grade Filter")
set(${PropertyPrefix}ENABLE_FILTER_DISPLACEMENT TRUE CACHE BOOL "Enable Displacement Filter")
set(${PropertyPrefix}ENABLE_FILTER_DYNAMIC_MASK TRUE CACHE BOOL "Enable Dynamic Mask Filter")
set(${PropertyPrefix}ENABLE_FILTER_NVIDIA_FACE_TRACKING TRUE CACHE BOOL "Enable NVidia Face Tracking Filter")
set(${PropertyPrefix}ENABLE_FILTER_SDF_EFFECTS TRUE CACHE BOOL "Enable SDF Effects Filter")
set(${PropertyPrefix}ENABLE_FILTER_SHADER TRUE CACHE BOOL "Enable Shader Filter")
set(${PropertyPrefix}ENABLE_FILTER_TRANSFORM TRUE CACHE BOOL "Enable Transform Filter")
@ -551,6 +552,41 @@ if(${PropertyPrefix}ENABLE_FILTER_DYNAMIC_MASK)
)
endif()
## Features - Nvidia Face Tracking Filter
# Requires the NVIDIA AR SDK (Windows only); located by the bundled FindNVAR module
# using the submodule checkout as the default search root.
if(${PropertyPrefix}ENABLE_FILTER_NVIDIA_FACE_TRACKING AND WIN32)
	set(NVAR_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/third-party/nvidia-arsdk")
	find_package(NVAR REQUIRED)
	list(APPEND PROJECT_PRIVATE_SOURCE
		"source/filters/filter-nv-face-tracking.hpp"
		"source/filters/filter-nv-face-tracking.cpp"
		"source/nvidia/ar/nvidia-ar.hpp"
		"source/nvidia/ar/nvidia-ar.cpp"
		"source/nvidia/ar/nvidia-ar-feature.hpp"
		"source/nvidia/ar/nvidia-ar-feature.cpp"
		"source/nvidia/cuda/nvidia-cuda.hpp"
		"source/nvidia/cuda/nvidia-cuda.cpp"
		"source/nvidia/cuda/nvidia-cuda-context.hpp"
		"source/nvidia/cuda/nvidia-cuda-context.cpp"
		"source/nvidia/cuda/nvidia-cuda-context-stack.hpp"
		"source/nvidia/cuda/nvidia-cuda-context-stack.cpp"
		"source/nvidia/cuda/nvidia-cuda-gs-texture.hpp"
		"source/nvidia/cuda/nvidia-cuda-gs-texture.cpp"
		"source/nvidia/cuda/nvidia-cuda-memory.hpp"
		"source/nvidia/cuda/nvidia-cuda-memory.cpp"
		"source/nvidia/cuda/nvidia-cuda-stream.hpp"
		"source/nvidia/cuda/nvidia-cuda-stream.cpp"
	)
	# nvARProxy is an INTERFACE target defined by FindNVAR; it carries the SDK
	# include and source directories as usage requirements, so no manual
	# PROJECT_INCLUDE_DIRS entry is needed here.
	list(APPEND PROJECT_LIBRARIES
		nvARProxy
	)
	list(APPEND PROJECT_DEFINITIONS
		ENABLE_FILTER_NVIDIA_FACE_TRACKING
	)
endif()
## Features - SDF Effects Filter
if(${PropertyPrefix}ENABLE_FILTER_SDF_EFFECTS)
list(APPEND PROJECT_DATA
@ -743,7 +779,7 @@ if(WIN32)
NOATOM
NOCLIPBOARD
NOCOLOR
NOCTLMGR
#NOCTLMGR
NODRAWTEXT
#NOGDI
NOKERNEL
@ -763,7 +799,7 @@ if(WIN32)
NOWINOFFSETS
NOCOMM
NOKANJI
NOHELP
#NOHELP
NOPROFILER
NODEFERWINDOWPOS
NOMCX

View File

@ -0,0 +1,59 @@
# FindNVAR.cmake - Locate the NVIDIA AR (Broadcast) SDK.
#
# Input:
# - NVAR_ROOT: Path to the NVIDIA AR SDK, as a cache variable or an
#   environment variable of the same name.
#
# Sets:
# - NVAR_FOUND
# - NVAR_INCLUDE_DIRS: Directory containing nvAR.h and nvAR_defs.h.
# - NVAR_SOURCE_DIRS: Directory containing nvARProxy.cpp.
#
# Defines the INTERFACE target 'nvARProxy', which carries both directories
# as usage requirements for consumers.
include(FindPackageHandleStandardArgs)
find_package(PkgConfig QUIET)
# Variables
set(NVAR_ROOT "" CACHE PATH "Path to NVidia AR SDK")
find_path(NVAR_INCLUDE_DIRS
	NAMES
		"nvAR.h" "nvAR_defs.h"
	HINTS
		ENV NVAR_ROOT
		${NVAR_ROOT}
	PATHS
		/usr/include
		/usr/local/include
		/opt/local/include
	PATH_SUFFIXES
		include
		nvar/include
)
find_path(NVAR_SOURCE_DIRS
	NAMES
		"nvARProxy.cpp"
	HINTS
		ENV NVAR_ROOT
		${NVAR_ROOT}
	PATHS
		/usr/include
		/usr/local/include
		/opt/local/include
	PATH_SUFFIXES
		src
		nvar/src
)
# Note: no VERSION_VAR is passed here since the SDK exposes no version
# variable for this module to report.
find_package_handle_standard_args(NVAR
	FOUND_VAR NVAR_FOUND
	REQUIRED_VARS NVAR_INCLUDE_DIRS NVAR_SOURCE_DIRS
	HANDLE_COMPONENTS
)
mark_as_advanced(NVAR_INCLUDE_DIRS NVAR_SOURCE_DIRS)
if(NVAR_FOUND AND NOT TARGET nvARProxy)
	add_library(nvARProxy INTERFACE)
	target_include_directories(nvARProxy
		INTERFACE
			${NVAR_SOURCE_DIRS}
			${NVAR_INCLUDE_DIRS}
	)
endif()

View File

@ -23,8 +23,8 @@ VertData vertex_program(VertData vd)
// -------------------------------------------------------------------------------- //
// Helpers
// -------------------------------------------------------------------------------- //
uint get_selector(VertData vd, int width) {
return uint(vd.uv.x * size * width) % width;
uint get_selector(VertData vd, int components) {
return uint(vd.uv.x * size * components) % components;
}
// -------------------------------------------------------------------------------- //

View File

@ -191,6 +191,19 @@ Filter.DynamicMask.Channel.Multiplier.Description="TODO"
Filter.DynamicMask.Channel.Input="%s Input Value"
Filter.DynamicMask.Channel.Input.Description="TODO"
# Filter - Nvidia Face Tracking
Filter.Nvidia.FaceTracking="Nvidia Face Tracking"
Filter.Nvidia.FaceTracking.ROI="Region of Interest"
Filter.Nvidia.FaceTracking.ROI.Zoom="Zoom"
Filter.Nvidia.FaceTracking.ROI.Zoom.Description="Restrict the maximum zoom level based on the current maximum and minimum zoom level.\nValues above 100% zoom into the face, while values below 100% will keep their distance from the face."
Filter.Nvidia.FaceTracking.ROI.Offset="Offset"
Filter.Nvidia.FaceTracking.ROI.Offset.X="X"
Filter.Nvidia.FaceTracking.ROI.Offset.X.Description="Horizontal offset relative to center of the detected face."
Filter.Nvidia.FaceTracking.ROI.Offset.Y="Y"
Filter.Nvidia.FaceTracking.ROI.Offset.Y.Description="Vertical offset relative to center of the detected face."
Filter.Nvidia.FaceTracking.ROI.Stability="Stability"
Filter.Nvidia.FaceTracking.ROI.Stability.Description="Controls the responsiveness of the tracking filter to filter out noisy and/or bad results.\nValues closer to 0% will be quicker but more noisy, while values closer to 100% will be slower but noise free.\nDue to unique noise patterns of modern Webcams, there is no universal setting for this."
# Filter - SDF Effects
Filter.SDFEffects="Signed Distance Field Effects"
Filter.SDFEffects.Shadow.Inner="Inner Shadow"

View File

@ -0,0 +1,576 @@
/*
* Modern effects for a modern Streamer
* Copyright (C) 2017 Michael Fabian Dirks
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#include "filter-nv-face-tracking.hpp"
#include <algorithm>
#include <filesystem>
#include <util/platform.h>
#include "nvidia/cuda/nvidia-cuda-context-stack.hpp"
#include "obs/gs/gs-helper.hpp"
#include "utility.hpp"
#define ST "Filter.Nvidia.FaceTracking"
#define ST_ROI "Filter.Nvidia.FaceTracking.ROI"
#define ST_ROI_ZOOM "Filter.Nvidia.FaceTracking.ROI.Zoom"
#define SK_ROI_ZOOM "ROI.Zoom"
#define ST_ROI_OFFSET "Filter.Nvidia.FaceTracking.ROI.Offset"
#define ST_ROI_OFFSET_X "Filter.Nvidia.FaceTracking.ROI.Offset.X"
#define SK_ROI_OFFSET_X "ROI.Offset.X"
#define ST_ROI_OFFSET_Y "Filter.Nvidia.FaceTracking.ROI.Offset.Y"
#define SK_ROI_OFFSET_Y "ROI.Offset.Y"
#define ST_ROI_STABILITY "Filter.Nvidia.FaceTracking.ROI.Stability"
#define SK_ROI_STABILITY "ROI.Stability"
// Deleter for std::shared_ptr<nvAR_Feature>: destroys the AR feature handle
// through the SDK wrapper held by the factory singleton.
// NOTE(review): assumes the factory singleton is still alive when the last
// handle reference is dropped — confirm destruction order at module unload.
void nvar_deleter(NvAR_FeatureHandle v)
{
filter::nvidia::face_tracking_factory::get()->get_ar()->destroy(v);
}
// Construct a face tracking filter instance: create the capture render target,
// resolve the AR SDK model directory, create a CUDA stream, and kick off face
// detection setup (which finishes asynchronously on a worker thread).
// @param settings  OBS settings for this filter instance.
// @param self      The obs_source_t representing this filter.
filter::nvidia::face_tracking_instance::face_tracking_instance(obs_data_t* settings, obs_source_t* self)
: obs::source_instance(settings, self), _width(), _height(), _up_to_date(false), _rt(), _cfg_roi_zoom(1.0),
_cfg_roi_offset({0., 0.}), _cfg_roi_stability(1.0), _roi_center(), _roi_size(), _roi_geom(4, 1),
_cuda(face_tracking_factory::get()->get_cuda()), _cuda_ctx(face_tracking_factory::get()->get_cuda_context()),
_cuda_stream(), _cuda_mem(), _cuda_flush_cache(true), _ar(face_tracking_factory::get()->get_ar()),
_ar_models_path(), _ar_tracker(), _ar_ready(false), _ar_bboxes_data(), _ar_bboxes(), _ar_bboxes_confidence(),
_ar_image(), _ar_image_bgr(), _ar_image_temp()
{
// Create rendertarget for parent source storage.
{
auto gctx = gs::context{};
_rt = std::make_shared<gs::rendertarget>(GS_RGBA, GS_ZS_NONE);
}
// Figure out where the AR SDK Models are stored.
{
std::filesystem::path models_path = _ar->get_ar_sdk_path();
models_path = models_path.append("models");
models_path = std::filesystem::absolute(models_path);
// NOTE(review): backslash separator is Windows-only; the filter is only
// built on WIN32, but confirm if other platforms are ever added.
models_path.concat("\\");
_ar_models_path = models_path.string();
}
// Initialize everything.
{
// Push our CUDA context for the duration of this scope.
auto cctx = std::make_shared<::nvidia::cuda::context_stack>(_cuda, _cuda_ctx);
_cuda_stream = std::make_shared<::nvidia::cuda::stream>(_cuda);
face_detection_initialize();
}
#ifdef _DEBUG
// Profiling
_profile_capture = util::profiler::create();
_profile_cuda_register = util::profiler::create();
_profile_cuda_copy = util::profiler::create();
_profile_ar_transfer = util::profiler::create();
_profile_ar_run = util::profiler::create();
#endif
}
// Destructor: release the SDK-allocated image buffers.
// NOTE(review): _ar_image is intentionally not deallocated — it wraps memory
// owned by _cuda_mem (see create_image_buffer), so only the SDK-allocated BGR
// and temporary images are freed here. Confirm against the NvCVImage docs.
filter::nvidia::face_tracking_instance::~face_tracking_instance()
{
_ar->image_dealloc(&_ar_image_temp);
_ar->image_dealloc(&_ar_image_bgr);
}
// Create and configure the Face Detection feature, wire up the output buffers
// (bounding boxes and confidences), then queue the heavy model load onto the
// global thread pool so OBS Studio is not blocked.
// @throws std::runtime_error if any SDK call fails.
void filter::nvidia::face_tracking_instance::face_detection_initialize()
{
	// Create
	NvAR_FeatureHandle fd_inst;
	if (NvCV_Status res = _ar->create(NvAR_Feature_FaceDetection, &fd_inst); res != NVCV_SUCCESS) {
		throw std::runtime_error("Failed to create Face Detection feature.");
	}
	_ar_tracker = std::shared_ptr<nvAR_Feature>{fd_inst, nvar_deleter};

	// Configuration
	// Fix: the original threw empty-message exceptions here, making failures
	// impossible to diagnose from the log.
	if (NvCV_Status res = _ar->set_cuda_stream(fd_inst, NvAR_Parameter_Config(CUDAStream),
											   reinterpret_cast<CUstream>(_cuda_stream->get()));
		res != NVCV_SUCCESS) {
		throw std::runtime_error("Failed to set CUDA stream for Face Detection feature.");
	}
	if (NvCV_Status res = _ar->set_string(fd_inst, NvAR_Parameter_Config(ModelDir), _ar_models_path.c_str());
		res != NVCV_SUCCESS) {
		throw std::runtime_error("Failed to set model directory for Face Detection feature.");
	}
	if (NvCV_Status res = _ar->set_uint32(fd_inst, NvAR_Parameter_Config(Temporal), 1); res != NVCV_SUCCESS) {
		throw std::runtime_error("Failed to enable temporal mode for Face Detection feature.");
	}

	// Create Bounding Boxes Data (a single box; only the best match is used).
	_ar_bboxes_data.assign(1, {0., 0., 0., 0.});
	_ar_bboxes.boxes = _ar_bboxes_data.data();
	_ar_bboxes.max_boxes = std::clamp<std::uint8_t>(static_cast<std::uint8_t>(_ar_bboxes_data.size()), 0, 255);
	_ar_bboxes.num_boxes = 0;
	_ar_bboxes_confidence.resize(_ar_bboxes_data.size());
	if (NvCV_Status res =
			_ar->set_object(_ar_tracker.get(), NvAR_Parameter_Output(BoundingBoxes), &_ar_bboxes, sizeof(NvAR_BBoxes));
		res != NVCV_SUCCESS) {
		throw std::runtime_error("Failed to set BoundingBoxes for Face Tracking feature.");
	}
	if (NvCV_Status res =
			_ar->set_float32_array(_ar_tracker.get(), NvAR_Parameter_Output(BoundingBoxesConfidence),
								   _ar_bboxes_confidence.data(), static_cast<int>(_ar_bboxes_confidence.size()));
		res != NVCV_SUCCESS) {
		throw std::runtime_error("Failed to set BoundingBoxesConfidence for Face Tracking feature.");
	}

	// Push to extra thread to not block OBS Studio. The addref keeps this
	// source alive until the worker thread releases it.
	obs_source_addref(_self);
	::get_global_threadpool()->push(std::bind(&filter::nvidia::face_tracking_instance::face_detection_initialize_thread,
											  this, std::placeholders::_1),
									nullptr);
}
// Worker-thread part of initialization: load the face detection model.
// Fix: previously _ar_ready was set to true even when load() failed (only
// _ar_fail was flagged, and nothing reads it before rendering). Now the filter
// only becomes active on a successful load; on failure it keeps passing video
// through untouched.
// @param param  Unused thread-pool payload.
void filter::nvidia::face_tracking_instance::face_detection_initialize_thread(std::shared_ptr<void> param)
{
	auto cctx = std::make_shared<::nvidia::cuda::context_stack>(_cuda, _cuda_ctx);
	if (NvCV_Status res = _ar->load(_ar_tracker.get()); res != NVCV_SUCCESS) {
		_ar_fail = true;
	} else {
		_ar_ready = true;
	}
	// Balance the addref taken before this task was queued.
	obs_source_release(_self);
}
// (Re-)create the CUDA/AR image buffers for a new frame size and point the
// tracker's input at the BGR image.
// @param width   Frame width in pixels.
// @param height  Frame height in pixels.
// @throws std::runtime_error if the tracker input cannot be updated.
void filter::nvidia::face_tracking_instance::create_image_buffer(std::size_t width, std::size_t height)
{
auto cctx = std::make_shared<::nvidia::cuda::context_stack>(_cuda, _cuda_ctx);
// Create CUDA and AR interop.
// Pitch is width * 4 bytes (tightly packed RGBA, one byte per channel).
size_t pitch = width * 4;
_cuda_mem = std::make_shared<::nvidia::cuda::memory>(_cuda, pitch * height);
// _ar_image only wraps the CUDA allocation above; it does not own pixels.
_ar->image_init(&_ar_image, static_cast<unsigned int>(width), static_cast<unsigned int>(height),
static_cast<int>(pitch), reinterpret_cast<void*>(_cuda_mem->get()), NVCV_RGBA, NVCV_U8,
NVCV_INTERLEAVED, NVCV_CUDA);
// The BGR image is SDK-allocated; free any previous one before realloc.
_ar->image_dealloc(&_ar_image_bgr);
_ar->image_alloc(&_ar_image_bgr, static_cast<unsigned int>(width), static_cast<unsigned int>(height), NVCV_BGR,
NVCV_U8, NVCV_INTERLEAVED, NVCV_CUDA, 0);
if (NvCV_Status res =
_ar->set_object(_ar_tracker.get(), NvAR_Parameter_Input(Image), &_ar_image_bgr, sizeof(NvCVImage));
res != NVCV_SUCCESS) {
throw std::runtime_error("_ar_tracker NvAR_Parameter_Input(Image)");
}
}
void filter::nvidia::face_tracking_instance::roi_refresh()
{
double_t kalman_q = util::math::lerp<double_t>(1.0, 1e-6, _cfg_roi_stability);
double_t kalman_r = util::math::lerp<double_t>(std::numeric_limits<double_t>::epsilon(), 1e+2, _cfg_roi_stability);
_roi_filters[0] = util::math::kalman1D<double_t>{kalman_q, kalman_r, 1.0, _roi_center.first};
_roi_filters[1] = util::math::kalman1D<double_t>{kalman_q, kalman_r, 1.0, _roi_center.second};
_roi_filters[2] = util::math::kalman1D<double_t>{kalman_q, kalman_r, 1.0, _roi_size.first};
_roi_filters[3] = util::math::kalman1D<double_t>{kalman_q, kalman_r, 1.0, _roi_size.second};
}
void filter::nvidia::face_tracking_instance::roi_reset()
{
_roi_center.first = static_cast<double_t>(_width) / 2.;
_roi_center.second = static_cast<double_t>(_height) / 2.;
_roi_size.first = static_cast<double_t>(_width);
_roi_size.second = static_cast<double_t>(_height);
roi_refresh();
}
// Initial settings load — no extra state beyond update(), so defer to it.
void filter::nvidia::face_tracking_instance::load(obs_data_t* data)
{
update(data);
}
// Apply user settings. All sliders are stored as percentages, so each value is
// converted to a [0..1]-style factor before use.
void filter::nvidia::face_tracking_instance::update(obs_data_t* data)
{
	auto as_factor = [data](const char* key) { return obs_data_get_double(data, key) / 100.0; };

	_cfg_roi_zoom      = as_factor(SK_ROI_ZOOM);
	_cfg_roi_offset    = {as_factor(SK_ROI_OFFSET_X), as_factor(SK_ROI_OFFSET_Y)};
	_cfg_roi_stability = as_factor(SK_ROI_STABILITY);

	// The smoothing filters depend on stability, so rebuild them now.
	roi_refresh();
}
// Per-frame tick: track source size changes, rebuild GPU buffers when needed,
// and mark the cached frame stale so video_render() re-captures it.
// Fix: guard against zero width/height (e.g. a hidden or not-yet-sized source),
// which previously triggered buffer creation with degenerate dimensions.
// @param seconds  Time since last tick (unused).
void filter::nvidia::face_tracking_instance::video_tick(float seconds)
{
	// Nothing to do until the tracker model finished loading on the worker thread.
	if (!_ar_ready)
		return;

	// Update Buffers
	uint32_t width  = obs_source_get_width(_self);
	uint32_t height = obs_source_get_height(_self);
	if ((width != 0) && (height != 0) && ((width != _width) || (height != _height))) {
		try {
			// Recreate things.
			create_image_buffer(width, height);
			_cuda_flush_cache = true;

			// Update Width/Height only after success, so a failed rebuild is
			// retried on the next tick.
			_width  = width;
			_height = height;

			// Reset ROI to the full (new) frame.
			roi_reset();
		} catch (const std::exception& ex) {
			LOG_ERROR("Error: %s", ex.what());
		}
	}

	_up_to_date = false;
}
// Render one frame: capture the filter chain below into _rt, hand the frame to
// the NVIDIA AR SDK via CUDA, recalculate the smoothed region of interest, and
// finally draw the captured texture cropped to the ROI (the crop is encoded in
// the vertex UVs, not the positions).
// @param effect  Effect to draw with; falls back to the OBS default effect.
void filter::nvidia::face_tracking_instance::video_render(gs_effect_t* effect)
{
gs::debug_marker gdm_main{gs::debug_color_source, "%s", obs_source_get_name(_self)};
obs_source_t* filter_parent = obs_filter_get_parent(_self);
obs_source_t* filter_target = obs_filter_get_target(_self);
gs_effect_t* default_effect = obs_get_base_effect(OBS_EFFECT_DEFAULT);
// Pass video through untouched until we are fully wired up and sized.
if (!filter_parent || !filter_target || !_width || !_height || !_ar_ready) {
obs_source_skip_video_filter(_self);
return;
}
// Tracking runs at most once per tick; _up_to_date is cleared in video_tick().
if (!_up_to_date) {
{ // Capture the filter stack "below" us.
#ifdef _DEBUG
auto prof = _profile_capture->track();
#endif
gs::debug_marker marker{gs::debug_color_render, "%s: Capture", obs_source_get_name(_self)};
if (obs_source_process_filter_begin(_self, _rt->get_color_format(), OBS_ALLOW_DIRECT_RENDERING)) {
auto op = _rt->render(_width, _height);
vec4 clr = {0., 0., 0., 0.};
gs_ortho(0, static_cast<float_t>(_width), 0, static_cast<float_t>(_height), 0, 1);
gs_clear(GS_CLEAR_COLOR, &clr, 0, 0);
obs_source_process_filter_tech_end(_self, default_effect, _width, _height, "Draw");
} else {
obs_source_skip_video_filter(_self);
return;
}
}
{
gs::debug_marker marker{gs::debug_color_render, "%s: Nvidia AR SDK", obs_source_get_name(_self)};
auto cctx = std::make_shared<::nvidia::cuda::context_stack>(_cuda, _cuda_ctx);
// (Re-)register the render target with CUDA after a size change.
if (_cuda_flush_cache) {
#ifdef _DEBUG
auto prof = _profile_cuda_register->track();
#endif
_cuda_rt_cache = std::make_shared<::nvidia::cuda::gstexture>(_cuda, _rt->get_texture());
_cuda_rt_cache->map(_cuda_stream);
_cuda_flush_cache = false;
}
{
#ifdef _DEBUG
auto prof = _profile_cuda_copy->track();
#endif
// Copy the captured RGBA frame (graphics array) into the linear CUDA
// buffer that backs _ar_image.
::nvidia::cuda::cu_memcpy2d_t mc;
mc.src_x_in_bytes = 0;
mc.src_y = 0;
mc.src_memory_type = ::nvidia::cuda::cu_memory_type::ARRAY;
mc.src_host = nullptr;
mc.src_device = 0;
mc.src_array = _cuda_rt_cache->map(_cuda_stream);
mc.src_pitch = static_cast<size_t>(_ar_image.pitch);
mc.dst_x_in_bytes = 0;
mc.dst_y = 0;
mc.dst_memory_type = ::nvidia::cuda::cu_memory_type::DEVICE;
mc.dst_host = 0;
mc.dst_device = reinterpret_cast<::nvidia::cuda::cu_device_ptr_t>(_ar_image.pixels);
mc.dst_array = 0;
mc.dst_pitch = static_cast<size_t>(_ar_image.pitch);
mc.width_in_bytes = static_cast<size_t>(_ar_image.pitch);
mc.height = _ar_image.height;
if (::nvidia::cuda::cu_result res = _cuda->cuMemcpy2D(&mc); res != ::nvidia::cuda::cu_result::SUCCESS) {
obs_source_skip_video_filter(_self);
return;
}
}
{
#ifdef _DEBUG
auto prof = _profile_ar_transfer->track();
#endif
// Convert RGBA -> BGR into the SDK's input image.
if (NvCV_Status res =
_ar->image_transfer(&_ar_image, &_ar_image_bgr, 1.0,
reinterpret_cast<CUstream_st*>(_cuda_stream->get()), &_ar_image_temp);
res != NVCV_SUCCESS) {
obs_source_skip_video_filter(_self);
return;
}
}
{
#ifdef _DEBUG
auto prof = _profile_ar_run->track();
#endif
// Run face detection; results land in _ar_bboxes / _ar_bboxes_confidence.
if (NvCV_Status res = _ar->run(_ar_tracker.get()); res != NVCV_SUCCESS) {
obs_source_skip_video_filter(_self);
return;
}
}
}
// Recalculate the region of interest.
if (_ar_bboxes.num_boxes > 0) {
double_t aspect = double_t(_width) / double_t(_height);
// Store values and center.
double_t bbox_w = _ar_bboxes.boxes[0].width;
double_t bbox_h = _ar_bboxes.boxes[0].height;
double_t bbox_cx = _ar_bboxes.boxes[0].x + bbox_w / 2.0;
double_t bbox_cy = _ar_bboxes.boxes[0].y + bbox_h / 2.0;
// Zoom, Aspect Ratio, Offset
bbox_h = util::math::lerp<double_t>(_height, bbox_h, _cfg_roi_zoom);
bbox_h = std::clamp(bbox_h, 10 * aspect, static_cast<double_t>(_height));
bbox_w = bbox_h * aspect;
bbox_cx += _ar_bboxes.boxes[0].width * _cfg_roi_offset.first;
bbox_cy += _ar_bboxes.boxes[0].height * _cfg_roi_offset.second;
// Fit back into the frame
// - Above code guarantees that height is never bigger than the height of the frame.
// - Which also guarantees that width is never bigger than the width of the frame.
// Only cx and cy need to be adjusted now to always be in the frame.
bbox_cx = std::clamp(bbox_cx, (bbox_w / 2.), static_cast<double_t>(_width) - (bbox_w / 2.));
bbox_cy = std::clamp(bbox_cy, (bbox_h / 2.), static_cast<double_t>(_height) - (bbox_h / 2.));
// Filter values
auto size_w = _roi_filters[2].filter(bbox_w);
auto size_h = _roi_filters[3].filter(bbox_h);
auto center_x = _roi_filters[0].filter(bbox_cx);
auto center_y = _roi_filters[1].filter(bbox_cy);
// Fix NaN/Infinity
if (std::isfinite(size_w) && std::isfinite(size_h) && std::isfinite(center_x) && std::isfinite(center_y)) {
_roi_center.first = center_x;
_roi_center.second = center_y;
_roi_size.first = size_w;
_roi_size.second = size_h;
} else {
// Filter state went non-finite; rebuild filters from the last good ROI.
roi_refresh();
}
} else {
// Todo: Time based return to full frame.
}
// Update Region of Interest Geometry.
{
auto v0 = _roi_geom.at(0);
auto v1 = _roi_geom.at(1);
auto v2 = _roi_geom.at(2);
auto v3 = _roi_geom.at(3);
*v0.color = 0xFFFFFFFF;
*v1.color = 0xFFFFFFFF;
*v2.color = 0xFFFFFFFF;
*v3.color = 0xFFFFFFFF;
// Positions span the full output quad (tri-strip order: v0..v3); the ROI
// crop is applied through the UVs below.
vec3_set(v3.position, static_cast<float_t>(_width), static_cast<float_t>(_height), 0.);
vec3_set(v2.position, v3.position->x, 0., 0.);
vec3_set(v1.position, 0., v3.position->y, 0.);
vec3_set(v0.position, 0., 0., 0.);
vec4_set(
v0.uv[0],
static_cast<float_t>((_roi_center.first - _roi_size.first / 2.) / static_cast<double_t>(_width)),
static_cast<float_t>((_roi_center.second - _roi_size.second / 2.) / static_cast<double_t>(_height)), 0.,
0.);
vec4_set(
v1.uv[0],
static_cast<float_t>((_roi_center.first - _roi_size.first / 2.) / static_cast<double_t>(_width)),
static_cast<float_t>((_roi_center.second + _roi_size.second / 2.) / static_cast<double_t>(_height)), 0.,
0.);
vec4_set(
v2.uv[0],
static_cast<float_t>((_roi_center.first + _roi_size.first / 2.) / static_cast<double_t>(_width)),
static_cast<float_t>((_roi_center.second - _roi_size.second / 2.) / static_cast<double_t>(_height)), 0.,
0.);
vec4_set(
v3.uv[0],
static_cast<float_t>((_roi_center.first + _roi_size.first / 2.) / static_cast<double_t>(_width)),
static_cast<float_t>((_roi_center.second + _roi_size.second / 2.) / static_cast<double_t>(_height)), 0.,
0.);
_roi_geom.update();
}
_up_to_date = true;
}
// Draw Texture
gs_effect_set_texture(gs_effect_get_param_by_name(effect ? effect : default_effect, "image"),
_rt->get_texture()->get_object());
// NOTE(review): update() is called a second time here; presumably it returns
// the underlying vertex buffer and is cheap when unchanged — confirm.
gs_load_vertexbuffer(_roi_geom.update());
while (gs_effect_loop(effect ? effect : default_effect, "Draw")) {
gs_draw(gs_draw_mode::GS_TRISTRIP, 0, _roi_geom.size());
}
gs_load_vertexbuffer(nullptr);
}
#ifdef _DEBUG
// Debug-only button handler: dump profiling statistics (total, average, 99.9th
// and 95th percentile) for each pipeline stage to the log.
// Fix: the "Transfer" row averaged _profile_capture instead of
// _profile_ar_transfer (copy-paste error).
// @return false — the properties view does not need to refresh.
bool filter::nvidia::face_tracking_instance::button_profile(obs_properties_t* props, obs_property_t* property)
{
	LOG_INFO("Profiling (Total/Avg/99.9/95)");
	LOG_INFO("  %-12s: %8lldµs %8lldµs %8lldµs %8lldµs", "Capture",
			 std::chrono::duration_cast<std::chrono::microseconds>(_profile_capture->total_duration()).count(),
			 static_cast<std::int64_t>(_profile_capture->average_duration() / 1000.0),
			 std::chrono::duration_cast<std::chrono::microseconds>(_profile_capture->percentile(0.999)).count(),
			 std::chrono::duration_cast<std::chrono::microseconds>(_profile_capture->percentile(0.95)).count());
	LOG_INFO("  %-12s: %8lldµs %8lldµs %8lldµs %8lldµs", "Register",
			 std::chrono::duration_cast<std::chrono::microseconds>(_profile_cuda_register->total_duration()).count(),
			 static_cast<std::int64_t>(_profile_cuda_register->average_duration() / 1000.0),
			 std::chrono::duration_cast<std::chrono::microseconds>(_profile_cuda_register->percentile(0.999)).count(),
			 std::chrono::duration_cast<std::chrono::microseconds>(_profile_cuda_register->percentile(0.95)).count());
	LOG_INFO("  %-12s: %8lldµs %8lldµs %8lldµs %8lldµs", "Copy",
			 std::chrono::duration_cast<std::chrono::microseconds>(_profile_cuda_copy->total_duration()).count(),
			 static_cast<std::int64_t>(_profile_cuda_copy->average_duration() / 1000.0),
			 std::chrono::duration_cast<std::chrono::microseconds>(_profile_cuda_copy->percentile(0.999)).count(),
			 std::chrono::duration_cast<std::chrono::microseconds>(_profile_cuda_copy->percentile(0.95)).count());
	LOG_INFO("  %-12s: %8lldµs %8lldµs %8lldµs %8lldµs", "Transfer",
			 std::chrono::duration_cast<std::chrono::microseconds>(_profile_ar_transfer->total_duration()).count(),
			 static_cast<std::int64_t>(_profile_ar_transfer->average_duration() / 1000.0),
			 std::chrono::duration_cast<std::chrono::microseconds>(_profile_ar_transfer->percentile(0.999)).count(),
			 std::chrono::duration_cast<std::chrono::microseconds>(_profile_ar_transfer->percentile(0.95)).count());
	LOG_INFO("  %-12s: %8lldµs %8lldµs %8lldµs %8lldµs", "Run",
			 std::chrono::duration_cast<std::chrono::microseconds>(_profile_ar_run->total_duration()).count(),
			 static_cast<std::int64_t>(_profile_ar_run->average_duration() / 1000.0),
			 std::chrono::duration_cast<std::chrono::microseconds>(_profile_ar_run->percentile(0.999)).count(),
			 std::chrono::duration_cast<std::chrono::microseconds>(_profile_ar_run->percentile(0.95)).count());
	return false;
}
#endif
std::shared_ptr<filter::nvidia::face_tracking_factory> filter::nvidia::face_tracking_factory::factory_instance =
nullptr;
// Factory constructor: load the CUDA and AR SDK wrappers, create a CUDA
// context bound to the graphics device, and register the filter with OBS.
// Throws if either SDK is unavailable or the renderer is unsupported.
filter::nvidia::face_tracking_factory::face_tracking_factory()
{
// Try and load CUDA.
_cuda = std::make_shared<::nvidia::cuda::cuda>();
// Try and load AR.
_ar = std::make_shared<::nvidia::ar::ar>();
// Initialize CUDA
{
auto gctx = gs::context{};
#ifdef WIN32
if (gs_get_device_type() == GS_DEVICE_DIRECT3D_11) {
_cuda_ctx =
std::make_shared<::nvidia::cuda::context>(_cuda, reinterpret_cast<ID3D11Device*>(gs_get_device_obj()));
}
#endif
// Only Direct3D 11 interop is implemented; bail out on OpenGL renderers.
if (gs_get_device_type() == GS_DEVICE_OPENGL) {
throw std::runtime_error("OpenGL not supported.");
}
}
// Info
_info.id = "streamfx-nvidia-face-tracking";
_info.type = OBS_SOURCE_TYPE_FILTER;
_info.output_flags = OBS_SOURCE_VIDEO;
set_resolution_enabled(false);
finish_setup();
}
// Nothing to release explicitly; shared_ptr members tear down the CUDA/AR wrappers.
filter::nvidia::face_tracking_factory::~face_tracking_factory() {}
// Localized display name of the filter, resolved from the locale file.
const char* filter::nvidia::face_tracking_factory::get_name()
{
return D_TRANSLATE(ST);
}
// Install default settings for new filter instances. All values are stored as
// percentages, matching the UI sliders.
void filter::nvidia::face_tracking_factory::get_defaults2(obs_data_t* data)
{
	struct default_entry {
		const char* key;
		double      value;
	};
	const default_entry defaults[] = {
		{SK_ROI_ZOOM, 50.0},
		{SK_ROI_OFFSET_X, 0.0},
		{SK_ROI_OFFSET_Y, -15.0},
		{SK_ROI_STABILITY, 50.0},
	};
	for (const auto& entry : defaults) {
		obs_data_set_default_double(data, entry.key, entry.value);
	}
}
// Build the properties UI: a "Region of Interest" group with Stability and
// Zoom sliders plus a nested Offset group (X/Y), and — in debug builds — a
// button that dumps profiling data for the given instance.
// @param data  The instance the (debug) profile button operates on; may be used
//              as callback payload only.
// @return Newly created obs_properties_t owned by the caller (libobs).
obs_properties_t* filter::nvidia::face_tracking_factory::get_properties2(filter::nvidia::face_tracking_instance* data)
{
obs_properties_t* pr = obs_properties_create();
{
auto grp = obs_properties_create();
obs_properties_add_group(pr, ST_ROI, D_TRANSLATE(ST_ROI), OBS_GROUP_NORMAL, grp);
{
// Stability slider, 0-100 %.
auto p =
obs_properties_add_float_slider(grp, SK_ROI_STABILITY, D_TRANSLATE(ST_ROI_STABILITY), 0, 100.0, 0.01);
obs_property_set_long_description(p, D_TRANSLATE(D_DESC(ST_ROI_STABILITY)));
obs_property_float_set_suffix(p, " %");
}
{
// Zoom slider, 0-200 % (values above 100 % zoom into the face).
auto p = obs_properties_add_float_slider(grp, SK_ROI_ZOOM, D_TRANSLATE(ST_ROI_ZOOM), 0, 200.0, 0.01);
obs_property_set_long_description(p, D_TRANSLATE(D_DESC(ST_ROI_ZOOM)));
obs_property_float_set_suffix(p, " %");
}
{
// Offset sub-group with X/Y sliders, each -50 to +50 %.
auto grp2 = obs_properties_create();
obs_properties_add_group(grp, ST_ROI_OFFSET, D_TRANSLATE(ST_ROI_OFFSET), OBS_GROUP_NORMAL, grp2);
{
auto p = obs_properties_add_float_slider(grp2, SK_ROI_OFFSET_X, D_TRANSLATE(ST_ROI_OFFSET_X), -50.0,
50.0, 0.01);
obs_property_set_long_description(p, D_TRANSLATE(D_DESC(ST_ROI_OFFSET_X)));
obs_property_float_set_suffix(p, " %");
}
{
auto p = obs_properties_add_float_slider(grp2, SK_ROI_OFFSET_Y, D_TRANSLATE(ST_ROI_OFFSET_Y), -50.0,
50.0, 0.01);
obs_property_set_long_description(p, D_TRANSLATE(D_DESC(ST_ROI_OFFSET_Y)));
obs_property_float_set_suffix(p, " %");
}
}
}
#ifdef _DEBUG
{
// Debug-only: log profiling statistics for this instance.
auto p = obs_properties_add_button2(
pr, "Profile", "Profile",
[](obs_properties_t* props, obs_property_t* property, void* data) {
return reinterpret_cast<filter::nvidia::face_tracking_instance*>(data)->button_profile(props, property);
},
data);
}
#endif
return pr;
}
// Shared CUDA SDK wrapper used by all filter instances.
std::shared_ptr<::nvidia::cuda::cuda> filter::nvidia::face_tracking_factory::get_cuda()
{
return _cuda;
}
// Shared CUDA context bound to the graphics device.
std::shared_ptr<::nvidia::cuda::context> filter::nvidia::face_tracking_factory::get_cuda_context()
{
return _cuda_ctx;
}
// Shared NVIDIA AR SDK wrapper.
std::shared_ptr<::nvidia::ar::ar> filter::nvidia::face_tracking_factory::get_ar()
{
return _ar;
}

View File

@ -0,0 +1,159 @@
/*
* Modern effects for a modern Streamer
* Copyright (C) 2017 Michael Fabian Dirks
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#pragma once
#include <atomic>
#include <memory>
#include <vector>
#include "obs/gs/gs-effect.hpp"
#include "obs/gs/gs-rendertarget.hpp"
#include "obs/gs/gs-vertexbuffer.hpp"
#include "obs/obs-source-factory.hpp"
#include "plugin.hpp"
#include "util-profiler.hpp"
// Nvidia
#include "nvidia/ar/nvidia-ar.hpp"
#include "nvidia/cuda/nvidia-cuda-context.hpp"
#include "nvidia/cuda/nvidia-cuda-gs-texture.hpp"
#include "nvidia/cuda/nvidia-cuda-memory.hpp"
#include "nvidia/cuda/nvidia-cuda-stream.hpp"
#include "nvidia/cuda/nvidia-cuda.hpp"
namespace filter::nvidia {
// Per-source instance of the NVIDIA face tracking filter. Captures the filter
// chain below it into a render target, runs the AR SDK face detector on the
// frame via CUDA, and re-renders the frame cropped to a smoothed region of
// interest around the detected face.
class face_tracking_instance : public obs::source_instance {
// Filter Cache
std::uint32_t _width;
std::uint32_t _height;
// False at the start of every tick; set once the frame has been captured,
// tracked and the ROI geometry rebuilt.
bool _up_to_date;
std::shared_ptr<gs::rendertarget> _rt;
// Settings (stored as factors, i.e. percent / 100)
double_t _cfg_roi_zoom;
std::pair<double_t, double_t> _cfg_roi_offset;
double_t _cfg_roi_stability;
// Region of Interest
// Kalman filters smoothing [center x, center y, width, height], in that order.
util::math::kalman1D<double_t> _roi_filters[4];
std::pair<double_t, double_t> _roi_center;
std::pair<double_t, double_t> _roi_size;
// Full-frame quad whose UVs encode the ROI crop.
gs::vertex_buffer _roi_geom;
// Nvidia CUDA interop
std::shared_ptr<::nvidia::cuda::cuda> _cuda;
std::shared_ptr<::nvidia::cuda::context> _cuda_ctx;
std::shared_ptr<::nvidia::cuda::stream> _cuda_stream;
std::shared_ptr<::nvidia::cuda::memory> _cuda_mem;
// When true, the render target must be (re-)registered with CUDA.
bool _cuda_flush_cache;
std::shared_ptr<::nvidia::cuda::gstexture> _cuda_rt_cache;
// Nvidia AR interop
std::shared_ptr<::nvidia::ar::ar> _ar;
std::string _ar_models_path;
std::shared_ptr<nvAR_Feature> _ar_tracker;
// Set from the worker thread once the model loaded (or failed).
std::atomic_bool _ar_ready;
std::atomic_bool _ar_fail;
std::vector<NvAR_Rect> _ar_bboxes_data;
NvAR_BBoxes _ar_bboxes;
std::vector<float_t> _ar_bboxes_confidence;
// RGBA image wrapping _cuda_mem; BGR image owned by the SDK; temp scratch.
NvCVImage _ar_image;
NvCVImage _ar_image_bgr;
NvCVImage _ar_image_temp;
#ifdef _DEBUG
// Profiling
std::shared_ptr<util::profiler> _profile_capture;
std::shared_ptr<util::profiler> _profile_cuda_register;
std::shared_ptr<util::profiler> _profile_cuda_copy;
std::shared_ptr<util::profiler> _profile_ar_transfer;
std::shared_ptr<util::profiler> _profile_ar_run;
#endif
public:
face_tracking_instance(obs_data_t*, obs_source_t*);
virtual ~face_tracking_instance() override;
// Initialize face detection (synchronous setup; model load is queued onto a thread).
void face_detection_initialize();
// Worker-thread continuation: loads the detection model.
void face_detection_initialize_thread(std::shared_ptr<void> param);
// Create image buffer for the given frame size (CUDA/AR interop images).
void create_image_buffer(std::size_t width, std::size_t height);
// Rebuild ROI smoothing filters from the current settings and ROI state.
void roi_refresh();
// Reset the ROI to the full frame and rebuild the smoothing filters.
void roi_reset();
virtual void load(obs_data_t* data) override;
virtual void update(obs_data_t* data) override;
virtual void video_tick(float seconds) override;
virtual void video_render(gs_effect_t* effect) override;
#ifdef _DEBUG
// Log profiling statistics; wired to a debug-only properties button.
bool button_profile(obs_properties_t* props, obs_property_t* property);
#endif
};
// Singleton factory for the face tracking filter. Owns the process-wide CUDA
// and AR SDK wrappers plus the CUDA context shared by all instances, and
// registers the filter type with OBS.
class face_tracking_factory
: public obs::source_factory<filter::nvidia::face_tracking_factory, filter::nvidia::face_tracking_instance> {
static std::shared_ptr<filter::nvidia::face_tracking_factory> factory_instance;
std::shared_ptr<::nvidia::cuda::cuda> _cuda;
std::shared_ptr<::nvidia::cuda::context> _cuda_ctx;
std::shared_ptr<::nvidia::ar::ar> _ar;
public: // Singleton
// Create the singleton; failures (missing SDK/runtime, unsupported
// renderer) are logged and leave the factory unregistered.
static void initialize()
try {
factory_instance = std::make_shared<filter::nvidia::face_tracking_factory>();
} catch (const std::exception& ex) {
LOG_ERROR("<Nvidia Face Tracking Filter> %s", ex.what());
}
// Destroy the singleton.
static void finalize()
{
factory_instance.reset();
}
// Access the singleton; may be null if initialize() failed.
static std::shared_ptr<face_tracking_factory> get()
{
return factory_instance;
}
public:
face_tracking_factory();
virtual ~face_tracking_factory() override;
virtual const char* get_name() override;
virtual void get_defaults2(obs_data_t* data) override;
virtual obs_properties_t* get_properties2(filter::nvidia::face_tracking_instance* data) override;
// Shared SDK/context accessors used by instances during construction.
std::shared_ptr<::nvidia::cuda::cuda> get_cuda();
std::shared_ptr<::nvidia::cuda::context> get_cuda_context();
std::shared_ptr<::nvidia::ar::ar> get_ar();
};
} // namespace filter::nvidia

View File

@ -0,0 +1,37 @@
/*
* Modern effects for a modern Streamer
* Copyright (C) 2020 Michael Fabian Dirks
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#include "nvidia-ar-feature.hpp"
// Create an AR SDK feature of the given kind. Throws if the SDK rejects the
// creation request.
nvidia::ar::feature::feature(std::shared_ptr<::nvidia::ar::ar> ar, NvAR_FeatureID feature) : _ar(ar)
{
	NvAR_FeatureHandle feat;
	if (NvCV_Status res = _ar->create(feature, &feat); res != NVCV_SUCCESS) {
		throw std::runtime_error("Failed to create feature.");
	}

	// Own the handle. The deleter captures the `ar` shared_ptr by value rather
	// than `this`: a deleter bound to `this` dangles if ownership of `_feature`
	// ever escapes this wrapper, and capturing `ar` also guarantees the AR
	// runtime stays loaded until the handle is destroyed.
	_feature = std::shared_ptr<nvAR_Feature>{feat, [ar](NvAR_FeatureHandle v) {
		ar->destroy(v);
	}};
}
nvidia::ar::feature::~feature()
{
// Dropping the handle runs the deleter (which destroys the feature via the AR
// library) while `_ar` is still alive.
_feature.reset();
}

View File

@ -0,0 +1,156 @@
/*
* Modern effects for a modern Streamer
* Copyright (C) 2020 Michael Fabian Dirks
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#pragma once
#include <cmath>
#include <cstdint>
#include <cstring>
#include <memory>
#include <string>
#include <vector>
#include "nvidia-ar.hpp"
#include "nvidia/cuda/nvidia-cuda-stream.hpp"
namespace nvidia::ar {
// RAII wrapper around an NvAR feature handle with typed parameter accessors.
class feature {
	// Keeps the AR runtime library alive for as long as this feature exists.
	std::shared_ptr<::nvidia::ar::ar> _ar;
	// Owning feature handle; released through ar::destroy().
	std::shared_ptr<nvAR_Feature> _feature;

	public:
	feature(std::shared_ptr<::nvidia::ar::ar> ar, NvAR_FeatureID feature);
	~feature();

	public:
	// Typed parameter accessors. Only the explicit specializations defined
	// below this class exist; instantiating set/get with any other T fails to
	// link, exactly as before.
	template<typename T>
	inline NvCV_Status set(std::string name, T value);

	template<typename T>
	inline NvCV_Status get(std::string name, T& value);
};

// The specializations are defined at namespace scope: in-class `template<>`
// member specializations rely on CWG 727, which MSVC accepts but GCC/Clang do
// not implement, so the previous in-class form was not portable.

// 32-bit signed integer parameter.
template<>
inline NvCV_Status feature::set(std::string name, std::int32_t value)
{
	return _ar->set_int32(_feature.get(), name.c_str(), value);
}

template<>
inline NvCV_Status feature::get(std::string name, std::int32_t& value)
{
	return _ar->get_int32(_feature.get(), name.c_str(), &value);
}

// 32-bit unsigned integer parameter.
template<>
inline NvCV_Status feature::set(std::string name, std::uint32_t value)
{
	return _ar->set_uint32(_feature.get(), name.c_str(), value);
}

template<>
inline NvCV_Status feature::get(std::string name, std::uint32_t& value)
{
	return _ar->get_uint32(_feature.get(), name.c_str(), &value);
}

// 64-bit unsigned integer parameter.
template<>
inline NvCV_Status feature::set(std::string name, std::uint64_t value)
{
	return _ar->set_uint64(_feature.get(), name.c_str(), value);
}

template<>
inline NvCV_Status feature::get(std::string name, std::uint64_t& value)
{
	return _ar->get_uint64(_feature.get(), name.c_str(), &value);
}

// Single-precision float parameter (std::float_t is float on common targets).
template<>
inline NvCV_Status feature::set(std::string name, std::float_t value)
{
	return _ar->set_float32(_feature.get(), name.c_str(), value);
}

template<>
inline NvCV_Status feature::get(std::string name, std::float_t& value)
{
	return _ar->get_float32(_feature.get(), name.c_str(), &value);
}

// Float array parameter.
template<>
inline NvCV_Status feature::set(std::string name, std::vector<std::float_t> value)
{
	return _ar->set_float32_array(_feature.get(), name.c_str(), value.data(),
								  static_cast<std::int32_t>(value.size()));
}

template<>
inline NvCV_Status feature::get(std::string name, std::vector<std::float_t>& value)
{
	// The SDK returns a pointer into its own storage plus an element count.
	const float* vals      = nullptr;
	int          val_count = 0;
	NvCV_Status  res       = _ar->get_float32_array(_feature.get(), name.c_str(), &vals, &val_count);
	if (res == NVCV_SUCCESS) {
		value.assign(vals, vals + val_count);
	}
	return res;
}

// Double-precision float parameter.
template<>
inline NvCV_Status feature::set(std::string name, std::double_t value)
{
	return _ar->set_float64(_feature.get(), name.c_str(), value);
}

template<>
inline NvCV_Status feature::get(std::string name, std::double_t& value)
{
	return _ar->get_float64(_feature.get(), name.c_str(), &value);
}

// String parameter.
template<>
inline NvCV_Status feature::set(std::string name, std::string value)
{
	return _ar->set_string(_feature.get(), name.c_str(), value.c_str());
}

template<>
inline NvCV_Status feature::get(std::string name, std::string& value)
{
	// Assumes the SDK hands back a NUL-terminated string — the same assumption
	// the previous strlen()-based copy made.
	const char* buf = nullptr;
	NvCV_Status res = _ar->get_string(_feature.get(), name.c_str(), &buf);
	if (res == NVCV_SUCCESS) {
		value = buf;
	}
	return res;
}

// CUDA stream parameter.
template<>
inline NvCV_Status feature::set(std::string name, std::shared_ptr<::nvidia::cuda::stream> value)
{
	return _ar->set_cuda_stream(_feature.get(), name.c_str(), reinterpret_cast<CUstream>(value->get()));
}

template<>
inline NvCV_Status feature::get(std::string name, std::shared_ptr<::nvidia::cuda::stream>& value)
{
	// A raw CUstream obtained from the SDK cannot be re-wrapped into a managed
	// ::nvidia::cuda::stream here. The original body was empty, so this
	// non-void function flowed off its end (undefined behavior); report the
	// operation as unimplemented instead.
	static_cast<void>(name);
	static_cast<void>(value);
	return NVCV_ERR_UNIMPLEMENTED;
}
} // namespace nvidia::ar

View File

@ -0,0 +1,260 @@
/*
* Modern effects for a modern Streamer
* Copyright (C) 2020 Michael Fabian Dirks
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#include "nvidia-ar.hpp"
#include <stdexcept>
#include <util/bmem.h>
#include <util/platform.h>
#ifdef WIN32
#include <Shlobj.h>
#include <Windows.h>
#endif
#include <nvARProxy.cpp>
nvidia::ar::ar::ar()
{
	// getNvARLib() (provided by the bundled nvARProxy.cpp) loads the Nvidia AR
	// SDK runtime on first use and returns its module handle, or null on failure.
	if (getNvARLib() == nullptr) {
		throw std::runtime_error("Failed to load Nvidia AR SDK runtime.");
	}
}
// Nothing to release manually; the proxy keeps the runtime module loaded.
nvidia::ar::ar::~ar() = default;
// Locate the directory holding the AR SDK runtime. The NV_AR_SDK_PATH
// environment variable wins; otherwise (Windows only) the directory of the
// loaded SDK module is used. On non-Windows builds without the override an
// empty path is returned.
std::filesystem::path nvidia::ar::ar::get_ar_sdk_path()
{
// Explicit override via environment, read at call time.
char* arsdk_path = getenv("NV_AR_SDK_PATH");
if (arsdk_path) {
return std::filesystem::path(std::string{arsdk_path});
} else {
std::filesystem::path res;
#ifdef WIN32
// Resolve the full path of the loaded AR SDK module. 65535 wide characters
// is the upper bound for \\?\-style paths; GetModuleFileNameW truncates
// silently if the buffer is too small.
std::vector<wchar_t> dll_path_w;
dll_path_w.resize(65535);
DWORD size_w = GetModuleFileNameW(getNvARLib(), dll_path_w.data(), static_cast<DWORD>(dll_path_w.size()));
std::vector<char> dll_path;
dll_path.resize(65535);
// Convert UTF-16 -> UTF-8 via libobs. NOTE(review): confirm whether
// os_wcs_to_utf8's return value includes the terminator — the substring
// below assumes it is the pure character count.
std::size_t size = os_wcs_to_utf8(dll_path_w.data(), size_w, dll_path.data(), dll_path.size());
std::filesystem::path dll = std::string{dll_path.data(), dll_path.data() + size};
// Strip the file name, leaving the containing directory.
res = dll.remove_filename();
#endif
return res;
}
}
// --- NvCVImage wrappers -----------------------------------------------------
// Each member below is a thin pass-through to the corresponding NvCVImage_*
// entry point resolved at runtime by nvARProxy; arguments are forwarded
// unchanged and the SDK's status code (where any) is returned as-is.

// Forwards to NvCVImage_Init.
NvCV_Status nvidia::ar::ar::image_init(NvCVImage* im, unsigned width, unsigned height, int pitch, void* pixels,
NvCVImage_PixelFormat format, NvCVImage_ComponentType type, unsigned isPlanar,
unsigned onGPU)
{
return NvCVImage_Init(im, width, height, pitch, pixels, format, type, isPlanar, onGPU);
}
// Forwards to NvCVImage_InitView.
void nvidia::ar::ar::image_init_view(NvCVImage* subImg, NvCVImage* fullImg, int x, int y, unsigned width,
unsigned height)
{
NvCVImage_InitView(subImg, fullImg, x, y, width, height);
}
// Forwards to NvCVImage_Alloc.
NvCV_Status nvidia::ar::ar::image_alloc(NvCVImage* im, unsigned width, unsigned height, NvCVImage_PixelFormat format,
NvCVImage_ComponentType type, unsigned isPlanar, unsigned onGPU,
unsigned alignment)
{
return NvCVImage_Alloc(im, width, height, format, type, isPlanar, onGPU, alignment);
}
// Forwards to NvCVImage_Realloc.
NvCV_Status nvidia::ar::ar::image_realloc(NvCVImage* im, unsigned width, unsigned height, NvCVImage_PixelFormat format,
NvCVImage_ComponentType type, unsigned isPlanar, unsigned onGPU,
unsigned alignment)
{
return NvCVImage_Realloc(im, width, height, format, type, isPlanar, onGPU, alignment);
}
// Forwards to NvCVImage_Dealloc.
void nvidia::ar::ar::image_dealloc(NvCVImage* im)
{
NvCVImage_Dealloc(im);
}
// Forwards to NvCVImage_Create.
NvCV_Status nvidia::ar::ar::image_create(unsigned width, unsigned height, NvCVImage_PixelFormat format,
NvCVImage_ComponentType type, unsigned isPlanar, unsigned onGPU,
unsigned alignment, NvCVImage** out)
{
return NvCVImage_Create(width, height, format, type, isPlanar, onGPU, alignment, out);
}
// Forwards to NvCVImage_Destroy.
void nvidia::ar::ar::image_destroy(NvCVImage* im)
{
NvCVImage_Destroy(im);
}
// Forwards to NvCVImage_ComponentOffsets.
void nvidia::ar::ar::image_component_offsets(NvCVImage_PixelFormat format, int* rOff, int* gOff, int* bOff, int* aOff,
int* yOff)
{
NvCVImage_ComponentOffsets(format, rOff, gOff, bOff, aOff, yOff);
}
// Forwards to NvCVImage_Transfer.
NvCV_Status nvidia::ar::ar::image_transfer(const NvCVImage* src, NvCVImage* dst, float scale, CUstream_st* stream,
NvCVImage* tmp)
{
return NvCVImage_Transfer(src, dst, scale, stream, tmp);
}
// Forwards to NvCVImage_Composite.
NvCV_Status nvidia::ar::ar::image_composite(const NvCVImage* src, const NvCVImage* mat, NvCVImage* dst)
{
return NvCVImage_Composite(src, mat, dst);
}
// Forwards to NvCVImage_CompositeOverConstant.
NvCV_Status nvidia::ar::ar::image_composite_over_constant(const NvCVImage* src, const NvCVImage* mat,
const unsigned char bgColor[3], NvCVImage* dst)
{
return NvCVImage_CompositeOverConstant(src, mat, bgColor, dst);
}
// Forwards to NvCVImage_FlipY.
NvCV_Status nvidia::ar::ar::image_flipy(const NvCVImage* src, NvCVImage* dst)
{
return NvCVImage_FlipY(src, dst);
}
// --- NvAR feature lifetime and parameter wrappers ----------------------------
// Thin pass-throughs to the NvAR_* C API; arguments are forwarded unchanged.

// Forwards to NvAR_Create.
NvCV_Status nvidia::ar::ar::create(NvAR_FeatureID featureID, NvAR_FeatureHandle* handle)
{
return NvAR_Create(featureID, handle);
}
// Forwards to NvAR_Destroy.
NvCV_Status nvidia::ar::ar::destroy(NvAR_FeatureHandle handle)
{
return NvAR_Destroy(handle);
}
// Forwards to NvAR_SetU32.
NvCV_Status nvidia::ar::ar::set_uint32(NvAR_FeatureHandle handle, const char* name, unsigned int val)
{
return NvAR_SetU32(handle, name, val);
}
// Forwards to NvAR_SetS32.
NvCV_Status nvidia::ar::ar::set_int32(NvAR_FeatureHandle handle, const char* name, int val)
{
return NvAR_SetS32(handle, name, val);
}
// Forwards to NvAR_SetF32.
NvCV_Status nvidia::ar::ar::set_float32(NvAR_FeatureHandle handle, const char* name, float val)
{
return NvAR_SetF32(handle, name, val);
}
// Forwards to NvAR_SetF64.
NvCV_Status nvidia::ar::ar::set_float64(NvAR_FeatureHandle handle, const char* name, double val)
{
return NvAR_SetF64(handle, name, val);
}
// Forwards to NvAR_SetU64.
NvCV_Status nvidia::ar::ar::set_uint64(NvAR_FeatureHandle handle, const char* name, unsigned long long val)
{
return NvAR_SetU64(handle, name, val);
}
// Forwards to NvAR_SetObject.
NvCV_Status nvidia::ar::ar::set_object(NvAR_FeatureHandle handle, const char* name, void* ptr, unsigned long typeSize)
{
return NvAR_SetObject(handle, name, ptr, typeSize);
}
// Forwards to NvAR_SetString.
NvCV_Status nvidia::ar::ar::set_string(NvAR_FeatureHandle handle, const char* name, const char* str)
{
return NvAR_SetString(handle, name, str);
}
// Forwards to NvAR_SetCudaStream.
NvCV_Status nvidia::ar::ar::set_cuda_stream(NvAR_FeatureHandle handle, const char* name, CUstream stream)
{
return NvAR_SetCudaStream(handle, name, stream);
}
// Forwards to NvAR_SetF32Array.
NvCV_Status nvidia::ar::ar::set_float32_array(NvAR_FeatureHandle handle, const char* name, float* val, int count)
{
return NvAR_SetF32Array(handle, name, val, count);
}
// Forwards to NvAR_GetU32.
NvCV_Status nvidia::ar::ar::get_uint32(NvAR_FeatureHandle handle, const char* name, unsigned int* val)
{
return NvAR_GetU32(handle, name, val);
}
// Forwards to NvAR_GetS32.
NvCV_Status nvidia::ar::ar::get_int32(NvAR_FeatureHandle handle, const char* name, int* val)
{
return NvAR_GetS32(handle, name, val);
}
// Forwards to NvAR_GetF32.
NvCV_Status nvidia::ar::ar::get_float32(NvAR_FeatureHandle handle, const char* name, float* val)
{
return NvAR_GetF32(handle, name, val);
}
// Forwards to NvAR_GetF64.
NvCV_Status nvidia::ar::ar::get_float64(NvAR_FeatureHandle handle, const char* name, double* val)
{
return NvAR_GetF64(handle, name, val);
}
// Forwards to NvAR_GetU64.
NvCV_Status nvidia::ar::ar::get_uint64(NvAR_FeatureHandle handle, const char* name, unsigned long long* val)
{
return NvAR_GetU64(handle, name, val);
}
// Forwards to NvAR_GetObject.
NvCV_Status nvidia::ar::ar::get_object(NvAR_FeatureHandle handle, const char* name, const void** ptr,
unsigned long typeSize)
{
return NvAR_GetObject(handle, name, ptr, typeSize);
}
// Forwards to NvAR_GetString.
NvCV_Status nvidia::ar::ar::get_string(NvAR_FeatureHandle handle, const char* name, const char** str)
{
return NvAR_GetString(handle, name, str);
}
// Forwards to NvAR_GetCudaStream.
NvCV_Status nvidia::ar::ar::get_cuda_stream(NvAR_FeatureHandle handle, const char* name, const CUstream* stream)
{
return NvAR_GetCudaStream(handle, name, stream);
}
// Forwards to NvAR_GetF32Array.
NvCV_Status nvidia::ar::ar::get_float32_array(NvAR_FeatureHandle handle, const char* name, const float** vals,
int* count)
{
return NvAR_GetF32Array(handle, name, vals, count);
}
// --- Execution, stream, and error-string wrappers ----------------------------

// Forwards to NvAR_Run.
NvCV_Status nvidia::ar::ar::run(NvAR_FeatureHandle handle)
{
return NvAR_Run(handle);
}
// Forwards to NvAR_Load.
NvCV_Status nvidia::ar::ar::load(NvAR_FeatureHandle handle)
{
return NvAR_Load(handle);
}
// Forwards to NvAR_CudaStreamCreate.
NvCV_Status nvidia::ar::ar::cuda_stream_create(CUstream* stream)
{
return NvAR_CudaStreamCreate(stream);
}
// Forwards to NvAR_CudaStreamDestroy.
NvCV_Status nvidia::ar::ar::cuda_stream_destroy(CUstream stream)
{
return NvAR_CudaStreamDestroy(stream);
}
// Forwards to NvCV_GetErrorStringFromCode; returns a human-readable message
// for an NvCV_Status value.
const char* nvidia::ar::ar::cv_get_error_string_from_code(NvCV_Status code)
{
return NvCV_GetErrorStringFromCode(code);
}

View File

@ -0,0 +1,95 @@
/*
* Modern effects for a modern Streamer
* Copyright (C) 2020 Michael Fabian Dirks
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
*/
#pragma once
#include <cstddef>
#include <filesystem>
#include <functional>
#include <memory>
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable : 4138)
#endif
#include <nvAR.h>
#include <nvAR_defs.h>
#include <nvCVImage.h>
#include <nvCVStatus.h>
#ifdef _MSC_VER
#pragma warning(pop)
#endif
namespace nvidia::ar {
// Thin C++ wrapper over the Nvidia AR SDK runtime (NvAR feature API plus the
// NvCVImage helper API). All members forward directly to the SDK's C entry
// points (see nvidia-ar.cpp); the constructor throws if the runtime cannot be
// loaded.
class ar {
public:
ar();
~ar();
// Directory containing the AR SDK runtime: the NV_AR_SDK_PATH environment
// variable if set, otherwise (Windows) the loaded module's directory.
std::filesystem::path get_ar_sdk_path();
public:
// --- NvCVImage helpers: init/alloc/copy/composite of SDK image buffers ---
NvCV_Status image_init(NvCVImage* im, unsigned width, unsigned height, int pitch, void* pixels,
NvCVImage_PixelFormat format, NvCVImage_ComponentType type, unsigned isPlanar,
unsigned onGPU);
void image_init_view(NvCVImage* subImg, NvCVImage* fullImg, int x, int y, unsigned width, unsigned height);
NvCV_Status image_alloc(NvCVImage* im, unsigned width, unsigned height, NvCVImage_PixelFormat format,
NvCVImage_ComponentType type, unsigned isPlanar, unsigned onGPU, unsigned alignment);
NvCV_Status image_realloc(NvCVImage* im, unsigned width, unsigned height, NvCVImage_PixelFormat format,
NvCVImage_ComponentType type, unsigned isPlanar, unsigned onGPU, unsigned alignment);
void image_dealloc(NvCVImage* im);
NvCV_Status image_create(unsigned width, unsigned height, NvCVImage_PixelFormat format,
NvCVImage_ComponentType type, unsigned isPlanar, unsigned onGPU, unsigned alignment,
NvCVImage** out);
void image_destroy(NvCVImage* im);
void image_component_offsets(NvCVImage_PixelFormat format, int* rOff, int* gOff, int* bOff, int* aOff,
int* yOff);
NvCV_Status image_transfer(const NvCVImage* src, NvCVImage* dst, float scale, CUstream_st* stream,
NvCVImage* tmp);
NvCV_Status image_composite(const NvCVImage* src, const NvCVImage* mat, NvCVImage* dst);
NvCV_Status image_composite_over_constant(const NvCVImage* src, const NvCVImage* mat,
const unsigned char bgColor[3], NvCVImage* dst);
NvCV_Status image_flipy(const NvCVImage* src, NvCVImage* dst);
// --- NvAR feature lifetime and typed parameter access ---
NvCV_Status create(NvAR_FeatureID featureID, NvAR_FeatureHandle* handle);
NvCV_Status destroy(NvAR_FeatureHandle handle);
NvCV_Status set_uint32(NvAR_FeatureHandle handle, const char* name, unsigned int val);
NvCV_Status set_int32(NvAR_FeatureHandle handle, const char* name, int val);
NvCV_Status set_float32(NvAR_FeatureHandle handle, const char* name, float val);
NvCV_Status set_float64(NvAR_FeatureHandle handle, const char* name, double val);
NvCV_Status set_uint64(NvAR_FeatureHandle handle, const char* name, unsigned long long val);
NvCV_Status set_object(NvAR_FeatureHandle handle, const char* name, void* ptr, unsigned long typeSize);
NvCV_Status set_string(NvAR_FeatureHandle handle, const char* name, const char* str);
NvCV_Status set_cuda_stream(NvAR_FeatureHandle handle, const char* name, CUstream stream);
NvCV_Status set_float32_array(NvAR_FeatureHandle handle, const char* name, float* val, int count);
NvCV_Status get_uint32(NvAR_FeatureHandle handle, const char* name, unsigned int* val);
NvCV_Status get_int32(NvAR_FeatureHandle handle, const char* name, int* val);
NvCV_Status get_float32(NvAR_FeatureHandle handle, const char* name, float* val);
NvCV_Status get_float64(NvAR_FeatureHandle handle, const char* name, double* val);
NvCV_Status get_uint64(NvAR_FeatureHandle handle, const char* name, unsigned long long* val);
NvCV_Status get_object(NvAR_FeatureHandle handle, const char* name, const void** ptr, unsigned long typeSize);
NvCV_Status get_string(NvAR_FeatureHandle handle, const char* name, const char** str);
NvCV_Status get_cuda_stream(NvAR_FeatureHandle handle, const char* name, const CUstream* stream);
NvCV_Status get_float32_array(NvAR_FeatureHandle handle, const char* name, const float** vals, int* count);
// --- Execution, streams, and diagnostics ---
NvCV_Status run(NvAR_FeatureHandle handle);
NvCV_Status load(NvAR_FeatureHandle handle);
NvCV_Status cuda_stream_create(CUstream* stream);
NvCV_Status cuda_stream_destroy(CUstream stream);
const char* cv_get_error_string_from_code(NvCV_Status code);
};
} // namespace nvidia::ar

View File

@ -37,6 +37,9 @@
#ifdef ENABLE_FILTER_DYNAMIC_MASK
#include "filters/filter-dynamic-mask.hpp"
#endif
#ifdef ENABLE_FILTER_NVIDIA_FACE_TRACKING
#include "filters/filter-nv-face-tracking.hpp"
#endif
#ifdef ENABLE_FILTER_SDF_EFFECTS
#include "filters/filter-sdf-effects.hpp"
#endif
@ -87,6 +90,9 @@ try {
#ifdef ENABLE_FILTER_DYNAMIC_MASK
filter::dynamic_mask::dynamic_mask_factory::initialize();
#endif
#ifdef ENABLE_FILTER_NVIDIA_FACE_TRACKING
filter::nvidia::face_tracking_factory::initialize();
#endif
#ifdef ENABLE_FILTER_SDF_EFFECTS
filter::sdf_effects::sdf_effects_factory::initialize();
#endif
@ -146,6 +152,9 @@ try {
#ifdef ENABLE_FILTER_DYNAMIC_MASK
filter::dynamic_mask::dynamic_mask_factory::finalize();
#endif
#ifdef ENABLE_FILTER_NVIDIA_FACE_TRACKING
filter::nvidia::face_tracking_factory::finalize();
#endif
#ifdef ENABLE_FILTER_SDF_EFFECTS
filter::sdf_effects::sdf_effects_factory::finalize();
#endif

1
third-party/nvidia-arsdk vendored Submodule

@ -0,0 +1 @@
Subproject commit 3765f94a8a981552a2fb0665ae84af7272322219