diff --git a/data/effects/blur/gaussian-linear.effect b/data/effects/blur/gaussian-linear.effect new file mode 100644 index 00000000..dc46fea8 --- /dev/null +++ b/data/effects/blur/gaussian-linear.effect @@ -0,0 +1,136 @@ +// Parameters: +/// OBS Default +uniform float4x4 ViewProj; +/// Texture (pImage: source texture; pImageTexel: per-tap UV step, multiplied by pStepScale in PSBlur1D) +uniform texture2d pImage; +uniform float2 pImageTexel; +/// Blur (pSize: number of taps on each side of the center; pStepScale: per-axis multiplier applied to pImageTexel; pAngle and pCenter are not read by any function in this effect) +uniform float pSize; +uniform float pAngle; +uniform float2 pCenter; +uniform float2 pStepScale; +/// Gaussian (32 float4 = 128 weights, four per float4; GetKernelAt(0) is the weight of the center tap) +uniform float4 pKernel[32]; + +// Fixed upper bound of the sampling loop in PSBlur1D. +#define MAX_BLUR_SIZE 128 + +// # Linear Optimization +// While the normal way is to sample every texel within pSize, linear optimization +// takes advantage of the fact that most people, especially after compression, +// will not be able to tell the difference between a linear approximation and +// the actual thing. +// +// Instead of sampling every texel like this: +// +// |Tx|Tx|Tx|Tx|Tx| +// Tx|-2|-1| 0|+1|+2| +// +// Linear optimization will sample like this: +// +// |Tx|Tx|Tx|Tx|Tx| +// Tx| -1 | 0| +1 | +// +// This effectively removes half the necessary samples and looks identical +// when used with box blur. However there is an edge case when the blur width +// is not a multiple of two, where two additional samples have to be spent on +// reading the outer edge: +// +// |Tx|Tx|Tx|Tx|Tx|Tx|Tx| +// Tx|-2| -1 | 0| +1 |+2| +// +// or this alternative pattern that uses two fewer samples: +// +// |Tx|Tx|Tx|Tx|Tx|Tx|Tx| +// Tx| 0 | +1 | +2 |+3| +// +// or this alternative pattern that also uses two fewer samples: +// +// |Tx|Tx|Tx|Tx|Tx|Tx|Tx| +// Tx| -2 | -1~~+1 | +2 | +// +// With careful planning this can even be used for other types of Blur, such as +// Gaussian Blur, which suffers a larger hit - however there are better and +// faster alternatives than linear sampling with Gaussian Blur, such as +// Dual Filtering ("Dual Kawase"). 
+
+// Sampler
+sampler_state linearSampler {
+	Filter    = Linear;
+	AddressU  = Clamp;
+	AddressV  = Clamp;
+	MinLOD    = 0;
+	MaxLOD    = 0;
+};
+
+// Default Vertex Shader and Data
+struct VertDataIn {
+	float4 pos : POSITION;
+	float2 uv  : TEXCOORD0;
+};
+
+// Only position and texture coordinate are interpolated. Whether the blur size
+// is odd depends solely on the uniform pSize, so the pixel shader derives it
+// directly instead of receiving it through a boolean interpolant.
+// NOTE(review): boolean vertex outputs are presumably also a portability risk
+// for non-D3D back-ends - confirm against the OBS effect translator.
+struct VertDataOut {
+	float4 pos : POSITION;
+	float2 uv  : TEXCOORD0;
+};
+
+// Transforms the sprite vertex by ViewProj and forwards the texture coordinate.
+VertDataOut VSDefault(VertDataIn vtx) {
+	VertDataOut vert_out;
+	vert_out.pos = mul(float4(vtx.pos.xyz, 1.0), ViewProj);
+	vert_out.uv  = vtx.uv;
+	return vert_out;
+}
+
+// Functions
+// Returns weight i of the kernel; pKernel stores four consecutive weights per
+// float4, and weight 0 belongs to the center tap.
+float GetKernelAt(int i) {
+	return ((float[4])(pKernel[floor(i/4)]))[i%4];
+}
+
+// Blur 1 Dimensional
+// Samples the center texel plus pairs of neighbouring taps on both sides along
+// the direction given by (pImageTexel * pStepScale). Each pair (n, n+1) is read
+// with a single linearly filtered fetch.
+float4 PSBlur1D(VertDataOut vtx) : TARGET {
+	// Work with one integer size everywhere. Comparing the loop counter against
+	// the float pSize while deciding odd/even from round(pSize) made fractional
+	// sizes (e.g. 3.4) sample weight 3 twice. The upper clamp keeps
+	// GetKernelAt(n + 1) inside the 128 stored weights.
+	int size = min(int(round(pSize)), MAX_BLUR_SIZE - 1);
+	float2 texel_step = pImageTexel * pStepScale;
+
+	float4 final = pImage.Sample(linearSampler, vtx.uv)
+		* GetKernelAt(0);
+
+	// y = yes, s = skip, b = break
+	// Size-> | 1| 2| 3| 4| 5| 6| 7|
+	// -------+--+--+--+--+--+--+--+
+	//  n=1   | b| y| y| y| y| y| y|
+	//  n=2   |  |bs| s| s| s| s| s|
+	//  n=3   |  | b| b| y| y| y| y|
+	//  n=4   |  |  |  |bs| s| s| s|
+	//  n=5   |  |  |  | b| b| y| y|
+	//  n=6   |  |  |  |  |  |bs| s|
+	//  n=7   |  |  |  |  |  | b| b|
+	//  n=8   |  |  |  |  |  |  |  |
+
+	// Loop unrolling is only possible with a fixed known maximum.
+	// Some compilers may unroll up to x iterations, but most will not.
+	for (int n = 1; n <= MAX_BLUR_SIZE; n += 2) {
+		// Different from normal box, early exit instead of late exit.
+		if (n >= size) {
+			break;
+		}
+
+		float k0     = GetKernelAt(n);
+		float k1     = GetKernelAt(n + 1);
+		float kernel = k0 + k1;
+
+		// Place the fetch at the weight centroid of the pair: a linear filter at
+		// n + t returns (1 - t) * T[n] + t * T[n + 1], so t = k1 / (k0 + k1)
+		// scaled by (k0 + k1) reproduces k0 * T[n] + k1 * T[n + 1] when taps are
+		// one texel apart. Fall back to the midpoint if both weights are zero.
+		float offset = (kernel > 0.0) ? (k1 / kernel) : 0.5;
+
+		float2 nstep = texel_step * (float(n) + offset);
+		final += pImage.Sample(linearSampler, vtx.uv + nstep) * kernel;
+		final += pImage.Sample(linearSampler, vtx.uv - nstep) * kernel;
+	}
+
+	// An odd size leaves the outermost tap without a partner; read it on its own.
+	if ((size % 2) == 1) {
+		float  kernel = GetKernelAt(size);
+		float2 nstep  = texel_step * float(size);
+		final += pImage.Sample(linearSampler, vtx.uv + nstep) * kernel;
+		final += pImage.Sample(linearSampler, vtx.uv - nstep) * kernel;
+	}
+
+	return final;
+}
+
+technique Draw {
+	pass {
+		vertex_shader = VSDefault(vtx);
+		pixel_shader  = PSBlur1D(vtx);
+	}
+}
diff --git a/source/gfx/blur/gfx-blur-gaussian-linear.cpp b/source/gfx/blur/gfx-blur-gaussian-linear.cpp
new file mode 100644
index 00000000..0acea8c4
--- /dev/null
+++ b/source/gfx/blur/gfx-blur-gaussian-linear.cpp
@@ -0,0 +1,425 @@
+// Modern effects for a modern Streamer
+// Copyright (C) 2019 Michael Fabian Dirks
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation; either version 2 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + +#include "gfx-blur-gaussian-linear.hpp" +#include "util-math.hpp" + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4201) +#endif +#include +#include +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +// FIXME: This breaks when MAX_KERNEL_SIZE is changed, due to the way the Gaussian +// function first goes up at the point, and then once we pass the critical point +// will go down again and it is not handled well. This is a pretty basic +// approximation anyway at the moment. +#define MAX_KERNEL_SIZE 128 +#define MAX_BLUR_SIZE (MAX_KERNEL_SIZE - 1) +#define SEARCH_DENSITY double_t(1. / 500.) +#define SEARCH_THRESHOLD double_t(1. / (MAX_KERNEL_SIZE * 5)) +#define SEARCH_EXTENSION 1 +#define SEARCH_RANGE MAX_KERNEL_SIZE * 2 + +gfx::blur::gaussian_linear_data::gaussian_linear_data() +{ + { + char* file = obs_module_file("effects/blur/gaussian-linear.effect"); + m_effect = std::make_shared(file); + bfree(file); + } + + // Precalculate Kernels + for (size_t kernel_size = 1; kernel_size <= MAX_BLUR_SIZE; kernel_size++) { + std::vector kernel_math(MAX_KERNEL_SIZE); + std::vector kernel_data(MAX_KERNEL_SIZE); + double_t actual_width = 1.; + + // Find actual kernel width. + for (double_t h = SEARCH_DENSITY; h < SEARCH_RANGE; h += SEARCH_DENSITY) { + if (util::math::gaussian(double_t(kernel_size + SEARCH_EXTENSION), h) > SEARCH_THRESHOLD) { + actual_width = h; + break; + } + } + + // Calculate and normalize + double_t sum = 0; + for (size_t p = 0; p <= kernel_size; p++) { + kernel_math[p] = util::math::gaussian(double_t(p), actual_width); + sum += kernel_math[p] * (p > 0 ? 2 : 1); + } + + // Normalize to fill the entire 0..1 range over the width. 
+ double_t inverse_sum = 1.0 / sum; + for (size_t p = 0; p <= kernel_size; p++) { + kernel_data.at(p) = float_t(kernel_math[p] * inverse_sum); + } + + m_kernels.push_back(std::move(kernel_data)); + } +} + +gfx::blur::gaussian_linear_data::~gaussian_linear_data() +{ + m_effect.reset(); +} + +std::shared_ptr<::gs::effect> gfx::blur::gaussian_linear_data::get_effect() +{ + return m_effect; +} + +std::vector const& gfx::blur::gaussian_linear_data::get_kernel(size_t width) +{ + if (width < 1) + width = 1; + if (width > MAX_BLUR_SIZE) + width = MAX_BLUR_SIZE; + width -= 1; + return m_kernels[width]; +} + +gfx::blur::gaussian_linear_factory::gaussian_linear_factory() {} + +gfx::blur::gaussian_linear_factory::~gaussian_linear_factory() {} + +bool gfx::blur::gaussian_linear_factory::is_type_supported(::gfx::blur::type v) +{ + switch (v) { + case ::gfx::blur::type::Area: + return true; + case ::gfx::blur::type::Directional: + return true; + default: + return false; + } +} + +std::shared_ptr<::gfx::blur::ibase> gfx::blur::gaussian_linear_factory::create(::gfx::blur::type v) +{ + switch (v) { + case ::gfx::blur::type::Area: + return std::make_shared<::gfx::blur::gaussian_linear>(); + case ::gfx::blur::type::Directional: + return std::static_pointer_cast<::gfx::blur::gaussian_linear>(std::make_shared<::gfx::blur::gaussian_linear_directional>()); + default: + throw std::runtime_error("Invalid type."); + } +} + +double_t gfx::blur::gaussian_linear_factory::get_min_size(::gfx::blur::type) +{ + return double_t(1.0); +} + +double_t gfx::blur::gaussian_linear_factory::get_step_size(::gfx::blur::type) +{ + return double_t(1.0); +} + +double_t gfx::blur::gaussian_linear_factory::get_max_size(::gfx::blur::type) +{ + return double_t(MAX_BLUR_SIZE); +} + +double_t gfx::blur::gaussian_linear_factory::get_min_angle(::gfx::blur::type v) +{ + switch (v) { + case ::gfx::blur::type::Directional: + case ::gfx::blur::type::Rotational: + return -180.0; + default: + return 0; + } +} + +double_t 
gfx::blur::gaussian_linear_factory::get_step_angle(::gfx::blur::type) +{ + return double_t(0.01); +} + +double_t gfx::blur::gaussian_linear_factory::get_max_angle(::gfx::blur::type v) +{ + switch (v) { + case ::gfx::blur::type::Directional: + case ::gfx::blur::type::Rotational: + return 180.0; + default: + return 0; + } +} + +bool gfx::blur::gaussian_linear_factory::is_step_scale_supported(::gfx::blur::type v) +{ + switch (v) { + case ::gfx::blur::type::Area: + case ::gfx::blur::type::Zoom: + case ::gfx::blur::type::Directional: + return true; + default: + return false; + } +} + +double_t gfx::blur::gaussian_linear_factory::get_min_step_scale_x(::gfx::blur::type) +{ + return double_t(0.01); +} + +double_t gfx::blur::gaussian_linear_factory::get_step_step_scale_x(::gfx::blur::type) +{ + return double_t(0.01); +} + +double_t gfx::blur::gaussian_linear_factory::get_max_step_scale_x(::gfx::blur::type) +{ + return double_t(1000.0); +} + +double_t gfx::blur::gaussian_linear_factory::get_min_step_scale_y(::gfx::blur::type) +{ + return double_t(0.01); +} + +double_t gfx::blur::gaussian_linear_factory::get_step_step_scale_y(::gfx::blur::type) +{ + return double_t(0.01); +} + +double_t gfx::blur::gaussian_linear_factory::get_max_step_scale_y(::gfx::blur::type) +{ + return double_t(1000.0); +} + +std::shared_ptr<::gfx::blur::gaussian_linear_data> gfx::blur::gaussian_linear_factory::data() +{ + std::unique_lock ulock(m_data_lock); + std::shared_ptr<::gfx::blur::gaussian_linear_data> data = m_data.lock(); + if (!data) { + data = std::make_shared<::gfx::blur::gaussian_linear_data>(); + m_data = data; + } + return data; +} + +::gfx::blur::gaussian_linear_factory& gfx::blur::gaussian_linear_factory::get() +{ + static ::gfx::blur::gaussian_linear_factory instance; + return instance; +} + +gfx::blur::gaussian_linear::gaussian_linear() + : m_size(1.), m_step_scale({1., 1.}), m_data(::gfx::blur::gaussian_linear_factory::get().data()) +{ + m_rendertarget = std::make_shared(GS_RGBA, 
GS_ZS_NONE); + m_rendertarget2 = std::make_shared(GS_RGBA, GS_ZS_NONE); +} + +gfx::blur::gaussian_linear::~gaussian_linear() {} + +void gfx::blur::gaussian_linear::set_input(std::shared_ptr<::gs::texture> texture) +{ + m_input_texture = texture; +} + +::gfx::blur::type gfx::blur::gaussian_linear::get_type() +{ + return ::gfx::blur::type::Area; +} + +double_t gfx::blur::gaussian_linear::get_size() +{ + return m_size; +} + +void gfx::blur::gaussian_linear::set_size(double_t width) +{ + if (width < 1.) + width = 1.; + if (width > MAX_BLUR_SIZE) + width = MAX_BLUR_SIZE; + m_size = width; +} + +void gfx::blur::gaussian_linear::set_step_scale(double_t x, double_t y) +{ + m_step_scale.first = x; + m_step_scale.second = y; +} + +void gfx::blur::gaussian_linear::get_step_scale(double_t& x, double_t& y) +{ + x = m_step_scale.first; + y = m_step_scale.second; +} + +double_t gfx::blur::gaussian_linear::get_step_scale_x() +{ + return m_step_scale.first; +} + +double_t gfx::blur::gaussian_linear::get_step_scale_y() +{ + return m_step_scale.second; +} + +std::shared_ptr<::gs::texture> gfx::blur::gaussian_linear::render() +{ + std::shared_ptr<::gs::effect> effect = m_data->get_effect(); + auto kernel = m_data->get_kernel(size_t(m_size)); + + if (!effect || ((m_step_scale.first + m_step_scale.second) < FLT_EPSILON)) { + return m_input_texture; + } + + float_t width = float_t(m_input_texture->get_width()); + float_t height = float_t(m_input_texture->get_height()); + + // Setup + obs_enter_graphics(); + gs_set_cull_mode(GS_NEITHER); + gs_enable_color(true, true, true, true); + gs_enable_depth_test(false); + gs_depth_function(GS_ALWAYS); + gs_blend_state_push(); + gs_reset_blend_state(); + gs_enable_blending(false); + gs_blend_function(GS_BLEND_ONE, GS_BLEND_ZERO); + gs_enable_stencil_test(false); + gs_enable_stencil_write(false); + gs_stencil_function(GS_STENCIL_BOTH, GS_ALWAYS); + gs_stencil_op(GS_STENCIL_BOTH, GS_ZERO, GS_ZERO, GS_ZERO); + + 
effect->get_parameter("pImage").set_texture(m_input_texture); + effect->get_parameter("pStepScale").set_float2(float_t(m_step_scale.first), float_t(m_step_scale.second)); + effect->get_parameter("pSize").set_float(float_t(m_size)); + effect->get_parameter("pKernel").set_float_array(kernel.data(), MAX_KERNEL_SIZE); + + // First Pass + if (m_step_scale.first > FLT_EPSILON) { + effect->get_parameter("pImageTexel").set_float2(float_t(1. / width), 0.); + + { + auto op = m_rendertarget2->render(uint32_t(width), uint32_t(height)); + gs_ortho(0, 1., 0, 1., 0, 1.); + while (gs_effect_loop(effect->get_object(), "Draw")) { + gs_draw_sprite(0, 0, 1, 1); + } + } + + std::swap(m_rendertarget, m_rendertarget2); + effect->get_parameter("pImage").set_texture(m_rendertarget->get_texture()); + } + + // Second Pass + if (m_step_scale.second > FLT_EPSILON) { + effect->get_parameter("pImageTexel").set_float2(0., float_t(1. / height)); + + { + auto op = m_rendertarget2->render(uint32_t(width), uint32_t(height)); + gs_ortho(0, 1., 0, 1., 0, 1.); + while (gs_effect_loop(effect->get_object(), "Draw")) { + gs_draw_sprite(0, 0, 1, 1); + } + } + + std::swap(m_rendertarget, m_rendertarget2); + } + + gs_blend_state_pop(); + obs_leave_graphics(); + + return this->get(); +} + +std::shared_ptr<::gs::texture> gfx::blur::gaussian_linear::get() +{ + return m_rendertarget->get_texture(); +} + +gfx::blur::gaussian_linear_directional::gaussian_linear_directional() : m_angle(0.) 
{} + +gfx::blur::gaussian_linear_directional::~gaussian_linear_directional() {} + +::gfx::blur::type gfx::blur::gaussian_linear_directional::get_type() +{ + return ::gfx::blur::type::Directional; +} + +double_t gfx::blur::gaussian_linear_directional::get_angle() +{ + return RAD_TO_DEG(m_angle); +} + +void gfx::blur::gaussian_linear_directional::set_angle(double_t angle) +{ + m_angle = DEG_TO_RAD(angle); +} + +std::shared_ptr<::gs::texture> gfx::blur::gaussian_linear_directional::render() +{ + std::shared_ptr<::gs::effect> effect = m_data->get_effect(); + auto kernel = m_data->get_kernel(size_t(m_size)); + + if (!effect || ((m_step_scale.first + m_step_scale.second) < FLT_EPSILON)) { + return m_input_texture; + } + + float_t width = float_t(m_input_texture->get_width()); + float_t height = float_t(m_input_texture->get_height()); + + // Setup + obs_enter_graphics(); + gs_set_cull_mode(GS_NEITHER); + gs_enable_color(true, true, true, true); + gs_enable_depth_test(false); + gs_depth_function(GS_ALWAYS); + gs_blend_state_push(); + gs_reset_blend_state(); + gs_enable_blending(false); + gs_blend_function(GS_BLEND_ONE, GS_BLEND_ZERO); + gs_enable_stencil_test(false); + gs_enable_stencil_write(false); + gs_stencil_function(GS_STENCIL_BOTH, GS_ALWAYS); + gs_stencil_op(GS_STENCIL_BOTH, GS_ZERO, GS_ZERO, GS_ZERO); + + effect->get_parameter("pImage").set_texture(m_input_texture); + effect->get_parameter("pImageTexel") + .set_float2(float_t(1. / width * cos(m_angle)), float_t(1. 
/ height * sin(m_angle))); + effect->get_parameter("pStepScale").set_float2(float_t(m_step_scale.first), float_t(m_step_scale.second)); + effect->get_parameter("pSize").set_float(float_t(m_size)); + effect->get_parameter("pKernel").set_float_array(kernel.data(), MAX_KERNEL_SIZE); + + // First Pass + { + auto op = m_rendertarget->render(uint32_t(width), uint32_t(height)); + gs_ortho(0, 1., 0, 1., 0, 1.); + while (gs_effect_loop(effect->get_object(), "Draw")) { + gs_draw_sprite(0, 0, 1, 1); + } + } + + gs_blend_state_pop(); + obs_leave_graphics(); + + return this->get(); +} diff --git a/source/gfx/blur/gfx-blur-gaussian-linear.hpp b/source/gfx/blur/gfx-blur-gaussian-linear.hpp new file mode 100644 index 00000000..fe02ccbf --- /dev/null +++ b/source/gfx/blur/gfx-blur-gaussian-linear.hpp @@ -0,0 +1,138 @@ +// Modern effects for a modern Streamer +// Copyright (C) 2019 Michael Fabian Dirks +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 2 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + +#pragma once +#include +#include +#include "gfx-blur-base.hpp" +#include "obs/gs/gs-effect.hpp" +#include "obs/gs/gs-rendertarget.hpp" +#include "obs/gs/gs-texture.hpp" + +namespace gfx { + namespace blur { + class gaussian_linear_data { + std::shared_ptr<::gs::effect> m_effect; + std::vector> m_kernels; + + public: + gaussian_linear_data(); + ~gaussian_linear_data(); + + std::shared_ptr<::gs::effect> get_effect(); + + std::vector const& get_kernel(size_t width); + }; + + class gaussian_linear_factory : public ::gfx::blur::ifactory { + std::mutex m_data_lock; + std::weak_ptr<::gfx::blur::gaussian_linear_data> m_data; + + public: + gaussian_linear_factory(); + virtual ~gaussian_linear_factory(); + + virtual bool is_type_supported(::gfx::blur::type type) override; + + virtual std::shared_ptr<::gfx::blur::ibase> create(::gfx::blur::type type) override; + + virtual double_t get_min_size(::gfx::blur::type type) override; + + virtual double_t get_step_size(::gfx::blur::type type) override; + + virtual double_t get_max_size(::gfx::blur::type type) override; + + virtual double_t get_min_angle(::gfx::blur::type type) override; + + virtual double_t get_step_angle(::gfx::blur::type type) override; + + virtual double_t get_max_angle(::gfx::blur::type type) override; + + virtual bool is_step_scale_supported(::gfx::blur::type type) override; + + virtual double_t get_min_step_scale_x(::gfx::blur::type type) override; + + virtual double_t get_step_step_scale_x(::gfx::blur::type type) override; + + virtual double_t get_max_step_scale_x(::gfx::blur::type type) override; + + virtual double_t get_min_step_scale_y(::gfx::blur::type type) override; + + virtual double_t get_step_step_scale_y(::gfx::blur::type type) override; + + virtual double_t 
get_max_step_scale_y(::gfx::blur::type type) override; + + std::shared_ptr<::gfx::blur::gaussian_linear_data> data(); + + public: // Singleton + static ::gfx::blur::gaussian_linear_factory& get(); + }; + + class gaussian_linear : public ::gfx::blur::ibase { + protected: + std::shared_ptr<::gfx::blur::gaussian_linear_data> m_data; + + double_t m_size; + std::pair m_step_scale; + std::shared_ptr<::gs::texture> m_input_texture; + std::shared_ptr<::gs::rendertarget> m_rendertarget; + + private: + std::shared_ptr<::gs::rendertarget> m_rendertarget2; + + public: + gaussian_linear(); + virtual ~gaussian_linear(); + + virtual void set_input(std::shared_ptr<::gs::texture> texture) override; + + virtual ::gfx::blur::type get_type() override; + + virtual double_t get_size() override; + + virtual void set_size(double_t width) override; + + virtual void set_step_scale(double_t x, double_t y) override; + + virtual void get_step_scale(double_t& x, double_t& y) override; + + virtual double_t get_step_scale_x() override; + + virtual double_t get_step_scale_y() override; + + virtual std::shared_ptr<::gs::texture> render() override; + + virtual std::shared_ptr<::gs::texture> get() override; + }; + + class gaussian_linear_directional : public ::gfx::blur::gaussian_linear, public ::gfx::blur::ibase_angle { + double_t m_angle; + + public: + gaussian_linear_directional(); + virtual ~gaussian_linear_directional(); + + virtual ::gfx::blur::type get_type() override; + + virtual double_t get_angle() override; + + virtual void set_angle(double_t angle) override; + + virtual std::shared_ptr<::gs::texture> render() override; + }; + } // namespace blur +} // namespace gfx