gfx/blur/gaussian-linear: Refactor Linear Gaussian Blur

While Gaussian Blur is not a Blur type that really benefits much from linear sampling, it can be used for a slight quality and GPU usage reduction. However for Area and Directional Blur there is a better alternative: Dual Filtering Blur. And as with all other currently implemented Linear versions of Blur, only Area and Directional Blur are supported.

This type of Gaussian Blur also has the loading hitch that exists in normal Gaussian Blur.

Related: #45, #6
This commit is contained in:
Michael Fabian 'Xaymar' Dirks 2019-04-02 02:59:20 +02:00 committed by Michael Fabian Dirks
parent fedc8e075c
commit b6d45ce73c
3 changed files with 699 additions and 0 deletions

View file

@ -0,0 +1,136 @@
// Parameters:
/// OBS Default
// View-projection matrix; VSDefault multiplies every vertex position by it.
uniform float4x4 ViewProj;
/// Texture
// Image that PSBlur1D samples.
uniform texture2d pImage;
// UV offset of one sample step; PSBlur1D scales it by pStepScale and the sample distance.
uniform float2 pImageTexel;
/// Blur
// Blur size; bounds the sampling loop and its parity selects the extra edge sample.
uniform float pSize;
// Not referenced by any shader code visible in this file.
uniform float pAngle;
// Not referenced by any shader code visible in this file.
uniform float2 pCenter;
// Per-axis multiplier applied to pImageTexel.
uniform float2 pStepScale;
/// Gaussian
// Kernel weights packed four per float4 (32 * 4 = 128 floats); read through GetKernelAt().
uniform float4 pKernel[32];
// Upper bound of the sampling loop in PSBlur1D, giving the loop a fixed maximum trip count.
#define MAX_BLUR_SIZE 128
// # Linear Optimization
// While the normal way is to sample every texel in the pSize, linear optimization
// takes advantage of the fact that most people, especially after compression,
// will not be able to tell the difference between a linear approximation and
// the actual thing.
//
// Instead of sampling every texel like this:
//
// |Tx|Tx|Tx|Tx|Tx|
// Tx|-2|-1| 0|+1|+2|
//
// Linear optimization will sample like this:
//
// |Tx|Tx|Tx|Tx|Tx|
// Tx| -1 | 0| +1 |
//
// This effectively removes half the necessary samples and looks identical when
// used with box blur. However there is an edge case when the blur width
// is not a multiple of two, where two additional samples have to be spent on
// reading the outer edge:
//
// |Tx|Tx|Tx|Tx|Tx|Tx|Tx|
// Tx|-2| -1 | 0| +1 |+2|
//
// or this alternative pattern that uses two fewer samples:
//
// |Tx|Tx|Tx|Tx|Tx|Tx|Tx|
// Tx| 0 | +1 | +2 |+3|
//
// or this alternative pattern that also uses two fewer samples:
//
// |Tx|Tx|Tx|Tx|Tx|Tx|Tx|
// Tx| -2 | -1~~+1 | +2 |
//
// With careful planning this can even be used for other types of Blur, such as
// Gaussian Blur, which suffers a larger hit - however there are better and
// faster alternatives than linear sampling with Gaussian Blur, such as
// Dual Filtering ("Dual Kawase").
// Sampler
// Sampler used for every texture read in PSBlur1D.
// Linear filtering lets a single read between two texel centers blend both
// texels, which is what the linear optimization described above relies on.
// Coordinates are clamped on both axes and the LOD range is fixed to level 0.
sampler_state linearSampler {
Filter = Linear;
AddressU = Clamp;
AddressV = Clamp;
MinLOD = 0;
MaxLOD = 0;
};
// Default Vertex Shader and Data
// Vertex shader input: position and texture coordinate of one vertex.
struct VertDataIn {
float4 pos : POSITION;
float2 uv : TEXCOORD0;
};
// Vertex shader output / pixel shader input.
// pos is the transformed position, uv is passed through unchanged, and
// is_odd is set by VSDefault to whether round(pSize) is an odd number.
struct VertDataOut {
float4 pos : POSITION;
float2 uv : TEXCOORD0;
bool is_odd : TEXCOORD1;
};
VertDataOut VSDefault(VertDataIn vtx) {
	// Parity of the rounded blur size; PSBlur1D uses it to decide whether the
	// outermost texel needs its own pair of samples.
	int roundedSize = int(round(pSize));

	VertDataOut result;
	result.pos = mul(float4(vtx.pos.xyz, 1.0), ViewProj);
	result.uv = vtx.uv;
	result.is_odd = ((roundedSize % 2) == 1);
	return result;
}
// Functions
float GetKernelAt(int i) {
	// Four consecutive weights share one float4: pick the register first,
	// then the component inside it.
	int slot = int(floor(i / 4));
	int lane = i % 4;
	return ((float[4])(pKernel[slot]))[lane];
}
// Blur 1 Dimensional
float4 PSBlur1D(VertDataOut vtx) : TARGET {
	// UV distance of a single step along the blur axis.
	float2 texelStep = pImageTexel * pStepScale;

	// The center texel is weighted with the first kernel entry.
	float4 accum = pImage.Sample(linearSampler, vtx.uv) * GetKernelAt(0);

	// y = yes, s = skip, b = break
	// Size-> | 1| 2| 3| 4| 5| 6| 7|
	// -------+--+--+--+--+--+--+--+
	//  n=1   | b| y| y| y| y| y| y|
	//  n=2   |  |bs| s| s| s| s| s|
	//  n=3   |  | b| b| y| y| y| y|
	//  n=4   |  |  |  |bs| s| s| s|
	//  n=5   |  |  |  | b| b| y| y|
	//  n=6   |  |  |  |  |  |bs| s|
	//  n=7   |  |  |  |  |  | b| b|
	//  n=8   |  |  |  |  |  |  |  |

	// Loop unrolling is only possible with a fixed known maximum.
	// Some compilers may unroll up to x iterations, but most will not.
	for (int n = 1; n <= MAX_BLUR_SIZE; n += 2) {
		// Different from normal box, early exit instead of late exit.
		if (n >= pSize) {
			break;
		}

		// One read halfway between texel n and n + 1 stands in for both,
		// weighted with the sum of their kernel entries.
		// TODO: Determine better position than 0.5 for gaussian approximation.
		float2 offset = texelStep * (n + 0.5);
		float weight = GetKernelAt(n) + GetKernelAt(n + 1);
		accum += pImage.Sample(linearSampler, vtx.uv + offset) * weight;
		accum += pImage.Sample(linearSampler, vtx.uv - offset) * weight;
	}

	// Odd sizes leave one unpaired texel on each side, read on its own.
	if (vtx.is_odd) {
		float edgeWeight = GetKernelAt(pSize);
		float2 edgeOffset = texelStep * pSize;
		accum += pImage.Sample(linearSampler, vtx.uv + edgeOffset) * edgeWeight;
		accum += pImage.Sample(linearSampler, vtx.uv - edgeOffset) * edgeWeight;
	}

	return accum;
}
// Single-pass technique: VSDefault transforms the vertices, PSBlur1D blurs
// along the axis given by pImageTexel. The host code selects it by the name "Draw".
technique Draw {
pass {
vertex_shader = VSDefault(vtx);
pixel_shader = PSBlur1D(vtx);
}
}

View file

@ -0,0 +1,425 @@
// Modern effects for a modern Streamer
// Copyright (C) 2019 Michael Fabian Dirks
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
#include "gfx-blur-gaussian-linear.hpp"
#include "util-math.hpp"
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable : 4201)
#endif
#include <obs-module.h>
#include <obs.h>
#ifdef _MSC_VER
#pragma warning(pop)
#endif
// FIXME: This breaks when MAX_KERNEL_SIZE is changed, due to the way the Gaussian
//  function first goes up at the point, and then once we pass the critical point
//  will go down again and it is not handled well. This is a pretty basic
//  approximation anyway at the moment.

// Number of floats uploaded to the shader's pKernel array (32 float4 = 128).
#define MAX_KERNEL_SIZE 128
// Largest selectable blur size; a kernel of size N occupies entries 0..N.
#define MAX_BLUR_SIZE (MAX_KERNEL_SIZE - 1)
// Step width of the brute-force search for the Gaussian width.
#define SEARCH_DENSITY double_t(1. / 500.)
// Value the Gaussian must exceed just outside the kernel for a width to be accepted.
#define SEARCH_THRESHOLD double_t(1. / (MAX_KERNEL_SIZE * 5))
// Distance beyond the kernel edge at which the threshold is tested.
#define SEARCH_EXTENSION 1
// Upper bound of the width search. Parenthesized so that the macro expands
// correctly inside larger expressions (e.g. `x / SEARCH_RANGE`).
#define SEARCH_RANGE (MAX_KERNEL_SIZE * 2)
/// Loads the linear Gaussian blur effect and precalculates one normalized
/// kernel for every blur size from 1 to MAX_BLUR_SIZE.
///
/// @throws std::runtime_error if the effect file path cannot be resolved.
///         Anything thrown while constructing the effect is passed through.
gfx::blur::gaussian_linear_data::gaussian_linear_data()
{
	{
		// The path string has to be released with bfree(); owning it through a
		// unique_ptr guarantees that even if constructing the effect throws.
		std::unique_ptr<char, decltype(&bfree)> file(obs_module_file("effects/blur/gaussian-linear.effect"), &bfree);
		if (!file) {
			throw std::runtime_error("Unable to locate 'effects/blur/gaussian-linear.effect'.");
		}
		m_effect = std::make_shared<gs::effect>(file.get());
	}

	// Precalculate Kernels
	m_kernels.reserve(MAX_BLUR_SIZE);
	for (size_t kernel_size = 1; kernel_size <= MAX_BLUR_SIZE; kernel_size++) {
		std::vector<double_t> kernel_math(MAX_KERNEL_SIZE);
		std::vector<float_t>  kernel_data(MAX_KERNEL_SIZE);
		double_t              actual_width = 1.;

		// Find actual kernel width: the first width at which the Gaussian is
		// still above the threshold just outside of the kernel.
		for (double_t h = SEARCH_DENSITY; h < SEARCH_RANGE; h += SEARCH_DENSITY) {
			if (util::math::gaussian<double_t>(double_t(kernel_size + SEARCH_EXTENSION), h) > SEARCH_THRESHOLD) {
				actual_width = h;
				break;
			}
		}

		// Calculate the one-sided kernel. Every entry except the center is
		// used on both sides of the center, so it counts twice in the sum.
		double_t sum = 0;
		for (size_t p = 0; p <= kernel_size; p++) {
			kernel_math[p] = util::math::gaussian<double_t>(double_t(p), actual_width);
			sum += kernel_math[p] * (p > 0 ? 2 : 1);
		}

		// Normalize to fill the entire 0..1 range over the width.
		double_t inverse_sum = 1.0 / sum;
		for (size_t p = 0; p <= kernel_size; p++) {
			kernel_data.at(p) = float_t(kernel_math[p] * inverse_sum);
		}

		// Stored at index kernel_size - 1; entries past kernel_size stay zero.
		m_kernels.push_back(std::move(kernel_data));
	}
}
/// Drops this object's reference to the effect.
gfx::blur::gaussian_linear_data::~gaussian_linear_data()
{
	m_effect = nullptr;
}
/// @return The effect loaded by the constructor.
std::shared_ptr<::gs::effect> gfx::blur::gaussian_linear_data::get_effect()
{
return m_effect;
}
/// Looks up the precalculated kernel for a blur size.
///
/// @param width Blur size; values outside [1, MAX_BLUR_SIZE] are clamped.
/// @return Reference to the stored kernel of MAX_KERNEL_SIZE floats.
std::vector<float_t> const& gfx::blur::gaussian_linear_data::get_kernel(size_t width)
{
	size_t const clamped = (width < 1) ? 1 : ((width > MAX_BLUR_SIZE) ? MAX_BLUR_SIZE : width);
	// Kernels are stored zero-based: size 1 lives at index 0.
	return m_kernels[clamped - 1];
}
/// Nothing to set up; the shared data is created on demand by data().
gfx::blur::gaussian_linear_factory::gaussian_linear_factory() {}
/// Nothing to release explicitly.
gfx::blur::gaussian_linear_factory::~gaussian_linear_factory() {}
/// @return true for Area and Directional blur, false for every other type.
bool gfx::blur::gaussian_linear_factory::is_type_supported(::gfx::blur::type v)
{
	return (v == ::gfx::blur::type::Area) || (v == ::gfx::blur::type::Directional);
}
/// Creates a blur instance for the requested type.
///
/// @param v Blur type; only Area and Directional are available.
/// @return The new blur instance.
/// @throws std::runtime_error for any other type.
std::shared_ptr<::gfx::blur::ibase> gfx::blur::gaussian_linear_factory::create(::gfx::blur::type v)
{
	if (v == ::gfx::blur::type::Area) {
		return std::make_shared<::gfx::blur::gaussian_linear>();
	}

	if (v == ::gfx::blur::type::Directional) {
		// Convert through gaussian_linear first, exactly like the explicit
		// pointer cast would, before handing it out as ibase.
		std::shared_ptr<::gfx::blur::gaussian_linear> directional =
			std::make_shared<::gfx::blur::gaussian_linear_directional>();
		return directional;
	}

	throw std::runtime_error("Invalid type.");
}
/// @return Smallest selectable blur size (1), independent of the type.
double_t gfx::blur::gaussian_linear_factory::get_min_size(::gfx::blur::type)
{
	return 1.0;
}

/// @return Increment between selectable blur sizes (1), independent of the type.
double_t gfx::blur::gaussian_linear_factory::get_step_size(::gfx::blur::type)
{
	return 1.0;
}

/// @return Largest selectable blur size (MAX_BLUR_SIZE), independent of the type.
double_t gfx::blur::gaussian_linear_factory::get_max_size(::gfx::blur::type)
{
	double_t const largest = MAX_BLUR_SIZE;
	return largest;
}
/// @return -180 for Directional and Rotational blur, 0 for every other type.
double_t gfx::blur::gaussian_linear_factory::get_min_angle(::gfx::blur::type v)
{
	bool const has_angle = (v == ::gfx::blur::type::Directional) || (v == ::gfx::blur::type::Rotational);
	return has_angle ? -180.0 : 0;
}

/// @return Increment between selectable angles (0.01), independent of the type.
double_t gfx::blur::gaussian_linear_factory::get_step_angle(::gfx::blur::type)
{
	return 0.01;
}

/// @return 180 for Directional and Rotational blur, 0 for every other type.
double_t gfx::blur::gaussian_linear_factory::get_max_angle(::gfx::blur::type v)
{
	bool const has_angle = (v == ::gfx::blur::type::Directional) || (v == ::gfx::blur::type::Rotational);
	return has_angle ? 180.0 : 0;
}
/// @return true for Area, Zoom and Directional blur, false for every other type.
bool gfx::blur::gaussian_linear_factory::is_step_scale_supported(::gfx::blur::type v)
{
	return (v == ::gfx::blur::type::Area) || (v == ::gfx::blur::type::Zoom)
		   || (v == ::gfx::blur::type::Directional);
}
/// @return Smallest horizontal step scale (0.01), independent of the type.
double_t gfx::blur::gaussian_linear_factory::get_min_step_scale_x(::gfx::blur::type)
{
	return 0.01;
}

/// @return Increment of the horizontal step scale (0.01), independent of the type.
double_t gfx::blur::gaussian_linear_factory::get_step_step_scale_x(::gfx::blur::type)
{
	return 0.01;
}

/// @return Largest horizontal step scale (1000), independent of the type.
double_t gfx::blur::gaussian_linear_factory::get_max_step_scale_x(::gfx::blur::type)
{
	return 1000.0;
}

/// @return Smallest vertical step scale (0.01), independent of the type.
double_t gfx::blur::gaussian_linear_factory::get_min_step_scale_y(::gfx::blur::type)
{
	return 0.01;
}

/// @return Increment of the vertical step scale (0.01), independent of the type.
double_t gfx::blur::gaussian_linear_factory::get_step_step_scale_y(::gfx::blur::type)
{
	return 0.01;
}

/// @return Largest vertical step scale (1000), independent of the type.
double_t gfx::blur::gaussian_linear_factory::get_max_step_scale_y(::gfx::blur::type)
{
	return 1000.0;
}
/// Returns the data object shared by all blur instances, creating it if no
/// instance currently keeps one alive. Calls are serialized by m_data_lock.
std::shared_ptr<::gfx::blur::gaussian_linear_data> gfx::blur::gaussian_linear_factory::data()
{
	std::lock_guard<std::mutex> guard(m_data_lock);

	// Reuse the existing object while somebody still holds it.
	if (std::shared_ptr<::gfx::blur::gaussian_linear_data> alive = m_data.lock()) {
		return alive;
	}

	// Only a weak reference is remembered, so the data is released together
	// with its last user.
	std::shared_ptr<::gfx::blur::gaussian_linear_data> fresh = std::make_shared<::gfx::blur::gaussian_linear_data>();
	m_data = fresh;
	return fresh;
}
/// @return The single factory instance, held in a function-local static that
///         is constructed on the first call.
::gfx::blur::gaussian_linear_factory& gfx::blur::gaussian_linear_factory::get()
{
static ::gfx::blur::gaussian_linear_factory instance;
return instance;
}
/// Creates an area blur of size 1 with a step scale of (1, 1), acquires the
/// shared effect/kernel data and allocates the two render targets that
/// render() alternates between.
gfx::blur::gaussian_linear::gaussian_linear()
	// Listed in declaration order (m_data, m_size, m_step_scale), which is the
	// order the members are actually initialized in.
	: m_data(::gfx::blur::gaussian_linear_factory::get().data()), m_size(1.), m_step_scale({1., 1.})
{
	m_rendertarget  = std::make_shared<gs::rendertarget>(GS_RGBA, GS_ZS_NONE);
	m_rendertarget2 = std::make_shared<gs::rendertarget>(GS_RGBA, GS_ZS_NONE);
}
/// Nothing to release explicitly; all resources are held by smart pointers.
gfx::blur::gaussian_linear::~gaussian_linear() {}
/// Sets the texture that the next render() call blurs.
///
/// @param texture Source texture; shared ownership is taken over.
void gfx::blur::gaussian_linear::set_input(std::shared_ptr<::gs::texture> texture)
{
	// The parameter is already our own copy, so move it instead of copying
	// the shared_ptr a second time.
	m_input_texture = std::move(texture);
}
/// @return Always ::gfx::blur::type::Area.
::gfx::blur::type gfx::blur::gaussian_linear::get_type()
{
return ::gfx::blur::type::Area;
}
/// @return The blur size as stored by set_size().
double_t gfx::blur::gaussian_linear::get_size()
{
return m_size;
}
/// Stores the blur size, limited to the range [1, MAX_BLUR_SIZE].
///
/// @param width Requested blur size.
void gfx::blur::gaussian_linear::set_size(double_t width)
{
	double_t const lowest  = 1.;
	double_t const highest = MAX_BLUR_SIZE;
	m_size                 = (width < lowest) ? lowest : ((width > highest) ? highest : width);
}
/// Stores the horizontal (x) and vertical (y) step scale without validation.
void gfx::blur::gaussian_linear::set_step_scale(double_t x, double_t y)
{
	m_step_scale = {x, y};
}

/// Writes the stored step scale into x and y.
void gfx::blur::gaussian_linear::get_step_scale(double_t& x, double_t& y)
{
	std::pair<double_t, double_t> const& scale = m_step_scale;
	x = scale.first;
	y = scale.second;
}

/// @return The stored horizontal step scale.
double_t gfx::blur::gaussian_linear::get_step_scale_x()
{
	double_t const horizontal = m_step_scale.first;
	return horizontal;
}

/// @return The stored vertical step scale.
double_t gfx::blur::gaussian_linear::get_step_scale_y()
{
	double_t const vertical = m_step_scale.second;
	return vertical;
}
/// Blurs the input texture with up to two separable passes (horizontal, then
/// vertical). A pass is skipped when its step scale is not above FLT_EPSILON.
///
/// @return The texture of the render target holding the result, or the
///         unmodified input texture if there is no effect, no input texture,
///         or the sum of both step scales is below FLT_EPSILON.
std::shared_ptr<::gs::texture> gfx::blur::gaussian_linear::render()
{
	std::shared_ptr<::gs::effect> effect = m_data->get_effect();

	// Without an effect or an input there is nothing to render; the input
	// (possibly null) is handed back instead of being dereferenced.
	if (!effect || !m_input_texture || ((m_step_scale.first + m_step_scale.second) < FLT_EPSILON)) {
		return m_input_texture;
	}

	// Bind by reference: get_kernel() returns a reference into the shared
	// table, and plain `auto` would copy the whole vector on every call.
	std::vector<float_t> const& kernel = m_data->get_kernel(size_t(m_size));

	float_t width  = float_t(m_input_texture->get_width());
	float_t height = float_t(m_input_texture->get_height());

	// Setup
	obs_enter_graphics();
	gs_set_cull_mode(GS_NEITHER);
	gs_enable_color(true, true, true, true);
	gs_enable_depth_test(false);
	gs_depth_function(GS_ALWAYS);
	gs_blend_state_push();
	gs_reset_blend_state();
	gs_enable_blending(false);
	gs_blend_function(GS_BLEND_ONE, GS_BLEND_ZERO);
	gs_enable_stencil_test(false);
	gs_enable_stencil_write(false);
	gs_stencil_function(GS_STENCIL_BOTH, GS_ALWAYS);
	gs_stencil_op(GS_STENCIL_BOTH, GS_ZERO, GS_ZERO, GS_ZERO);

	effect->get_parameter("pImage").set_texture(m_input_texture);
	effect->get_parameter("pStepScale").set_float2(float_t(m_step_scale.first), float_t(m_step_scale.second));
	effect->get_parameter("pSize").set_float(float_t(m_size));
	effect->get_parameter("pKernel").set_float_array(kernel.data(), MAX_KERNEL_SIZE);

	// First Pass: horizontal, one texel wide steps along X.
	if (m_step_scale.first > FLT_EPSILON) {
		effect->get_parameter("pImageTexel").set_float2(float_t(1. / width), 0.);

		{
			auto op = m_rendertarget2->render(uint32_t(width), uint32_t(height));
			gs_ortho(0, 1., 0, 1., 0, 1.);
			while (gs_effect_loop(effect->get_object(), "Draw")) {
				gs_draw_sprite(0, 0, 1, 1);
			}
		}

		// The freshly rendered target becomes the current result and the
		// input of the second pass.
		std::swap(m_rendertarget, m_rendertarget2);
		effect->get_parameter("pImage").set_texture(m_rendertarget->get_texture());
	}

	// Second Pass: vertical, one texel high steps along Y.
	if (m_step_scale.second > FLT_EPSILON) {
		effect->get_parameter("pImageTexel").set_float2(0., float_t(1. / height));

		{
			auto op = m_rendertarget2->render(uint32_t(width), uint32_t(height));
			gs_ortho(0, 1., 0, 1., 0, 1.);
			while (gs_effect_loop(effect->get_object(), "Draw")) {
				gs_draw_sprite(0, 0, 1, 1);
			}
		}

		std::swap(m_rendertarget, m_rendertarget2);
	}

	gs_blend_state_pop();
	obs_leave_graphics();

	return this->get();
}
/// @return The texture of m_rendertarget, which render() leaves pointing at
///         the most recently rendered pass.
std::shared_ptr<::gs::texture> gfx::blur::gaussian_linear::get()
{
return m_rendertarget->get_texture();
}
/// Creates a directional blur with an angle of 0; everything else is set up
/// by the gaussian_linear base constructor.
gfx::blur::gaussian_linear_directional::gaussian_linear_directional() : m_angle(0.) {}
/// Nothing to release explicitly.
gfx::blur::gaussian_linear_directional::~gaussian_linear_directional() {}
/// @return Always ::gfx::blur::type::Directional.
::gfx::blur::type gfx::blur::gaussian_linear_directional::get_type()
{
return ::gfx::blur::type::Directional;
}
/// @return The blur angle in degrees, converted from the radians kept in m_angle.
double_t gfx::blur::gaussian_linear_directional::get_angle()
{
	double_t const degrees = RAD_TO_DEG(m_angle);
	return degrees;
}

/// Stores the blur angle.
///
/// @param angle Angle in degrees; it is kept internally in radians.
void gfx::blur::gaussian_linear_directional::set_angle(double_t angle)
{
	double_t const radians = DEG_TO_RAD(angle);
	m_angle                = radians;
}
/// Blurs the input texture in a single pass along the direction given by the
/// stored angle.
///
/// @return The texture of the render target holding the result, or the
///         unmodified input texture if there is no effect, no input texture,
///         or the sum of both step scales is below FLT_EPSILON.
std::shared_ptr<::gs::texture> gfx::blur::gaussian_linear_directional::render()
{
	std::shared_ptr<::gs::effect> effect = m_data->get_effect();

	// Without an effect or an input there is nothing to render; the input
	// (possibly null) is handed back instead of being dereferenced.
	if (!effect || !m_input_texture || ((m_step_scale.first + m_step_scale.second) < FLT_EPSILON)) {
		return m_input_texture;
	}

	// Bind by reference: get_kernel() returns a reference into the shared
	// table, and plain `auto` would copy the whole vector on every call.
	std::vector<float_t> const& kernel = m_data->get_kernel(size_t(m_size));

	float_t width  = float_t(m_input_texture->get_width());
	float_t height = float_t(m_input_texture->get_height());

	// Setup
	obs_enter_graphics();
	gs_set_cull_mode(GS_NEITHER);
	gs_enable_color(true, true, true, true);
	gs_enable_depth_test(false);
	gs_depth_function(GS_ALWAYS);
	gs_blend_state_push();
	gs_reset_blend_state();
	gs_enable_blending(false);
	gs_blend_function(GS_BLEND_ONE, GS_BLEND_ZERO);
	gs_enable_stencil_test(false);
	gs_enable_stencil_write(false);
	gs_stencil_function(GS_STENCIL_BOTH, GS_ALWAYS);
	gs_stencil_op(GS_STENCIL_BOTH, GS_ZERO, GS_ZERO, GS_ZERO);

	effect->get_parameter("pImage").set_texture(m_input_texture);
	// One step moves a texel along the direction of the angle: the X part is
	// scaled by the cosine, the Y part by the sine.
	effect->get_parameter("pImageTexel")
		.set_float2(float_t(1. / width * cos(m_angle)), float_t(1. / height * sin(m_angle)));
	effect->get_parameter("pStepScale").set_float2(float_t(m_step_scale.first), float_t(m_step_scale.second));
	effect->get_parameter("pSize").set_float(float_t(m_size));
	effect->get_parameter("pKernel").set_float_array(kernel.data(), MAX_KERNEL_SIZE);

	// First Pass (the only one; it renders straight into m_rendertarget).
	{
		auto op = m_rendertarget->render(uint32_t(width), uint32_t(height));
		gs_ortho(0, 1., 0, 1., 0, 1.);
		while (gs_effect_loop(effect->get_object(), "Draw")) {
			gs_draw_sprite(0, 0, 1, 1);
		}
	}

	gs_blend_state_pop();
	obs_leave_graphics();

	return this->get();
}

View file

@ -0,0 +1,138 @@
// Modern effects for a modern Streamer
// Copyright (C) 2019 Michael Fabian Dirks
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
#pragma once
#include <mutex>
#include <vector>
#include "gfx-blur-base.hpp"
#include "obs/gs/gs-effect.hpp"
#include "obs/gs/gs-rendertarget.hpp"
#include "obs/gs/gs-texture.hpp"
namespace gfx {
namespace blur {
/// Data shared by all linear Gaussian blur instances: the loaded effect and
/// the kernels precalculated by the constructor.
class gaussian_linear_data {
/// Effect loaded from "effects/blur/gaussian-linear.effect".
std::shared_ptr<::gs::effect> m_effect;
/// One kernel per blur size; get_kernel() maps size N to index N - 1.
std::vector<std::vector<float_t>> m_kernels;
public:
/// Loads the effect and precalculates all kernels.
gaussian_linear_data();
~gaussian_linear_data();
/// @return The loaded effect.
std::shared_ptr<::gs::effect> get_effect();
/// @return The kernel for the given blur size; the size is clamped to the supported range.
std::vector<float_t> const& get_kernel(size_t width);
};
/// Factory for linear Gaussian blur instances. It also reports the supported
/// types and parameter ranges and hands out the shared gaussian_linear_data.
class gaussian_linear_factory : public ::gfx::blur::ifactory {
/// Serializes data().
std::mutex m_data_lock;
/// Weak reference to the shared data, so it only lives as long as something uses it.
std::weak_ptr<::gfx::blur::gaussian_linear_data> m_data;
public:
gaussian_linear_factory();
virtual ~gaussian_linear_factory();
/// @return true for Area and Directional blur.
virtual bool is_type_supported(::gfx::blur::type type) override;
/// Creates a blur instance; throws std::runtime_error for unsupported types.
virtual std::shared_ptr<::gfx::blur::ibase> create(::gfx::blur::type type) override;
// Blur size range and increment.
virtual double_t get_min_size(::gfx::blur::type type) override;
virtual double_t get_step_size(::gfx::blur::type type) override;
virtual double_t get_max_size(::gfx::blur::type type) override;
// Angle range and increment, in degrees.
virtual double_t get_min_angle(::gfx::blur::type type) override;
virtual double_t get_step_angle(::gfx::blur::type type) override;
virtual double_t get_max_angle(::gfx::blur::type type) override;
// Step scale support, range and increment per axis.
virtual bool is_step_scale_supported(::gfx::blur::type type) override;
virtual double_t get_min_step_scale_x(::gfx::blur::type type) override;
virtual double_t get_step_step_scale_x(::gfx::blur::type type) override;
virtual double_t get_max_step_scale_x(::gfx::blur::type type) override;
virtual double_t get_min_step_scale_y(::gfx::blur::type type) override;
virtual double_t get_step_step_scale_y(::gfx::blur::type type) override;
virtual double_t get_max_step_scale_y(::gfx::blur::type type) override;
/// @return The shared data object, created on demand.
std::shared_ptr<::gfx::blur::gaussian_linear_data> data();
public: // Singleton
/// @return The single factory instance.
static ::gfx::blur::gaussian_linear_factory& get();
};
/// Area blur: render() runs a horizontal and a vertical pass of the linear
/// Gaussian effect over the input texture.
class gaussian_linear : public ::gfx::blur::ibase {
protected:
/// Shared effect and kernels, obtained from the factory.
std::shared_ptr<::gfx::blur::gaussian_linear_data> m_data;
/// Blur size; set_size() keeps it within [1, MAX_BLUR_SIZE].
double_t m_size;
/// Step scale as (x, y).
std::pair<double_t, double_t> m_step_scale;
/// Texture to blur, as given to set_input().
std::shared_ptr<::gs::texture> m_input_texture;
/// Render target whose texture get() returns.
std::shared_ptr<::gs::rendertarget> m_rendertarget;
private:
/// Second render target; render() draws into it and then swaps it with m_rendertarget.
std::shared_ptr<::gs::rendertarget> m_rendertarget2;
public:
gaussian_linear();
virtual ~gaussian_linear();
virtual void set_input(std::shared_ptr<::gs::texture> texture) override;
virtual ::gfx::blur::type get_type() override;
virtual double_t get_size() override;
virtual void set_size(double_t width) override;
virtual void set_step_scale(double_t x, double_t y) override;
virtual void get_step_scale(double_t& x, double_t& y) override;
virtual double_t get_step_scale_x() override;
virtual double_t get_step_scale_y() override;
/// Renders the blur and returns the resulting texture.
virtual std::shared_ptr<::gs::texture> render() override;
/// @return The texture of the current result render target.
virtual std::shared_ptr<::gs::texture> get() override;
};
/// Directional blur: a single pass of the linear Gaussian effect along the
/// direction given by an angle.
class gaussian_linear_directional : public ::gfx::blur::gaussian_linear, public ::gfx::blur::ibase_angle {
/// Blur direction in radians; get_angle()/set_angle() convert from/to degrees.
double_t m_angle;
public:
gaussian_linear_directional();
virtual ~gaussian_linear_directional();
virtual ::gfx::blur::type get_type() override;
virtual double_t get_angle() override;
virtual void set_angle(double_t angle) override;
/// Renders the blur in one pass and returns the resulting texture.
virtual std::shared_ptr<::gs::texture> render() override;
};
} // namespace blur
} // namespace gfx