mirror of
synced 2024-12-28 18:41:14 +00:00
filter-blur: Use only a single Gaussian Kernel Texture
This commit is contained in:
3 changed files with 77 additions and 147 deletions
@ -14,20 +14,13 @@ uniform float2 u_texelDelta;
// Texture holding the precalculated Gaussian weights, and the scale applied to texel
// indices when sampling it. The host code added in this change sets kernelTexel to
// (1 / width, 1 / height) of the kernel texture.
uniform texture2d kernel;
uniform float2 kernelTexel;
// NOTE(review): this excerpt is diff text with brace-only lines dropped, so two sampler
// headers appear to share one body. The helper functions below sample through
// pointClampSampler, while the inlined loop in PSGaussian samples through textureSampler;
// presumably the first header is the removed line and the second its replacement - confirm.
sampler_state pointClampSampler {
sampler_state textureSampler {
// Point filtering with clamped addressing on both axes, restricted to LOD 0.
Filter = Point;
AddressU = Clamp;
AddressV = Clamp;
MinLOD = 0;
MaxLOD = 0;
// Bilinear variant of the sampler above. No sampling call visible in this excerpt uses it.
sampler_state bilinearClampSampler {
Filter = Bilinear;
AddressU = Clamp;
AddressV = Clamp;
MinLOD = 0;
MaxLOD = 0;
struct VertDataIn {
float4 pos : POSITION;
@ -47,81 +40,18 @@ VertDataOut VSDefault(VertDataIn v_in)
return vert_out;
// Gaussian Blur
// Evaluates the one-dimensional Gaussian function at offset x for deviation o:
//   1 / (o * sqrt(2 * pi)) * exp(-(x * x) / (2 * o * o))
// o is used as a divisor, so it must be non-zero.
float Gaussian(float x, float o) {
const float pivalue = 3.1415926535897932384626433832795;
return (1.0 / (o * sqrt(2.0 * pivalue))) * exp((-(x * x)) / (2 * (o * o)));
// Blurs one pixel along a single direction, computing each weight with Gaussian().
//   p_uv         - UV coordinate of the pixel being blurred.
//   p_uvStep     - UV offset added per tap; taps are read at p_uv +/- k * p_uvStep.
//   p_radius     - declared, but the visible body reads `p_size` instead (see note).
//   p_image      - texture that is sampled.
//   p_imageTexel - not read anywhere in the visible body.
// Returns the accumulated weighted colour multiplied by 1 / l_gauss.
// NOTE(review): `p_size` and `gauss` are not declared by the visible signature or body
// (the parameter is `p_radius`, the local is `l_gauss`), so this text would not compile as
// shown. The enclosing hunk header shrinks this region from 81 to 18 lines, so this helper
// is presumably on the removed side of the diff - confirm.
float4 InternalGaussian(float2 p_uv, float2 p_uvStep, int p_radius,
texture2d p_image, float2 p_imageTexel) {
// Centre tap, weighted with the Gaussian value at offset 0.
float l_gauss = Gaussian(0, p_size);
float4 l_value = p_image.Sample(pointClampSampler, p_uv) * gauss;
float2 l_uvoffset = float2(0, 0);
for (int k = 1; k <= p_size; k++) {
l_uvoffset += p_uvStep;
float l_g = Gaussian(k, p_size);
// The same weight is applied to the tap on either side of the centre.
float4 l_p = p_image.Sample(pointClampSampler, p_uv + l_uvoffset) * l_g;
float4 l_n = p_image.Sample(pointClampSampler, p_uv - l_uvoffset) * l_g;
l_value += l_p + l_n;
// NOTE(review): l_g weights two taps above but is added to the weight sum only once.
l_gauss += l_g;
l_value = l_value * (1.0 / l_gauss);
return l_value;
// Blurs one pixel along a single direction using weights read from a kernel texture.
//   p_uv          - UV coordinate of the pixel being blurred.
//   p_uvStep      - UV offset added per tap; taps are read at p_uv +/- k * p_uvStep.
//   p_radius      - number of taps taken on each side of the centre.
//   p_image       - texture that is sampled.
//   p_imageTexel  - not read anywhere in the visible body.
//   p_kernel      - texture whose red channel supplies the weight for tap k.
//   p_kernelTexel - offset in the kernel texture per tap; weight k is read at p_kernelTexel * k.
// Returns the weighted sum of the taps. No division by the weight sum happens here, so the
// weights are presumably already normalized by the code that fills the texture - confirm.
float4 InternalGaussianPrecalculated(float2 p_uv, float2 p_uvStep, int p_radius,
texture2d p_image, float2 p_imageTexel,
texture2d p_kernel, float2 p_kernelTexel) {
// NOTE(review): the centre weight is read from the global `kernel` uniform, whereas the
// loop below reads from the `p_kernel` parameter.
float4 l_value = p_image.Sample(pointClampSampler, p_uv)
* kernel.Sample(pointClampSampler, float2(0, 0)).r;
float2 l_uvoffset = float2(0, 0);
for (int k = 1; k <= p_radius; k++) {
l_uvoffset += p_uvStep;
float l_g = p_kernel.Sample(pointClampSampler, p_kernelTexel * k).r;
// The same weight is applied to the tap on either side of the centre.
float4 l_p = p_image.Sample(pointClampSampler, p_uv + l_uvoffset) * l_g;
float4 l_n = p_image.Sample(pointClampSampler, p_uv - l_uvoffset) * l_g;
l_value += l_p + l_n;
return l_value;
// Disabled variant: the definition below begins inside a block comment.
// NOTE(review): as written it reads `p_texel` and `p_kerneltexel`, neither of which is
// declared by its signature, and the name differs from the
// InternalGaussianPrecalculatedNVOptimize call that appears in PSGaussian.
/*float4 InternalGaussianPrecalculatedNVOptimized(float2 p_uv, int p_size,
texture2d p_image, float2 p_imageTexel,
texture2d p_kernel, float2 p_kernelTexel) {
// Even sizes run the same symmetric accumulation as InternalGaussianPrecalculated,
// stepping the image offset and the kernel offset once per iteration.
if (p_size % 2 == 0) {
float4 l_value = p_image.Sample(pointClampSampler, p_uv)
* kernel.Sample(pointClampSampler, float2(0, 0)).r;
float2 l_uvoffset = p_texel;
float2 l_koffset = p_kernelTexel;
for (int k = 1; k <= p_size; k++) {
float l_g = p_kernel.Sample(pointClampSampler, l_koffset).r;
float4 l_p = p_image.Sample(pointClampSampler, p_uv + l_uvoffset) * l_g;
float4 l_n = p_image.Sample(pointClampSampler, p_uv - l_uvoffset) * l_g;
l_value += l_p + l_n;
l_uvoffset += p_texel;
l_koffset += p_kernelTexel;
return l_value;
} else {
// NOTE(review): six arguments are passed here, in a different order than the seven
// parameters of the visible InternalGaussianPrecalculated signature, and the line ends
// with a stray ')' after the semicolon.
return InternalGaussianPrecalculated(p_uv, p_image, p_texel, p_size, p_kernel, p_kerneltexel);)
// Pixel shader entry point for the Gaussian blur pass.
// NOTE(review): this excerpt is diff text, so the helper calls and the inlined loop further
// down both appear under one header. The enclosing hunk shrinks from 81 to 18 lines, so the
// helper calls are presumably the removed side and the inlined loop the added side - confirm.
float4 PSGaussian(VertDataOut v_in) : TARGET {
//return InternalGaussian(v_in.uv, u_texelDelta, u_image, u_imageTexel, u_radius);
return InternalGaussianPrecalculated(
v_in.uv, u_texelDelta, u_radius,
u_image, u_imageTexel,
kernel, kernelTexel);//*/
return InternalGaussianPrecalculatedNVOptimize(
v_in.uv, u_texelDelta, u_radius,
u_image, u_imageTexel,
kernel, kernelTexel);//*/
// Inlined blur: weights come from row (u_radius - 1) of the 2D kernel texture, column k
// holding the weight for tap k. This matches generate_gaussian_kernels() in the C++ part of
// this change, which writes the kernel of a given width into row (width - 1).
float2 uvOffset = float2(0, 0);
// Centre tap uses column 0 of that row.
float4 rgba = u_image.SampleLevel(textureSampler, v_in.uv, 0)
* kernel.SampleLevel(textureSampler, (float2(0, u_radius - 1) * kernelTexel), 0).r;
for (int k = 1; k <= u_radius; k++) {
uvOffset += u_texelDelta;
// NOTE(review): the kernel coordinate is an integer texel index scaled by kernelTexel,
// with no half-texel offset - verify that this addresses the intended texel.
float l_g = kernel.SampleLevel(textureSampler, (float2(k, u_radius - 1) * kernelTexel), 0).r;
// The same weight is applied to the tap on either side of the centre.
float4 l_p = u_image.SampleLevel(textureSampler, v_in.uv + uvOffset, 0) * l_g;
float4 l_n = u_image.SampleLevel(textureSampler, v_in.uv - uvOffset, 0) * l_g;
rgba += l_p + l_n;
// No normalization here; the host code normalizes each row before upload.
return rgba;
technique Draw
@ -22,6 +22,7 @@
#include "util-math.h"
#include <math.h>
#include <map>
#include <inttypes.h>
extern "C" {
#pragma warning (push)
@ -63,50 +64,6 @@ enum ColorFormat : uint64_t {
// Global Data
// NOTE(review): elsewhere in this change, uses of g_effects sit next to equivalent uses of
// filterBlurInstance->m_effects, and the header gains max_kernel_size, m_effects and
// m_gaussianKernelTexture members; these globals are presumably the removed side - confirm.
// Number of kernel sizes generated by GenerateGaussianKernelTextures().
const size_t MaxKernelSize = 25;
// Loaded effects, looked up by name (for example "Box Blur").
std::map<std::string, std::shared_ptr<gs::effect>> g_effects;
// One kernel texture per size, indexed by (size - 1) in apply_gaussian_param().
std::vector<std::shared_ptr<gs::texture>> g_gaussianKernels;
// Returns 0.39894 * exp(-0.5 * x^2 / o^2) / o.
// 0.39894 is approximately 1 / sqrt(2 * pi), so this is numerically the Gaussian function
// at x for deviation o. o is used as a divisor and must be non-zero.
double_t bilateral(double_t x, double_t o) {
return 0.39894 * exp(-0.5 * (x * x) / (o * o)) / o;
// Builds one single-row R32F texture per kernel size and stores it in g_gaussianKernels[n].
// For index n the kernel has (2 + n) entries computed with Gaussian1D(p, n + 1); the
// entries are scaled so that entry 0 plus twice every other entry sums to 1.
// Gaussian1D and GetNearestPowerOfTwoAbove are not defined in this excerpt.
// NOTE(review): the enclosing hunk shrinks from 50 to 6 lines, so this function is
// presumably on the removed side of the diff - confirm.
static void GenerateGaussianKernelTextures() {
size_t textureBufferSize = GetNearestPowerOfTwoAbove(MaxKernelSize);
std::vector<double_t> mathBuffer(MaxKernelSize + 1);
std::vector<float_t> textureBuffer(textureBufferSize);
for (size_t n = 0; n < MaxKernelSize; n++) {
size_t width = 2 + n;
// Generate Gaussian Gradient and calculate sum.
double_t sum = 0;
for (size_t p = 0; p < width; p++) {
mathBuffer[p] = Gaussian1D(double_t(p), double_t(n + 1));
// Entries other than the centre are counted twice: the shader applies them on both sides.
sum += mathBuffer[p] * (p > 0 ? 2 : 1);
// Normalize
double_t inverseSum = 1.0 / sum;
for (size_t p = 0; p < width; p++) {
textureBuffer[p] = float_t(mathBuffer[p] * inverseSum);
// Create Texture
// The float buffer is reinterpreted as bytes and passed through a pointer-to-pointer.
uint8_t* data = reinterpret_cast<uint8_t*>(textureBuffer.data());
const uint8_t** pdata = const_cast<const uint8_t**>(&data);
try {
// Arguments are presumably (width, height, format, mip levels, data, flags) - confirm
// against the gs::texture constructor.
std::shared_ptr<gs::texture> tex = std::make_shared<gs::texture>((uint32_t)textureBufferSize, 1,
gs_color_format::GS_R32F, 1, pdata, 0);
// NOTE(review): no resize of g_gaussianKernels is visible in this excerpt; indexing
// requires it to already hold at least MaxKernelSize elements - verify.
g_gaussianKernels[n] = tex;
} catch (std::runtime_error ex) {
// NOTE(review): n is a size_t but is formatted with %d.
P_LOG_ERROR("<filter-blur> Failed to create gaussian kernel for %d width.", n);
Filter::Blur::Blur() {
memset(&m_sourceInfo, 0, sizeof(obs_source_info));
m_sourceInfo.id = "obs-stream-effects-filter-blur";
@ -135,7 +92,7 @@ Filter::Blur::Blur() {
for (auto& kv : effects) {
try {
std::shared_ptr<gs::effect> effect = std::make_shared<gs::effect>(kv.second);
g_effects.insert(std::make_pair(kv.first, effect));
m_effects.insert(std::make_pair(kv.first, effect));
} catch (std::runtime_error ex) {
P_LOG_ERROR("<filter-blur> Loading effect '%s' (path: '%s') failed with error(s): %s",
kv.first.c_str(), kv.second.c_str(), ex.what());
@ -143,15 +100,52 @@ Filter::Blur::Blur() {
Filter::Blur::~Blur() {
// Fills one square R32F texture with every Gaussian kernel from width 1 to max_kernel_size
// and stores it in m_gaussianKernelTexture.
// Row (width - 1) holds the kernel for that width: column p is Gaussian1D(p, width), scaled
// so that column 0 plus twice every other column sums to 1.
// Gaussian1D and GetNearestPowerOfTwoAbove are not defined in this excerpt.
// NOTE(review): brace-only lines are missing from this excerpt, so whether the texture is
// created once after the width loop or inside it cannot be determined here.
void Filter::Blur::generate_gaussian_kernels() {
// 2D texture, horizontal is value, vertical is kernel size.
size_t textureSizePOT = GetNearestPowerOfTwoAbove(max_kernel_size);
std::vector<float_t> textureBuffer(textureSizePOT * textureSizePOT);
std::vector<float_t> mathBuffer(textureSizePOT);
for (size_t width = 1; width <= max_kernel_size; width++) {
// Index of the first element of this kernel's row in the flat buffer.
size_t v = (width - 1) * textureSizePOT;
// Calculate and normalize
float_t sum = 0;
// p runs from 0 to width inclusive, so each row gets (width + 1) entries; this assumes
// textureSizePOT is larger than max_kernel_size - TODO confirm against the helper.
for (size_t p = 0; p <= width; p++) {
mathBuffer[p] = float_t(Gaussian1D(double_t(p), double_t(width)));
// Entries other than the centre are counted twice: the shader applies them on both sides.
sum += mathBuffer[p] * (p > 0 ? 2 : 1);
// Normalize to Texture Buffer
double_t inverseSum = 1.0 / sum;
for (size_t p = 0; p <= width; p++) {
textureBuffer[v + p] = float_t(mathBuffer[p] * inverseSum);
// Create Texture
try {
// The float buffer is reinterpreted as bytes and passed through a pointer-to-pointer.
auto buf = reinterpret_cast<uint8_t*>(textureBuffer.data());
auto rbuf = const_cast<const uint8_t**>(&buf);
m_gaussianKernelTexture = std::make_shared<gs::texture>(uint32_t(textureSizePOT), uint32_t(textureSizePOT), GS_R32F, 1, rbuf, 0);
} catch (std::runtime_error ex) {
// The failure is only logged; m_gaussianKernelTexture is not assigned in that case.
P_LOG_ERROR("<filter-blur> Failed to create gaussian kernel texture.");
void Filter::Blur::generate_kernel_textures() {
const char * Filter::Blur::get_name(void *) {
@ -271,7 +265,7 @@ void Filter::Blur::video_render(void *ptr, gs_effect_t *effect) {
Filter::Blur::Instance::Instance(obs_data_t *data, obs_source_t *context) : m_source(context) {
m_effect = g_effects.at("Box Blur");
m_effect = filterBlurInstance->m_effects.at("Box Blur");
m_primaryRT = gs_texrender_create(GS_RGBA, GS_ZS_NONE);
m_secondaryRT = gs_texrender_create(GS_RGBA, GS_ZS_NONE);
m_rtHorizontal = gs_texrender_create(GS_RGBA, GS_ZS_NONE);
@ -303,13 +297,13 @@ void Filter::Blur::Instance::update(obs_data_t *data) {
m_type = (Type)obs_data_get_int(data, S_TYPE);
switch (m_type) {
case Filter::Blur::Type::Box:
m_effect = g_effects.at("Box Blur");
m_effect = filterBlurInstance->m_effects.at("Box Blur");
case Filter::Blur::Type::Gaussian:
m_effect = g_effects.at("Gaussian Blur");
m_effect = filterBlurInstance->m_effects.at("Gaussian Blur");
case Filter::Blur::Type::Bilateral:
m_effect = g_effects.at("Bilateral Blur");
m_effect = filterBlurInstance->m_effects.at("Bilateral Blur");
m_size = (uint64_t)obs_data_get_int(data, S_SIZE);
@ -345,7 +339,7 @@ void Filter::Blur::Instance::video_render(gs_effect_t *effect) {
baseW = obs_source_get_base_width(target),
baseH = obs_source_get_base_height(target);
gs_effect_t* colorConversionEffect = g_effects.count("Color Conversion") ? g_effects.at("Color Conversion")->get_object() : nullptr;
gs_effect_t* colorConversionEffect = filterBlurInstance->m_effects.count("Color Conversion") ? filterBlurInstance->m_effects.at("Color Conversion")->get_object() : nullptr;
// Skip rendering if our target, parent or context is not valid.
if (!target || !parent || !m_source) {
@ -615,20 +609,18 @@ bool Filter::Blur::Instance::apply_bilateral_param() {
// Passes the Gaussian kernel parameters ("kernel", "kernelTexel") to the current effect.
// NOTE(review): this excerpt is diff text with the old and new bodies interleaved and
// brace-only lines dropped, so it is not one compilable body. The statements using
// g_gaussianKernels, MaxKernelSize and gs_set_param_* are presumably the removed side, and
// those using m_effect->has_parameter / get_parameter and
// filterBlurInstance->m_gaussianKernelTexture the added side - confirm.
bool Filter::Blur::Instance::apply_gaussian_param() {
bool result = true;
if (m_type != Type::Gaussian)
// Returns false when the effect has no "kernel" parameter. No statement is visible between
// the `if` line and the `else` line in this excerpt.
if (m_effect->has_parameter("kernel")) {
} else {
return false;
std::shared_ptr<gs::texture> tex;
// Selects the per-size texture at index (m_size - 1) when that index is below MaxKernelSize.
if ((m_size - 1) < MaxKernelSize) {
tex = g_gaussianKernels[size_t(m_size - 1)];
result = result && gs_set_param_texture(m_effect->get_object(), "kernel", tex->get_object());
// This variant sets kernelTexel to (1 / texture width, 0).
vec2 kerneltexel;
vec2_set(&kerneltexel, 1.0f / gs_texture_get_width(tex->get_object()), 0);
result = result && gs_set_param_float2(m_effect->get_object(), "kernelTexel", &kerneltexel);
// This variant sets kernelTexel to (1 / width, 1 / height) of the shared kernel texture,
// and only when the effect exposes that parameter.
if (m_effect->has_parameter("kernelTexel")) {
auto tex = filterBlurInstance->m_gaussianKernelTexture->get_object();
float_t wb = 1.0f / gs_texture_get_width(tex);
float_t hb = 1.0f / gs_texture_get_height(tex);
m_effect->get_parameter("kernelTexel").set_float2(wb, hb);
return result;
return true;
@ -31,6 +31,9 @@ namespace Filter {
void generate_gaussian_kernels();
void generate_kernel_textures();
enum Type : int64_t {
@ -38,9 +41,14 @@ namespace Filter {
std::shared_ptr<gs::texture> m_gaussianKernelTexture;
std::map<std::string, std::shared_ptr<gs::effect>> m_effects;
obs_source_info m_sourceInfo;
static const size_t max_kernel_size = 25;
public /*static*/:
static const char *get_name(void *);
static void get_defaults(obs_data_t *);
Reference in a new issue