From a6f945165485b9657311bec733800c68b8a57c8b Mon Sep 17 00:00:00 2001 From: Michael Fabian 'Xaymar' Dirks Date: Sun, 23 Dec 2018 02:00:30 +0100 Subject: [PATCH] filter-blur: Switch to Kernel Array instead of Kernel Texture This speeds up Gaussian Blur and Linear Gaussian Blur drastically: it reduces time spent reading textures and instead uses existing registers - maximizing time spent reading the actual image texture. See Also: #21 Blur Quality --- data/effects/blur.effect | 24 +++++++++++++----------- source/filter-blur.cpp | 36 ++++++++++++++++++++---------------- source/filter-blur.h | 5 ++++- 3 files changed, 37 insertions(+), 28 deletions(-) diff --git a/data/effects/blur.effect b/data/effects/blur.effect index 7b7e7d3b..36818fea 100644 --- a/data/effects/blur.effect +++ b/data/effects/blur.effect @@ -10,9 +10,7 @@ uniform int u_diameter; uniform float2 u_texelDelta; // Kernel Settings -//uniform float registerkernel[25]; -uniform texture2d kernel; -uniform float2 kernelTexel; +uniform float4 kernel[8]; // max kernel radius 31+center. // Bilateral Settings uniform float bilateralSmoothing; @@ -53,6 +51,11 @@ VertDataOut VSDefault(VertDataIn vtx) return vert_out; } +/// Utility +float GetKernelAt(int i) { + return ((float[4])(kernel[floor(i/4)]))[i%4]; +} + /// Blur: Box float4 PSBoxBlur(VertDataOut vtx) : TARGET { float4 origin = u_image.SampleLevel(pointSampler, vtx.uv, 0); @@ -131,14 +134,13 @@ technique BoxLinear } /// Blur: Gaussian -// ToDo: Switch to array Kernel instead of Texture kernel. 
float4 PSGaussianBlur(VertDataOut vtx) : TARGET { float2 uvOffset = float2(0, 0); float4 final = u_image.SampleLevel(pointSampler, vtx.uv, 0) - * kernel.SampleLevel(pointSampler, (float2(0, u_radius - 1) * kernelTexel), 0).r; + * GetKernelAt(0); for (int k = 1; k <= u_radius; k++) { uvOffset += u_texelDelta; - float l_g = kernel.SampleLevel(pointSampler, (float2(k, u_radius - 1) * kernelTexel), 0).r; + float l_g = GetKernelAt(k); float4 l_p = u_image.SampleLevel(pointSampler, vtx.uv + uvOffset, 0); float4 l_n = u_image.SampleLevel(pointSampler, vtx.uv - uvOffset, 0); final += (l_p + l_n) * l_g; @@ -180,15 +182,15 @@ float4 PSGaussianLinearBlur(VertDataOut vtx) : TARGET { // [-2, -1, 0, +1, +2] // ^-S-^ S ^-S-^ // Total Samples: 3 (n+1) - + float4 origin = u_image.SampleLevel(pointSampler, vtx.uv, 0); - float4 final = origin * kernel.SampleLevel(pointSampler, (float2(0, u_radius - 1) * kernelTexel), 0).r; + float4 final = origin * GetKernelAt(0); float2 halfTexelDelta = u_texelDelta / 2.0; for (int k = 1; k < u_radius; k+=2) { float2 offset = k * u_texelDelta + halfTexelDelta; - float l_g0 = kernel.SampleLevel(pointSampler, (float2(k, u_radius - 1) * kernelTexel), 0).r; - float l_g1 = kernel.SampleLevel(pointSampler, (float2(k + 1, u_radius - 1) * kernelTexel), 0).r; + float l_g0 = GetKernelAt(k); + float l_g1 = GetKernelAt(k +1); float4 l_p = u_image.SampleLevel(linearSampler, vtx.uv + offset, 0); float4 l_n = u_image.SampleLevel(linearSampler, vtx.uv - offset, 0); final += (l_p + l_n) * l_g0; @@ -199,7 +201,7 @@ float4 PSGaussianLinearBlur(VertDataOut vtx) : TARGET { // Odd numbers require treatment of ends. 
float4 left = u_image.SampleLevel(pointSampler, vtx.uv + u_texelDelta * u_radius, 0); float4 right = u_image.SampleLevel(pointSampler, vtx.uv - u_texelDelta * u_radius, 0); - float krn = kernel.SampleLevel(pointSampler, (float2(u_radius, u_radius - 1) * kernelTexel), 0).r; + float krn = GetKernelAt(u_radius); final += (left + right) * krn; } diff --git a/source/filter-blur.cpp b/source/filter-blur.cpp index 70b6f8a2..a88918b6 100644 --- a/source/filter-blur.cpp +++ b/source/filter-blur.cpp @@ -120,18 +120,12 @@ bool filter::blur::blur_instance::apply_bilateral_param() return true; } -bool filter::blur::blur_instance::apply_gaussian_param() +bool filter::blur::blur_instance::apply_gaussian_param(uint8_t width) { - std::shared_ptr kernel = filter::blur::blur_factory::get()->get_kernel(filter::blur::type::Gaussian); + auto kernel = filter::blur::blur_factory::get()->get_gaussian_kernel(width); if (blur_effect->has_parameter("kernel")) { - blur_effect->get_parameter("kernel").set_texture(kernel); - } - - if (blur_effect->has_parameter("kernelTexel")) { - float_t wb = 1.0f / kernel->get_width(); - float_t hb = 1.0f / kernel->get_height(); - blur_effect->get_parameter("kernelTexel").set_float2(wb, hb); + blur_effect->get_parameter("kernel").set_float_array(&(kernel->front()), kernel->size()); } return true; @@ -604,7 +598,7 @@ void filter::blur::blur_instance::video_render(gs_effect_t* effect) } #pragma endregion RGB->YUV -#pragma region blur +#pragma region Blur // Set up camera stuff gs_set_cull_mode(GS_NEITHER); gs_reset_blend_state(); @@ -630,7 +624,7 @@ void filter::blur::blur_instance::video_render(gs_effect_t* effect) if (!apply_shared_param(intermediate, xpel, ypel)) break; - apply_gaussian_param(); + apply_gaussian_param(this->size); apply_bilateral_param(); gs_texrender_reset(rt); @@ -853,11 +847,13 @@ void filter::blur::blur_factory::generate_gaussian_kernels() // 2D texture, horizontal is value, vertical is kernel size. 
size_t size_power_of_two = size_t(pow(2, util::math::get_power_of_two_exponent_ceil(max_kernel_size))); - std::vector texture_Data(size_power_of_two * size_power_of_two); - std::vector math_data(size_power_of_two); + std::vector texture_data(size_power_of_two * size_power_of_two); + std::vector math_data(size_power_of_two); + std::shared_ptr> kernel_data; for (size_t width = 1; width <= max_kernel_size; width++) { - size_t v = (width - 1) * size_power_of_two; + size_t v = (width - 1) * size_power_of_two; + kernel_data = std::make_shared>(size_power_of_two); // Calculate and normalize float_t sum = 0; @@ -869,13 +865,16 @@ void filter::blur::blur_factory::generate_gaussian_kernels() // Normalize to Texture Buffer double_t inverse_sum = 1.0 / sum; for (size_t p = 0; p <= width; p++) { - texture_Data[v + p] = float_t(math_data[p] * inverse_sum); + texture_data[v + p] = float_t(math_data[p] * inverse_sum); + kernel_data->at(p) = texture_data[v + p]; } + + gaussian_kernels.insert({uint8_t(width), kernel_data}); } // Create Texture try { - auto texture_buffer = reinterpret_cast(texture_Data.data()); + auto texture_buffer = reinterpret_cast(texture_data.data()); auto unsafe_buffer = const_cast(&texture_buffer); kernels.insert_or_assign(filter::blur::type::Gaussian, @@ -1046,6 +1045,11 @@ std::shared_ptr filter::blur::blur_factory::get_kernel(filter::blur return kernels.at(type); } +std::shared_ptr> filter::blur::blur_factory::get_gaussian_kernel(uint8_t size) +{ + return gaussian_kernels.at(size); +} + obs_scene_t* filter::blur::blur_factory::get_scene(std::string name) { auto kv = scenes.find(name); diff --git a/source/filter-blur.h b/source/filter-blur.h index 0143582c..2abf10ba 100644 --- a/source/filter-blur.h +++ b/source/filter-blur.h @@ -111,7 +111,7 @@ namespace filter { bool apply_shared_param(gs_texture_t* input, float texelX, float texelY); bool apply_bilateral_param(); - bool apply_gaussian_param(); + bool apply_gaussian_param(uint8_t width); bool 
apply_mask_parameters(std::shared_ptr effect, gs_texture_t* original_texture, gs_texture_t* blurred_texture); @@ -143,6 +143,7 @@ namespace filter { std::shared_ptr blur_effect; std::map> kernels; + std::map>> gaussian_kernels; std::map scenes; @@ -188,6 +189,8 @@ namespace filter { std::shared_ptr get_kernel(filter::blur::type type); + std::shared_ptr> get_gaussian_kernel(uint8_t size); + obs_scene_t* get_scene(std::string name); void enum_scenes(std::function fnc);