filter-blur: Switch to Kernel Array instead of Kernel Texture

This drastically speeds up Gaussian Blur and Linear Gaussian Blur: it reduces the time spent reading textures and instead uses existing registers, maximizing the time spent reading the actual image texture.

See Also: #21 Blur Quality
This commit is contained in:
Michael Fabian 'Xaymar' Dirks 2018-12-23 02:00:30 +01:00
parent 92c4b54177
commit a6f9451654
3 changed files with 37 additions and 28 deletions

View file

@ -10,9 +10,7 @@ uniform int u_diameter;
uniform float2 u_texelDelta;
// Kernel Settings
//uniform float registerkernel[25];
uniform texture2d kernel;
uniform float2 kernelTexel;
uniform float4 kernel[8]; // max kernel radius 31+center.
// Bilateral Settings
uniform float bilateralSmoothing;
@ -53,6 +51,11 @@ VertDataOut VSDefault(VertDataIn vtx)
return vert_out;
}
/// Utility
// Returns the i-th scalar weight from the packed `kernel` uniform array.
// `kernel` is declared as `float4 kernel[8]`, so four consecutive weights share
// one float4: i / 4 selects the float4 element and i % 4 selects the component
// inside it. With 8 float4 elements the addressable range is i = 0..31.
// No bounds check is performed here.
float GetKernelAt(int i) {
// View the selected float4 as float[4] so it can be indexed by the remainder.
return ((float[4])(kernel[floor(i/4)]))[i%4];
}
/// Blur: Box
float4 PSBoxBlur(VertDataOut vtx) : TARGET {
float4 origin = u_image.SampleLevel(pointSampler, vtx.uv, 0);
@ -131,14 +134,13 @@ technique BoxLinear
}
/// Blur: Gaussian
// ToDo: Switch to array Kernel instead of Texture kernel.
float4 PSGaussianBlur(VertDataOut vtx) : TARGET {
float2 uvOffset = float2(0, 0);
float4 final = u_image.SampleLevel(pointSampler, vtx.uv, 0)
* kernel.SampleLevel(pointSampler, (float2(0, u_radius - 1) * kernelTexel), 0).r;
* GetKernelAt(0);
for (int k = 1; k <= u_radius; k++) {
uvOffset += u_texelDelta;
float l_g = kernel.SampleLevel(pointSampler, (float2(k, u_radius - 1) * kernelTexel), 0).r;
float l_g = GetKernelAt(k);
float4 l_p = u_image.SampleLevel(pointSampler, vtx.uv + uvOffset, 0);
float4 l_n = u_image.SampleLevel(pointSampler, vtx.uv - uvOffset, 0);
final += (l_p + l_n) * l_g;
@ -180,15 +182,15 @@ float4 PSGaussianLinearBlur(VertDataOut vtx) : TARGET {
// [-2, -1, 0, +1, +2]
// ^-S-^ S ^-S-^
// Total Samples: 3 (n+1)
float4 origin = u_image.SampleLevel(pointSampler, vtx.uv, 0);
float4 final = origin * kernel.SampleLevel(pointSampler, (float2(0, u_radius - 1) * kernelTexel), 0).r;
float4 final = origin * GetKernelAt(0);
float2 halfTexelDelta = u_texelDelta / 2.0;
for (int k = 1; k < u_radius; k+=2) {
float2 offset = k * u_texelDelta + halfTexelDelta;
float l_g0 = kernel.SampleLevel(pointSampler, (float2(k, u_radius - 1) * kernelTexel), 0).r;
float l_g1 = kernel.SampleLevel(pointSampler, (float2(k + 1, u_radius - 1) * kernelTexel), 0).r;
float l_g0 = GetKernelAt(k);
float l_g1 = GetKernelAt(k +1);
float4 l_p = u_image.SampleLevel(linearSampler, vtx.uv + offset, 0);
float4 l_n = u_image.SampleLevel(linearSampler, vtx.uv - offset, 0);
final += (l_p + l_n) * l_g0;
@ -199,7 +201,7 @@ float4 PSGaussianLinearBlur(VertDataOut vtx) : TARGET {
// Odd numbers require treatment of ends.
float4 left = u_image.SampleLevel(pointSampler, vtx.uv + u_texelDelta * u_radius, 0);
float4 right = u_image.SampleLevel(pointSampler, vtx.uv - u_texelDelta * u_radius, 0);
float krn = kernel.SampleLevel(pointSampler, (float2(u_radius, u_radius - 1) * kernelTexel), 0).r;
float krn = GetKernelAt(u_radius);
final += (left + right) * krn;
}

View file

@ -120,18 +120,12 @@ bool filter::blur::blur_instance::apply_bilateral_param()
return true;
}
bool filter::blur::blur_instance::apply_gaussian_param()
bool filter::blur::blur_instance::apply_gaussian_param(uint8_t width)
{
std::shared_ptr<gs::texture> kernel = filter::blur::blur_factory::get()->get_kernel(filter::blur::type::Gaussian);
auto kernel = filter::blur::blur_factory::get()->get_gaussian_kernel(width);
if (blur_effect->has_parameter("kernel")) {
blur_effect->get_parameter("kernel").set_texture(kernel);
}
if (blur_effect->has_parameter("kernelTexel")) {
float_t wb = 1.0f / kernel->get_width();
float_t hb = 1.0f / kernel->get_height();
blur_effect->get_parameter("kernelTexel").set_float2(wb, hb);
blur_effect->get_parameter("kernel").set_float_array(&(kernel->front()), kernel->size());
}
return true;
@ -604,7 +598,7 @@ void filter::blur::blur_instance::video_render(gs_effect_t* effect)
}
#pragma endregion RGB->YUV
#pragma region blur
#pragma region Blur
// Set up camera stuff
gs_set_cull_mode(GS_NEITHER);
gs_reset_blend_state();
@ -630,7 +624,7 @@ void filter::blur::blur_instance::video_render(gs_effect_t* effect)
if (!apply_shared_param(intermediate, xpel, ypel))
break;
apply_gaussian_param();
apply_gaussian_param(this->size);
apply_bilateral_param();
gs_texrender_reset(rt);
@ -853,11 +847,13 @@ void filter::blur::blur_factory::generate_gaussian_kernels()
// 2D texture, horizontal is value, vertical is kernel size.
size_t size_power_of_two = size_t(pow(2, util::math::get_power_of_two_exponent_ceil(max_kernel_size)));
std::vector<float_t> texture_Data(size_power_of_two * size_power_of_two);
std::vector<float_t> math_data(size_power_of_two);
std::vector<float_t> texture_data(size_power_of_two * size_power_of_two);
std::vector<float_t> math_data(size_power_of_two);
std::shared_ptr<std::vector<float_t>> kernel_data;
for (size_t width = 1; width <= max_kernel_size; width++) {
size_t v = (width - 1) * size_power_of_two;
size_t v = (width - 1) * size_power_of_two;
kernel_data = std::make_shared<std::vector<float_t>>(size_power_of_two);
// Calculate and normalize
float_t sum = 0;
@ -869,13 +865,16 @@ void filter::blur::blur_factory::generate_gaussian_kernels()
// Normalize to Texture Buffer
double_t inverse_sum = 1.0 / sum;
for (size_t p = 0; p <= width; p++) {
texture_Data[v + p] = float_t(math_data[p] * inverse_sum);
texture_data[v + p] = float_t(math_data[p] * inverse_sum);
kernel_data->at(p) = texture_data[v + p];
}
gaussian_kernels.insert({uint8_t(width), kernel_data});
}
// Create Texture
try {
auto texture_buffer = reinterpret_cast<uint8_t*>(texture_Data.data());
auto texture_buffer = reinterpret_cast<uint8_t*>(texture_data.data());
auto unsafe_buffer = const_cast<const uint8_t**>(&texture_buffer);
kernels.insert_or_assign(filter::blur::type::Gaussian,
@ -1046,6 +1045,11 @@ std::shared_ptr<gs::texture> filter::blur::blur_factory::get_kernel(filter::blur
return kernels.at(type);
}
/// Look up the Gaussian kernel stored in gaussian_kernels under the given key.
/// @param size Key into the gaussian_kernels map.
/// @return Shared pointer to the stored vector of kernel weights.
/// @throws std::out_of_range (from std::map::at) if no kernel is stored under this key.
std::shared_ptr<std::vector<float_t>> filter::blur::blur_factory::get_gaussian_kernel(uint8_t size)
{
	const auto& stored_kernel = gaussian_kernels.at(size);
	return stored_kernel;
}
obs_scene_t* filter::blur::blur_factory::get_scene(std::string name)
{
auto kv = scenes.find(name);

View file

@ -111,7 +111,7 @@ namespace filter {
bool apply_shared_param(gs_texture_t* input, float texelX, float texelY);
bool apply_bilateral_param();
bool apply_gaussian_param();
bool apply_gaussian_param(uint8_t width);
bool apply_mask_parameters(std::shared_ptr<gs::effect> effect, gs_texture_t* original_texture,
gs_texture_t* blurred_texture);
@ -143,6 +143,7 @@ namespace filter {
std::shared_ptr<gs::effect> blur_effect;
std::map<filter::blur::type, std::shared_ptr<gs::texture>> kernels;
std::map<uint8_t, std::shared_ptr<std::vector<float_t>>> gaussian_kernels;
std::map<std::string, obs_scene_t*> scenes;
@ -188,6 +189,8 @@ namespace filter {
std::shared_ptr<gs::texture> get_kernel(filter::blur::type type);
std::shared_ptr<std::vector<float_t>> get_gaussian_kernel(uint8_t size);
obs_scene_t* get_scene(std::string name);
void enum_scenes(std::function<bool(obs_scene_t*)> fnc);