mirror of
https://github.com/Xaymar/obs-StreamFX
synced 2024-11-14 07:45:06 +00:00
filter-blur: Switch to Kernel Array instead of Kernel Texture
This speeds up Gaussian Blur and Linear Gaussian Blur drastically: it reduces the time spent reading the kernel texture and instead uses existing registers, maximizing the time spent reading the actual image texture. See also: #21 Blur Quality
This commit is contained in:
parent
92c4b54177
commit
a6f9451654
3 changed files with 37 additions and 28 deletions
|
@ -10,9 +10,7 @@ uniform int u_diameter;
|
||||||
uniform float2 u_texelDelta;
|
uniform float2 u_texelDelta;
|
||||||
|
|
||||||
// Kernel Settings
|
// Kernel Settings
|
||||||
//uniform float registerkernel[25];
|
uniform float4 kernel[8]; // max kernel radius 31+center.
|
||||||
uniform texture2d kernel;
|
|
||||||
uniform float2 kernelTexel;
|
|
||||||
|
|
||||||
// Bilateral Settings
|
// Bilateral Settings
|
||||||
uniform float bilateralSmoothing;
|
uniform float bilateralSmoothing;
|
||||||
|
@ -53,6 +51,11 @@ VertDataOut VSDefault(VertDataIn vtx)
|
||||||
return vert_out;
|
return vert_out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Utility
|
||||||
|
// Returns the kernel weight at flat index i.
// The weights live in the `kernel` uniform, declared as float4 kernel[8], i.e.
// four weights packed per float4 (32 floats in total). Index 0 is used by the
// blur passes as the center weight; index k as the weight for offset k.
// NOTE(review): no bounds check is performed here - presumably callers keep
// i within 0..31; confirm against the maximum radius the filter allows.
float GetKernelAt(int i) {
|
||||||
|
// i/4 picks the float4 element, i%4 picks the component inside it; the
// float4 is cast to float[4] so it can be indexed with a computed index.
return ((float[4])(kernel[floor(i/4)]))[i%4];
|
||||||
|
}
|
||||||
|
|
||||||
/// Blur: Box
|
/// Blur: Box
|
||||||
float4 PSBoxBlur(VertDataOut vtx) : TARGET {
|
float4 PSBoxBlur(VertDataOut vtx) : TARGET {
|
||||||
float4 origin = u_image.SampleLevel(pointSampler, vtx.uv, 0);
|
float4 origin = u_image.SampleLevel(pointSampler, vtx.uv, 0);
|
||||||
|
@ -131,14 +134,13 @@ technique BoxLinear
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Blur: Gaussian
|
/// Blur: Gaussian
|
||||||
// ToDo: Switch to array Kernel instead of Texture kernel.
|
|
||||||
float4 PSGaussianBlur(VertDataOut vtx) : TARGET {
|
float4 PSGaussianBlur(VertDataOut vtx) : TARGET {
|
||||||
float2 uvOffset = float2(0, 0);
|
float2 uvOffset = float2(0, 0);
|
||||||
float4 final = u_image.SampleLevel(pointSampler, vtx.uv, 0)
|
float4 final = u_image.SampleLevel(pointSampler, vtx.uv, 0)
|
||||||
* kernel.SampleLevel(pointSampler, (float2(0, u_radius - 1) * kernelTexel), 0).r;
|
* GetKernelAt(0);
|
||||||
for (int k = 1; k <= u_radius; k++) {
|
for (int k = 1; k <= u_radius; k++) {
|
||||||
uvOffset += u_texelDelta;
|
uvOffset += u_texelDelta;
|
||||||
float l_g = kernel.SampleLevel(pointSampler, (float2(k, u_radius - 1) * kernelTexel), 0).r;
|
float l_g = GetKernelAt(k);
|
||||||
float4 l_p = u_image.SampleLevel(pointSampler, vtx.uv + uvOffset, 0);
|
float4 l_p = u_image.SampleLevel(pointSampler, vtx.uv + uvOffset, 0);
|
||||||
float4 l_n = u_image.SampleLevel(pointSampler, vtx.uv - uvOffset, 0);
|
float4 l_n = u_image.SampleLevel(pointSampler, vtx.uv - uvOffset, 0);
|
||||||
final += (l_p + l_n) * l_g;
|
final += (l_p + l_n) * l_g;
|
||||||
|
@ -182,13 +184,13 @@ float4 PSGaussianLinearBlur(VertDataOut vtx) : TARGET {
|
||||||
// Total Samples: 3 (n+1)
|
// Total Samples: 3 (n+1)
|
||||||
|
|
||||||
float4 origin = u_image.SampleLevel(pointSampler, vtx.uv, 0);
|
float4 origin = u_image.SampleLevel(pointSampler, vtx.uv, 0);
|
||||||
float4 final = origin * kernel.SampleLevel(pointSampler, (float2(0, u_radius - 1) * kernelTexel), 0).r;
|
float4 final = origin * GetKernelAt(0);
|
||||||
float2 halfTexelDelta = u_texelDelta / 2.0;
|
float2 halfTexelDelta = u_texelDelta / 2.0;
|
||||||
|
|
||||||
for (int k = 1; k < u_radius; k+=2) {
|
for (int k = 1; k < u_radius; k+=2) {
|
||||||
float2 offset = k * u_texelDelta + halfTexelDelta;
|
float2 offset = k * u_texelDelta + halfTexelDelta;
|
||||||
float l_g0 = kernel.SampleLevel(pointSampler, (float2(k, u_radius - 1) * kernelTexel), 0).r;
|
float l_g0 = GetKernelAt(k);
|
||||||
float l_g1 = kernel.SampleLevel(pointSampler, (float2(k + 1, u_radius - 1) * kernelTexel), 0).r;
|
float l_g1 = GetKernelAt(k +1);
|
||||||
float4 l_p = u_image.SampleLevel(linearSampler, vtx.uv + offset, 0);
|
float4 l_p = u_image.SampleLevel(linearSampler, vtx.uv + offset, 0);
|
||||||
float4 l_n = u_image.SampleLevel(linearSampler, vtx.uv - offset, 0);
|
float4 l_n = u_image.SampleLevel(linearSampler, vtx.uv - offset, 0);
|
||||||
final += (l_p + l_n) * l_g0;
|
final += (l_p + l_n) * l_g0;
|
||||||
|
@ -199,7 +201,7 @@ float4 PSGaussianLinearBlur(VertDataOut vtx) : TARGET {
|
||||||
// Odd numbers require treatment of ends.
|
// Odd numbers require treatment of ends.
|
||||||
float4 left = u_image.SampleLevel(pointSampler, vtx.uv + u_texelDelta * u_radius, 0);
|
float4 left = u_image.SampleLevel(pointSampler, vtx.uv + u_texelDelta * u_radius, 0);
|
||||||
float4 right = u_image.SampleLevel(pointSampler, vtx.uv - u_texelDelta * u_radius, 0);
|
float4 right = u_image.SampleLevel(pointSampler, vtx.uv - u_texelDelta * u_radius, 0);
|
||||||
float krn = kernel.SampleLevel(pointSampler, (float2(u_radius, u_radius - 1) * kernelTexel), 0).r;
|
float krn = GetKernelAt(u_radius);
|
||||||
final += (left + right) * krn;
|
final += (left + right) * krn;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -120,18 +120,12 @@ bool filter::blur::blur_instance::apply_bilateral_param()
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool filter::blur::blur_instance::apply_gaussian_param()
|
bool filter::blur::blur_instance::apply_gaussian_param(uint8_t width)
|
||||||
{
|
{
|
||||||
std::shared_ptr<gs::texture> kernel = filter::blur::blur_factory::get()->get_kernel(filter::blur::type::Gaussian);
|
auto kernel = filter::blur::blur_factory::get()->get_gaussian_kernel(width);
|
||||||
|
|
||||||
if (blur_effect->has_parameter("kernel")) {
|
if (blur_effect->has_parameter("kernel")) {
|
||||||
blur_effect->get_parameter("kernel").set_texture(kernel);
|
blur_effect->get_parameter("kernel").set_float_array(&(kernel->front()), kernel->size());
|
||||||
}
|
|
||||||
|
|
||||||
if (blur_effect->has_parameter("kernelTexel")) {
|
|
||||||
float_t wb = 1.0f / kernel->get_width();
|
|
||||||
float_t hb = 1.0f / kernel->get_height();
|
|
||||||
blur_effect->get_parameter("kernelTexel").set_float2(wb, hb);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
@ -604,7 +598,7 @@ void filter::blur::blur_instance::video_render(gs_effect_t* effect)
|
||||||
}
|
}
|
||||||
#pragma endregion RGB->YUV
|
#pragma endregion RGB->YUV
|
||||||
|
|
||||||
#pragma region blur
|
#pragma region Blur
|
||||||
// Set up camera stuff
|
// Set up camera stuff
|
||||||
gs_set_cull_mode(GS_NEITHER);
|
gs_set_cull_mode(GS_NEITHER);
|
||||||
gs_reset_blend_state();
|
gs_reset_blend_state();
|
||||||
|
@ -630,7 +624,7 @@ void filter::blur::blur_instance::video_render(gs_effect_t* effect)
|
||||||
|
|
||||||
if (!apply_shared_param(intermediate, xpel, ypel))
|
if (!apply_shared_param(intermediate, xpel, ypel))
|
||||||
break;
|
break;
|
||||||
apply_gaussian_param();
|
apply_gaussian_param(this->size);
|
||||||
apply_bilateral_param();
|
apply_bilateral_param();
|
||||||
|
|
||||||
gs_texrender_reset(rt);
|
gs_texrender_reset(rt);
|
||||||
|
@ -853,11 +847,13 @@ void filter::blur::blur_factory::generate_gaussian_kernels()
|
||||||
// 2D texture, horizontal is value, vertical is kernel size.
|
// 2D texture, horizontal is value, vertical is kernel size.
|
||||||
size_t size_power_of_two = size_t(pow(2, util::math::get_power_of_two_exponent_ceil(max_kernel_size)));
|
size_t size_power_of_two = size_t(pow(2, util::math::get_power_of_two_exponent_ceil(max_kernel_size)));
|
||||||
|
|
||||||
std::vector<float_t> texture_Data(size_power_of_two * size_power_of_two);
|
std::vector<float_t> texture_data(size_power_of_two * size_power_of_two);
|
||||||
std::vector<float_t> math_data(size_power_of_two);
|
std::vector<float_t> math_data(size_power_of_two);
|
||||||
|
std::shared_ptr<std::vector<float_t>> kernel_data;
|
||||||
|
|
||||||
for (size_t width = 1; width <= max_kernel_size; width++) {
|
for (size_t width = 1; width <= max_kernel_size; width++) {
|
||||||
size_t v = (width - 1) * size_power_of_two;
|
size_t v = (width - 1) * size_power_of_two;
|
||||||
|
kernel_data = std::make_shared<std::vector<float_t>>(size_power_of_two);
|
||||||
|
|
||||||
// Calculate and normalize
|
// Calculate and normalize
|
||||||
float_t sum = 0;
|
float_t sum = 0;
|
||||||
|
@ -869,13 +865,16 @@ void filter::blur::blur_factory::generate_gaussian_kernels()
|
||||||
// Normalize to Texture Buffer
|
// Normalize to Texture Buffer
|
||||||
double_t inverse_sum = 1.0 / sum;
|
double_t inverse_sum = 1.0 / sum;
|
||||||
for (size_t p = 0; p <= width; p++) {
|
for (size_t p = 0; p <= width; p++) {
|
||||||
texture_Data[v + p] = float_t(math_data[p] * inverse_sum);
|
texture_data[v + p] = float_t(math_data[p] * inverse_sum);
|
||||||
|
kernel_data->at(p) = texture_data[v + p];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
gaussian_kernels.insert({uint8_t(width), kernel_data});
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create Texture
|
// Create Texture
|
||||||
try {
|
try {
|
||||||
auto texture_buffer = reinterpret_cast<uint8_t*>(texture_Data.data());
|
auto texture_buffer = reinterpret_cast<uint8_t*>(texture_data.data());
|
||||||
auto unsafe_buffer = const_cast<const uint8_t**>(&texture_buffer);
|
auto unsafe_buffer = const_cast<const uint8_t**>(&texture_buffer);
|
||||||
|
|
||||||
kernels.insert_or_assign(filter::blur::type::Gaussian,
|
kernels.insert_or_assign(filter::blur::type::Gaussian,
|
||||||
|
@ -1046,6 +1045,11 @@ std::shared_ptr<gs::texture> filter::blur::blur_factory::get_kernel(filter::blur
|
||||||
return kernels.at(type);
|
return kernels.at(type);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns the shared Gaussian kernel weights stored under the given size.
// Lookup goes through std::map::at on gaussian_kernels, so requesting a size
// that has no entry throws std::out_of_range rather than returning an empty
// pointer.
// NOTE(review): entries appear to be inserted by generate_gaussian_kernels()
// for widths 1..max_kernel_size - confirm that it runs before this is called.
std::shared_ptr<std::vector<float_t>> filter::blur::blur_factory::get_gaussian_kernel(uint8_t size)
|
||||||
|
{
|
||||||
|
return gaussian_kernels.at(size);
|
||||||
|
}
|
||||||
|
|
||||||
obs_scene_t* filter::blur::blur_factory::get_scene(std::string name)
|
obs_scene_t* filter::blur::blur_factory::get_scene(std::string name)
|
||||||
{
|
{
|
||||||
auto kv = scenes.find(name);
|
auto kv = scenes.find(name);
|
||||||
|
|
|
@ -111,7 +111,7 @@ namespace filter {
|
||||||
|
|
||||||
bool apply_shared_param(gs_texture_t* input, float texelX, float texelY);
|
bool apply_shared_param(gs_texture_t* input, float texelX, float texelY);
|
||||||
bool apply_bilateral_param();
|
bool apply_bilateral_param();
|
||||||
bool apply_gaussian_param();
|
bool apply_gaussian_param(uint8_t width);
|
||||||
bool apply_mask_parameters(std::shared_ptr<gs::effect> effect, gs_texture_t* original_texture,
|
bool apply_mask_parameters(std::shared_ptr<gs::effect> effect, gs_texture_t* original_texture,
|
||||||
gs_texture_t* blurred_texture);
|
gs_texture_t* blurred_texture);
|
||||||
|
|
||||||
|
@ -143,6 +143,7 @@ namespace filter {
|
||||||
|
|
||||||
std::shared_ptr<gs::effect> blur_effect;
|
std::shared_ptr<gs::effect> blur_effect;
|
||||||
std::map<filter::blur::type, std::shared_ptr<gs::texture>> kernels;
|
std::map<filter::blur::type, std::shared_ptr<gs::texture>> kernels;
|
||||||
|
std::map<uint8_t, std::shared_ptr<std::vector<float_t>>> gaussian_kernels;
|
||||||
|
|
||||||
std::map<std::string, obs_scene_t*> scenes;
|
std::map<std::string, obs_scene_t*> scenes;
|
||||||
|
|
||||||
|
@ -188,6 +189,8 @@ namespace filter {
|
||||||
|
|
||||||
std::shared_ptr<gs::texture> get_kernel(filter::blur::type type);
|
std::shared_ptr<gs::texture> get_kernel(filter::blur::type type);
|
||||||
|
|
||||||
|
std::shared_ptr<std::vector<float_t>> get_gaussian_kernel(uint8_t size);
|
||||||
|
|
||||||
obs_scene_t* get_scene(std::string name);
|
obs_scene_t* get_scene(std::string name);
|
||||||
|
|
||||||
void enum_scenes(std::function<bool(obs_scene_t*)> fnc);
|
void enum_scenes(std::function<bool(obs_scene_t*)> fnc);
|
||||||
|
|
Loading…
Reference in a new issue