obs-StreamFX/data/effects/blur.effect
Michael Fabian 'Xaymar' Dirks a6f9451654 filter-blur: Switch to Kernel Array instead of Kernel Texture
This drastically speeds up Gaussian Blur and Linear Gaussian Blur: it reduces the time spent reading the kernel from a texture by using existing registers instead, maximizing the time spent reading the actual image texture.

See Also: #21 Blur Quality
2018-12-23 02:00:30 +01:00

269 lines
6.7 KiB
Text

// OBS Default
// Matrix that VSDefault multiplies every vertex position by.
uniform float4x4 ViewProj;
// Settings (Shared)
// Source image sampled by every pixel shader in this file.
uniform texture2d u_image;
// Not referenced by the shaders visible here; presumably the image size in pixels - TODO confirm against the host code.
uniform float2 u_imageSize;
// Not referenced by the shaders visible here; presumably the UV size of one texel - TODO confirm against the host code.
uniform float2 u_imageTexel;
// Number of taps taken on each side of the center texel; used as the loop bound of every blur.
uniform int u_radius;
// Divisor the box blurs use to normalize their sum; presumably 2 * u_radius + 1 - TODO confirm against the host code.
uniform int u_diameter;
// UV offset between two neighbouring taps; each blur steps along this vector in both directions.
uniform float2 u_texelDelta;
// Kernel Settings
// 8 float4 registers = 32 scalar weights, read one at a time through GetKernelAt().
uniform float4 kernel[8]; // max kernel radius 31+center.
// Bilateral Settings
// Sigma passed to Bilateral() for the distance-based weight of a tap.
uniform float bilateralSmoothing;
// Sigma passed to Bilateral()/Bilateral3() for the color-difference weight of a tap.
uniform float bilateralSharpness;
// Data
// Sampler with Point filtering, clamped addressing on both axes and the LOD
// range limited to 0. Used wherever a single, unblended texel is required.
sampler_state pointSampler {
Filter = Point;
AddressU = Clamp;
AddressV = Clamp;
MinLOD = 0;
MaxLOD = 0;
};
// Sampler with Linear filtering, clamped addressing on both axes and the LOD
// range limited to 0. The "Linear" blur variants rely on it to blend two
// neighbouring texels with a single fetch placed between them.
sampler_state linearSampler {
Filter = Linear;
AddressU = Clamp;
AddressV = Clamp;
MinLOD = 0;
MaxLOD = 0;
};
// Per-vertex input consumed by VSDefault.
struct VertDataIn {
float4 pos : POSITION; // vertex position; only xyz is used by VSDefault
float2 uv : TEXCOORD0; // texture coordinate, forwarded unchanged
};
// Output of VSDefault and input of every pixel shader in this file.
struct VertDataOut {
float4 pos : POSITION; // position after multiplication with ViewProj
float2 uv : TEXCOORD0; // texture coordinate of the texel being shaded
};
// Vertex stage shared by all techniques: multiplies the position (with w
// forced to 1.0) by ViewProj and forwards the texture coordinate unchanged.
VertDataOut VSDefault(VertDataIn vtx)
{
	float4 position = float4(vtx.pos.xyz, 1.0);

	VertDataOut result;
	result.uv = vtx.uv;
	result.pos = mul(position, ViewProj);
	return result;
}
/// Utility
// Returns scalar weight number i from the packed kernel array, which stores
// four consecutive weights per float4 register.
float GetKernelAt(int i) {
	int slot = i / 4; // which float4 register holds the weight
	int lane = i % 4; // which component inside that register
	return kernel[slot][lane];
}
/// Blur: Box
// Box blur along u_texelDelta: sums the center texel plus u_radius texels on
// each side with equal weight, then divides the sum by u_diameter.
float4 PSBoxBlur(VertDataOut vtx) : TARGET {
	// Center tap.
	float4 sum = u_image.SampleLevel(pointSampler, vtx.uv, 0);

	// Mirrored taps, one texel step further out per iteration.
	for (int tap = 1; tap <= u_radius; tap++) {
		float2 shift = u_texelDelta * tap;
		sum += u_image.SampleLevel(pointSampler, vtx.uv + shift, 0);
		sum += u_image.SampleLevel(pointSampler, vtx.uv - shift, 0);
	}

	return sum / u_diameter;
}
// Technique "Box": a single pass that pairs VSDefault with PSBoxBlur.
technique Box
{
pass
{
vertex_shader = VSDefault(vtx);
pixel_shader = PSBoxBlur(vtx);
}
}
/// Blur: Box (Linear Optimized)
// By abusing Linear sampling we can reduce the necessary samples, halving the total samples.
// Box blur that covers two texels per fetch by placing a linearly filtered
// sample halfway between them; each such fetch is counted twice in the sum.
//
// Even radius (example 4): the center texel is fetched on its own, the rest
// forms pairs on both sides:
//   (-4,-3) (-2,-1) [0] (+1,+2) (+3,+4)
// Odd radius (example 3): the center is paired with its negative neighbour
// and texel +1 is fetched on its own instead:
//   (-3,-2) (-1,0) [+1] (+2,+3)
float4 PSBoxBlurLinear(VertDataOut vtx) : TARGET {
	float2 halfStep = u_texelDelta / 2.0;
	float4 sum;
	int firstPair;

	if (u_radius % 2 != 0) {
		// Odd: lone texel at +1, plus the pair (-1, 0) fetched at -0.5.
		float4 lone = u_image.SampleLevel(pointSampler, vtx.uv + u_texelDelta, 0);
		float4 centerPair = u_image.SampleLevel(linearSampler, vtx.uv - halfStep, 0);
		sum = lone + centerPair * 2;
		firstPair = 2;
	} else {
		// Even: the center texel is the lone sample.
		sum = u_image.SampleLevel(pointSampler, vtx.uv, 0);
		firstPair = 1;
	}

	// Remaining texels, two per fetch, mirrored around the center.
	for (int k = firstPair; k <= u_radius; k += 2) {
		float2 shift = k * u_texelDelta + halfStep;
		sum += u_image.SampleLevel(linearSampler, vtx.uv + shift, 0) * 2;
		sum += u_image.SampleLevel(linearSampler, vtx.uv - shift, 0) * 2;
	}

	return sum / u_diameter;
}
// Technique "BoxLinear": a single pass that pairs VSDefault with PSBoxBlurLinear.
technique BoxLinear
{
pass
{
vertex_shader = VSDefault(vtx);
pixel_shader = PSBoxBlurLinear(vtx);
}
}
/// Blur: Gaussian
// Weighted blur along u_texelDelta: the center texel is scaled by kernel
// weight 0, and the two texels at distance k on either side by weight k.
// No normalization happens here, so the kernel is used exactly as supplied.
float4 PSGaussianBlur(VertDataOut vtx) : TARGET {
	float4 sum = u_image.SampleLevel(pointSampler, vtx.uv, 0) * GetKernelAt(0);

	// The offset grows by one step per iteration instead of being recomputed.
	float2 shift = float2(0, 0);
	for (int tap = 1; tap <= u_radius; tap++) {
		shift += u_texelDelta;
		float weight = GetKernelAt(tap);
		float4 ahead = u_image.SampleLevel(pointSampler, vtx.uv + shift, 0);
		float4 behind = u_image.SampleLevel(pointSampler, vtx.uv - shift, 0);
		sum += (ahead + behind) * weight;
	}

	return sum;
}
// Technique "Gaussian": a single pass that pairs VSDefault with PSGaussianBlur.
technique Gaussian
{
pass
{
vertex_shader = VSDefault(vtx);
pixel_shader = PSGaussianBlur(vtx);
}
}
/// Blur: Gaussian Linear
// Weighted blur that covers two texels per fetch using linear filtering.
//
// The center texel is always fetched on its own. Texels (k, k+1) are then
// merged pairwise on both sides; an odd radius leaves the outermost texel on
// each side over, which is fetched separately:
//   Radius 5: [-5] (-4,-3) (-2,-1) [0] (+1,+2) (+3,+4) [+5]   7 fetches
//   Radius 4:      (-4,-3) (-2,-1) [0] (+1,+2) (+3,+4)        5 fetches
//   Radius 3: [-3]         (-2,-1) [0] (+1,+2)         [+3]   5 fetches
//   Radius 2:              (-2,-1) [0] (+1,+2)                3 fetches
//
// For a pair with weights g0 (texel k) and g1 (texel k+1), a linear fetch at
// fraction f between the two texels returns (1 - f) * t0 + f * t1. Choosing
// f = g1 / (g0 + g1) and scaling by (g0 + g1) therefore yields exactly
// g0 * t0 + g1 * t1. Fetching at the midpoint instead would give both texels
// the averaged weight (g0 + g1) / 2 and distort the kernel.
float4 PSGaussianLinearBlur(VertDataOut vtx) : TARGET {
	float4 origin = u_image.SampleLevel(pointSampler, vtx.uv, 0);
	float4 final = origin * GetKernelAt(0);

	for (int k = 1; k < u_radius; k += 2) {
		float l_g0 = GetKernelAt(k);
		float l_g1 = GetKernelAt(k + 1);
		float l_g = l_g0 + l_g1;
		// Guard the division: a pair whose weights are both zero contributes
		// nothing, so any in-range fraction is acceptable there.
		float l_f = (l_g > 0.0) ? (l_g1 / l_g) : 0.5;
		float2 offset = u_texelDelta * (k + l_f);
		float4 l_p = u_image.SampleLevel(linearSampler, vtx.uv + offset, 0);
		float4 l_n = u_image.SampleLevel(linearSampler, vtx.uv - offset, 0);
		final += (l_p + l_n) * l_g;
	}

	if (u_radius % 2 == 1) {
		// Odd radius: the outermost texel on each side has no partner.
		float2 endOffset = u_texelDelta * u_radius;
		float4 l_ep = u_image.SampleLevel(pointSampler, vtx.uv + endOffset, 0);
		float4 l_en = u_image.SampleLevel(pointSampler, vtx.uv - endOffset, 0);
		final += (l_ep + l_en) * GetKernelAt(u_radius);
	}

	return final;
}
// Technique "GaussianLinear": a single pass that pairs VSDefault with PSGaussianLinearBlur.
technique GaussianLinear
{
pass
{
vertex_shader = VSDefault(vtx);
pixel_shader = PSGaussianLinearBlur(vtx);
}
}
/// Blur: Bilateral
// Gaussian bell curve of the scalar x with standard deviation sigma.
// 0.39894 approximates 1 / sqrt(2 * pi). sigma must be non-zero.
float Bilateral(float x, float sigma) {
	float exponent = -0.5 * (x * x) / (sigma * sigma);
	return 0.39894 * exp(exponent) / sigma;
}
// Same bell curve as Bilateral(), evaluated on the squared length of the
// vector v. 0.39894 approximates 1 / sqrt(2 * pi). sigma must be non-zero.
float Bilateral3(float3 v, float sigma) {
	float lengthSq = dot(v, v);
	float variance = sigma * sigma;
	return 0.39894 * exp(-0.5 * lengthSq / variance) / sigma;
}
// Edge-preserving blur along u_texelDelta.
//
// Every tap is weighted by the product of
//   - a distance term: Bilateral(k, bilateralSmoothing), squared, and
//   - a color term: Bilateral3(tap - center, bilateralSharpness), rescaled by
//     bZ so that an identical color yields 1.
// The weighted colors are summed and divided by the sum of weights (Z).
// Alpha is taken from the center texel unchanged.
//
// The center texel itself takes part in the sum. Its color term is 1 by
// construction, so its weight is just the squared distance term at k = 0.
// This keeps Z strictly positive (no division by zero when u_radius is 0 or
// when every neighbour differs so much that its weight underflows) and lets
// a pixel that differs from all of its neighbours keep its own color.
float4 PSBilateralBlur(VertDataOut vtx) : TARGET {
	float4 origin = u_image.SampleLevel(pointSampler, vtx.uv, 0);

	// Normalizes the color term so that a zero color difference gives 1.
	float bZ = 1.0 / Bilateral(0.0, bilateralSharpness);

	// Center tap.
	float centerKernel = Bilateral(0.0, bilateralSmoothing);
	centerKernel *= centerKernel;
	float Z = centerKernel;
	float3 color = origin.rgb * centerKernel;

	float2 uvOffset = float2(0, 0);
	for (int k = 1; k <= u_radius; k++) {
		uvOffset += u_texelDelta;

		// Distance term, shared by the two mirrored taps.
		float bKernel = Bilateral((float)k, bilateralSmoothing);
		bKernel *= bKernel;
		float bZKernel = bZ * bKernel;

		// Mirrored taps.
		float3 l_p = u_image.SampleLevel(pointSampler, vtx.uv + uvOffset, 0).rgb;
		float3 l_n = u_image.SampleLevel(pointSampler, vtx.uv - uvOffset, 0).rgb;

		// Color term: the closer a tap is to the center color, the more it counts.
		float l_factor_p = Bilateral3(l_p - origin.rgb, bilateralSharpness) * bZKernel;
		float l_factor_n = Bilateral3(l_n - origin.rgb, bilateralSharpness) * bZKernel;
		Z = Z + l_factor_p + l_factor_n;

		color += l_p * l_factor_p;
		color += l_n * l_factor_n;
	}

	return float4(color.rgb / Z, origin.a);
}
// Technique "Bilateral": a single pass that pairs VSDefault with PSBilateralBlur.
technique Bilateral
{
pass
{
vertex_shader = VSDefault(vtx);
pixel_shader = PSBilateralBlur(vtx);
}
}