mirror of
https://github.com/Xaymar/obs-StreamFX
synced 2024-11-10 22:05:06 +00:00
a6f9451654
This drastically speeds up Gaussian Blur and Linear Gaussian Blur: it reduces the time spent reading textures by using existing registers instead, maximizing the time spent reading the actual image texture. See also: #21 Blur Quality
269 lines
6.7 KiB
Text
269 lines
6.7 KiB
Text
// OBS Default
|
|
// Matrix applied to every vertex position in VSDefault.
uniform float4x4 ViewProj;
|
|
|
|
// Settings (Shared)
|
|
// Source texture that every blur pixel shader in this file samples.
uniform texture2d u_image;
|
|
// NOTE(review): not referenced by any shader visible in this file; presumably the
// source size in pixels - confirm against the host code.
uniform float2 u_imageSize;
|
|
// NOTE(review): not referenced by any shader visible in this file; presumably the
// UV size of one texel - confirm against the host code.
uniform float2 u_imageTexel;
|
|
// Number of taps taken on each side of the center tap (loop bound of every blur).
uniform int u_radius;
|
|
// Divisor used to normalize the box blur sums.
// NOTE(review): presumably 2 * u_radius + 1 - confirm against the host code.
uniform int u_diameter;
|
|
// UV step between two neighbouring taps; taps are taken at vtx.uv +/- k * u_texelDelta.
uniform float2 u_texelDelta;
|
|
|
|
// Kernel Settings
|
|
// Gaussian weights, packed four per float4 (8 * 4 = 32 values); read through GetKernelAt().
uniform float4 kernel[8]; // max kernel radius 31+center.
|
|
|
|
// Bilateral Settings
|
|
// Sigma passed to Bilateral() for the tap-distance term in PSBilateralBlur.
uniform float bilateralSmoothing;
|
|
// Sigma passed to Bilateral()/Bilateral3() for the color-difference term in PSBilateralBlur.
uniform float bilateralSharpness;
|
|
|
|
// Data
|
|
// Sampler with point filtering, clamped addressing on both axes and the LOD
// range pinned to 0. Used for every tap that must read exactly one texel.
sampler_state pointSampler {
|
|
Filter = Point;
|
|
AddressU = Clamp;
|
|
AddressV = Clamp;
|
|
MinLOD = 0;
|
|
MaxLOD = 0;
|
|
};
|
|
|
|
// Sampler with linear filtering, clamped addressing on both axes and the LOD
// range pinned to 0. Used by the "Linear" blur variants, which place a tap
// between two texels to read both with one fetch.
sampler_state linearSampler {
|
|
Filter = Linear;
|
|
AddressU = Clamp;
|
|
AddressV = Clamp;
|
|
MinLOD = 0;
|
|
MaxLOD = 0;
|
|
};
|
|
|
|
// Input of VSDefault: vertex position and texture coordinate.
struct VertDataIn {
|
|
float4 pos : POSITION;
|
|
float2 uv : TEXCOORD0;
|
|
};
|
|
|
|
// Output of VSDefault and input of every pixel shader in this file:
// transformed position and the unmodified texture coordinate.
struct VertDataOut {
|
|
float4 pos : POSITION;
|
|
float2 uv : TEXCOORD0;
|
|
};
|
|
|
|
/// Shared vertex shader for all blur techniques.
// Transforms the vertex position by ViewProj (forcing w to 1.0) and passes the
// texture coordinate through unchanged.
VertDataOut VSDefault(VertDataIn vtx)
{
	VertDataOut result;
	result.uv  = vtx.uv;
	result.pos = mul(float4(vtx.pos.xyz, 1.0), ViewProj);
	return result;
}
|
|
|
|
/// Utility
|
|
/// Returns the kernel weight for tap `i` (0 is the center tap).
// The weights are packed four per float4 in `kernel`, so tap i is stored in
// component (i % 4) of element (i / 4).
float GetKernelAt(int i) {
	// i / 4 is already an integer division, so the element can be indexed
	// directly instead of going through floor() and a float array index.
	return kernel[i / 4][i % 4];
}
|
|
|
|
/// Blur: Box
|
|
/// Box blur along u_texelDelta using one point sample per texel.
// Sums the center texel and u_radius texels on each side, then divides the
// sum by u_diameter.
float4 PSBoxBlur(VertDataOut vtx) : TARGET {
	// Center tap.
	float4 sum = u_image.SampleLevel(pointSampler, vtx.uv, 0);

	// Symmetric taps at +step and -step texels.
	for (int step = 1; step <= u_radius; step++) {
		float2 delta = u_texelDelta * step;
		sum += u_image.SampleLevel(pointSampler, vtx.uv + delta, 0);
		sum += u_image.SampleLevel(pointSampler, vtx.uv - delta, 0);
	}

	return sum / u_diameter;
}
|
|
|
|
// Technique "Box": a single pass running VSDefault and PSBoxBlur.
technique Box
|
|
{
|
|
pass
|
|
{
|
|
vertex_shader = VSDefault(vtx);
|
|
pixel_shader = PSBoxBlur(vtx);
|
|
}
|
|
}
|
|
|
|
/// Blur: Box (Linear Optimized)
|
|
// By abusing Linear sampling we can reduce the necessary samples, halving the total samples.
|
|
/// Box blur along u_texelDelta that reads two texels per fetch.
// A linear sample placed halfway between two neighbouring texels returns their
// average; multiplying it by 2 yields their sum. Which texels are paired up
// depends on the parity of u_radius:
//
//   Even radius (e.g. 4): center texel as a point sample, then the pairs
//   (1,2), (3,4), ... mirrored on both sides.
//
//   Odd radius (e.g. 3): texel +1 as a point sample, the pair (-1,0) as one
//   linear sample, then the pairs (2,3), (4,5), ... mirrored on both sides.
//
// The accumulated sum is divided by u_diameter.
float4 PSBoxBlurLinear(VertDataOut vtx) : TARGET {
	float2 halfStep = u_texelDelta / 2.0;

	float4 sum = float4(0, 0, 0, 0);
	int firstPair;
	if (u_radius % 2 == 0) {
		// Even: the center texel stands alone, pairs start at texel 1.
		sum = u_image.SampleLevel(pointSampler, vtx.uv, 0);
		firstPair = 1;
	} else {
		// Odd: texel +1 stands alone, texels -1 and 0 share one linear sample,
		// and the remaining pairs start at texel 2.
		float4 single = u_image.SampleLevel(pointSampler, vtx.uv + u_texelDelta, 0);
		float4 pair = u_image.SampleLevel(linearSampler, vtx.uv - halfStep, 0);
		sum = single + pair * 2;
		firstPair = 2;
	}

	// Remaining texels, two at a time on each side of the center.
	for (int k = firstPair; k <= u_radius; k += 2) {
		float2 offset = k * u_texelDelta + halfStep;
		sum += u_image.SampleLevel(linearSampler, vtx.uv + offset, 0) * 2;
		sum += u_image.SampleLevel(linearSampler, vtx.uv - offset, 0) * 2;
	}

	return sum / u_diameter;
}
|
|
|
|
// Technique "BoxLinear": a single pass running VSDefault and PSBoxBlurLinear.
technique BoxLinear
|
|
{
|
|
pass
|
|
{
|
|
vertex_shader = VSDefault(vtx);
|
|
pixel_shader = PSBoxBlurLinear(vtx);
|
|
}
|
|
}
|
|
|
|
/// Blur: Gaussian
|
|
/// Gaussian blur along u_texelDelta using one point sample per texel.
// The center texel is weighted with GetKernelAt(0); the texels at distance k
// on both sides share the weight GetKernelAt(k). No further normalization is
// applied to the weighted sum.
float4 PSGaussianBlur(VertDataOut vtx) : TARGET {
	float4 sum = u_image.SampleLevel(pointSampler, vtx.uv, 0)
		* GetKernelAt(0);

	// Running offset: advanced by one texel step per iteration.
	float2 delta = float2(0, 0);
	for (int tap = 1; tap <= u_radius; tap++) {
		delta += u_texelDelta;
		float4 ahead = u_image.SampleLevel(pointSampler, vtx.uv + delta, 0);
		float4 behind = u_image.SampleLevel(pointSampler, vtx.uv - delta, 0);
		sum += (ahead + behind) * GetKernelAt(tap);
	}

	return sum;
}
|
|
|
|
// Technique "Gaussian": a single pass running VSDefault and PSGaussianBlur.
technique Gaussian
|
|
{
|
|
pass
|
|
{
|
|
vertex_shader = VSDefault(vtx);
|
|
pixel_shader = PSGaussianBlur(vtx);
|
|
}
|
|
}
|
|
|
|
/// Blur: Gaussian Linear
|
|
/// Gaussian blur along u_texelDelta that reads two texels per fetch.
// The center texel is always point sampled. The texels (1,2), (3,4), ... on
// each side are read pairwise with one linear sample per pair. If u_radius is
// odd, the outermost texel on each side has no partner and is point sampled.
//
//   Radius 5 (odd):  5 | (4,3) (2,1) | 0 | (1,2) (3,4) | 5   -> 7 samples
//   Radius 4 (even):     (4,3) (2,1) | 0 | (1,2) (3,4)       -> 5 samples
//   Radius 3 (odd):  3 |       (2,1) | 0 | (1,2)       | 3   -> 5 samples
//   Radius 2 (even):           (2,1) | 0 | (1,2)             -> 3 samples
//
// For a pair of texels k and k+1 with weights g0 and g1, a linear sample at
// position k + t returns (1 - t) * texel[k] + t * texel[k+1]. Choosing
// t = g1 / (g0 + g1) and scaling the sample by (g0 + g1) gives exactly
// g0 * texel[k] + g1 * texel[k+1], so the result matches PSGaussianBlur.
// Sampling at the midpoint (t = 0.5) would give both texels the same weight.
float4 PSGaussianLinearBlur(VertDataOut vtx) : TARGET {
	float4 origin = u_image.SampleLevel(pointSampler, vtx.uv, 0);
	float4 final = origin * GetKernelAt(0);

	for (int k = 1; k < u_radius; k += 2) {
		float l_g0 = GetKernelAt(k);
		float l_g1 = GetKernelAt(k + 1);
		float l_gs = l_g0 + l_g1;

		// Fraction of the way from texel k to texel k+1 at which to sample.
		// Falls back to the midpoint if both weights are zero; the pair then
		// contributes nothing because it is scaled by l_gs.
		float l_t = (l_gs > 0.0) ? (l_g1 / l_gs) : 0.5;
		float2 offset = (k + l_t) * u_texelDelta;

		float4 l_p = u_image.SampleLevel(linearSampler, vtx.uv + offset, 0);
		float4 l_n = u_image.SampleLevel(linearSampler, vtx.uv - offset, 0);
		final += (l_p + l_n) * l_gs;
	}

	if (u_radius % 2 == 1) {
		// Odd radius: the outermost texel on each side was not part of a pair.
		float4 l_p = u_image.SampleLevel(pointSampler, vtx.uv + u_texelDelta * u_radius, 0);
		float4 l_n = u_image.SampleLevel(pointSampler, vtx.uv - u_texelDelta * u_radius, 0);
		final += (l_p + l_n) * GetKernelAt(u_radius);
	}

	return final;
}
|
|
|
|
// Technique "GaussianLinear": a single pass running VSDefault and PSGaussianLinearBlur.
technique GaussianLinear
|
|
{
|
|
pass
|
|
{
|
|
vertex_shader = VSDefault(vtx);
|
|
pixel_shader = PSGaussianLinearBlur(vtx);
|
|
}
|
|
}
|
|
|
|
/// Blur: Bilateral
|
|
/// Gaussian weight for the scalar distance `x` with standard deviation `sigma`.
// Evaluates 0.39894 / sigma * exp(-x^2 / (2 * sigma^2)); the constant 0.39894
// approximates 1 / sqrt(2 * pi). `sigma` must not be zero.
float Bilateral(float x, float sigma) {
	float exponent = -0.5 * (x * x) / (sigma * sigma);
	return 0.39894 * exp(exponent) / sigma;
}
|
|
|
|
/// Gaussian weight for the length of the vector `v` with standard deviation `sigma`.
// Same formula as Bilateral(), with the squared distance taken as dot(v, v):
// 0.39894 / sigma * exp(-dot(v, v) / (2 * sigma^2)). The constant 0.39894
// approximates 1 / sqrt(2 * pi). `sigma` must not be zero.
float Bilateral3(float3 v, float sigma) {
	float lengthSq = dot(v, v);
	return 0.39894 * exp(-0.5 * lengthSq / (sigma * sigma)) / sigma;
}
|
|
|
|
/// Bilateral blur along u_texelDelta.
// Every tap is weighted by the product of
//   - a distance term: Bilateral(k, bilateralSmoothing), squared, and
//   - a color term: Bilateral3(tap.rgb - origin.rgb, bilateralSharpness),
//     scaled by bZ so that an identical color yields exactly 1.
// The weighted colors are divided by the sum of all weights (Z). Alpha is
// taken unchanged from the center texel.
//
// The center texel is part of the sum. Without it, Z is 0 when u_radius is 0
// or when every neighbour differs so much from the center that its weight
// underflows, and color / Z becomes 0 / 0.
float4 PSBilateralBlur(VertDataOut vtx) : TARGET {
	float4 origin = u_image.SampleLevel(pointSampler, vtx.uv, 0);
	float2 uvOffset = float2(0, 0);

	// Scales the color term so that a zero color difference gives 1.0.
	float bZ = 1.0 / Bilateral(0.0, bilateralSharpness);

	// Center tap: distance 0 and color difference 0. Its color term times bZ
	// is 1, which leaves only the squared distance term.
	float centerWeight = Bilateral(0.0, bilateralSmoothing);
	centerWeight *= centerWeight;

	float Z = centerWeight;
	float3 color = origin.rgb * centerWeight;

	for (int k = 1; k <= u_radius; k++) {
		uvOffset += u_texelDelta;

		// Distance term, shared by the taps at +k and -k.
		float bKernel = Bilateral(float(k), bilateralSmoothing);
		bKernel *= bKernel;
		float bZKernel = bZ * bKernel;

		// Sample Color
		float3 l_p = u_image.SampleLevel(pointSampler, vtx.uv + uvOffset, 0).rgb;
		float3 l_n = u_image.SampleLevel(pointSampler, vtx.uv - uvOffset, 0).rgb;

		// Color term: taps that differ strongly from the center contribute less.
		float l_factor_p = Bilateral3(l_p - origin.rgb, bilateralSharpness) * bZKernel;
		float l_factor_n = Bilateral3(l_n - origin.rgb, bilateralSharpness) * bZKernel;
		Z = Z + l_factor_p + l_factor_n;

		// Store Color
		color += l_p * l_factor_p;
		color += l_n * l_factor_n;
	}

	return float4(color.rgb / Z, origin.a);
}
|
|
|
|
// Technique "Bilateral": a single pass running VSDefault and PSBilateralBlur.
technique Bilateral
|
|
{
|
|
pass
|
|
{
|
|
vertex_shader = VSDefault(vtx);
|
|
pixel_shader = PSBilateralBlur(vtx);
|
|
}
|
|
}
|