obs-StreamFX/data/effects/blur.effect

// OBS Default
// Matrix applied to the vertex position in VSDefault.
uniform float4x4 ViewProj;

// Settings (Shared)
// Source texture read by every pixel shader in this file.
uniform texture2d u_image;
// Not referenced by the shaders visible in this file; presumably the source size in pixels - confirm with the host code.
uniform float2 u_imageSize;
// Not referenced by the shaders visible in this file; presumably the UV size of one pixel - confirm with the host code.
uniform float2 u_imageTexel;
// Number of taps taken on each side of the center texel.
uniform int u_radius;
// Divisor used to normalize the box blurs; presumably 2 * u_radius + 1 - confirm with the host code.
uniform int u_diameter;
// UV offset between two neighbouring taps; the blur runs along this vector.
uniform float2 u_texelDelta;

// Kernel Settings
//uniform float registerkernel[25];
// Gaussian weights, read from the red channel at (tap index, u_radius - 1) * kernelTexel.
uniform texture2d kernel;
// Scale that turns the (tap index, u_radius - 1) coordinate into kernel texture UVs.
uniform float2 kernelTexel;

// Bilateral Settings
// Sigma of the distance term in PSBilateralBlur.
uniform float bilateralSmoothing;
// Sigma of the color-difference term in PSBilateralBlur.
uniform float bilateralSharpness;

// Data
// Unfiltered sampler: point filtering, clamped addressing, LOD fixed at 0.
// Used wherever a shader needs the value of exactly one texel.
sampler_state pointSampler {
	Filter    = Point;
	AddressU  = Clamp;
	AddressV  = Clamp;
	MinLOD    = 0;
	MaxLOD    = 0;
};

// Filtered sampler: linear filtering, clamped addressing, LOD fixed at 0.
// Used by the "Linear" blur variants to fetch a blend of two neighbouring texels in one tap.
sampler_state linearSampler {
	Filter    = Linear;
	AddressU  = Clamp;
	AddressV  = Clamp;
	MinLOD    = 0;
	MaxLOD    = 0;
};

// Input of VSDefault: one vertex with its position and texture coordinate.
struct VertDataIn {
	float4 pos : POSITION;
	float2 uv  : TEXCOORD0;
};

// Output of VSDefault and input of every pixel shader in this file.
struct VertDataOut {
	float4 pos : POSITION; // Position after multiplication with ViewProj.
	float2 uv  : TEXCOORD0; // Texture coordinate, copied unchanged from the input vertex.
};

// Shared vertex shader: projects the vertex with ViewProj (forcing w to 1.0)
// and forwards the texture coordinate untouched.
VertDataOut VSDefault(VertDataIn vtx)
{
	VertDataOut result;
	result.uv  = vtx.uv;
	result.pos = mul(float4(vtx.pos.xyz, 1.0), ViewProj);
	return result;
}

/// Blur: Box
// One-dimensional box blur along u_texelDelta.
// Sums the center texel plus u_radius texels on each side (all point sampled)
// and divides the sum by u_diameter.
float4 PSBoxBlur(VertDataOut vtx) : TARGET {
	// The running sum starts with the center texel.
	float4 sum = u_image.SampleLevel(pointSampler, vtx.uv, 0);

	// Every iteration adds the tap at distance 'tap' on both sides of the center.
	for (int tap = 1; tap <= u_radius; tap++) {
		float2 shift = u_texelDelta * tap;
		sum += u_image.SampleLevel(pointSampler, vtx.uv + shift, 0);
		sum += u_image.SampleLevel(pointSampler, vtx.uv - shift, 0);
	}

	// All taps carry the same weight.
	return sum / u_diameter;
}

// Box blur, one point-sampled tap per texel.
technique Box
{
	pass
	{
		vertex_shader = VSDefault(vtx);
		pixel_shader  = PSBoxBlur(vtx);
	}
}

/// Blur: Box (Linear Optimized)
// With linear filtering, one tap placed halfway between two texels returns their average, which roughly halves the number of samples.
// One-dimensional box blur along u_texelDelta that fetches texels in pairs.
// A linear tap halfway between two texels returns their average, so that tap
// multiplied by 2 equals the sum of both texels. The sum is divided by u_diameter.
float4 PSBoxBlurLinear(VertDataOut vtx) : TARGET {
	// Tap layout ('S' = single point tap, '^-S-^' = one linear tap covering two texels):
	//
	// Even radius (4):
	// [-4, -3, -2, -1,  0, +1, +2, +3, +4]
	//   ^-S-^   ^-S-^   S   ^-S-^   ^-S-^
	// Total Samples: 5 (n+1)
	//
	// Odd radius (3):
	// [-3, -2, -1,  0, +1, +2, +3]
	//   ^-S-^   ^-S-^   S   ^-S-^
	// Total Samples: 4 (n+1)

	float2 halfDelta = u_texelDelta / 2.0;
	float4 sum = float4(0, 0, 0, 0);
	int firstPair = 0;

	if (u_radius % 2 != 0) {
		// Odd: texels -1 and 0 share one linear tap, texel +1 is fetched on its own,
		// the remaining pairs start at distance 2.
		float4 lone = u_image.SampleLevel(pointSampler, vtx.uv + u_texelDelta, 0);
		float4 pair = u_image.SampleLevel(linearSampler, vtx.uv - halfDelta, 0);
		sum = lone + pair * 2;
		firstPair = 2;
	} else {
		// Even: the center texel is fetched on its own, the pairs start at distance 1.
		sum = u_image.SampleLevel(pointSampler, vtx.uv, 0);
		firstPair = 1;
	}

	// Each iteration covers texels (k, k + 1) on both sides with one linear tap each.
	for (int k = firstPair; k <= u_radius; k += 2) {
		float2 offset = k * u_texelDelta + halfDelta;
		sum += u_image.SampleLevel(linearSampler, vtx.uv + offset, 0) * 2;
		sum += u_image.SampleLevel(linearSampler, vtx.uv - offset, 0) * 2;
	}

	return sum / u_diameter;
}

// Box blur that fetches texel pairs through the linear sampler.
technique BoxLinear
{
	pass
	{
		vertex_shader = VSDefault(vtx);
		pixel_shader  = PSBoxBlurLinear(vtx);
	}
}

/// Blur: Gaussian
// ToDo: Switch to array Kernel instead of Texture kernel.
// One-dimensional weighted blur along u_texelDelta.
// The weight for distance k is the red channel of the kernel texture at
// (k, u_radius - 1) * kernelTexel; the same weight is applied to both sides.
// No normalization happens here, so the kernel row is presumably already
// normalized by the host - confirm with the code that fills the texture.
float4 PSGaussianBlur(VertDataOut vtx) : TARGET {
	// Row of the kernel texture that belongs to the current radius.
	int kernelRow = u_radius - 1;

	// Center tap.
	float middleWeight = kernel.SampleLevel(pointSampler, (float2(0, kernelRow) * kernelTexel), 0).r;
	float4 blurred = u_image.SampleLevel(pointSampler, vtx.uv, 0) * middleWeight;

	// Walk outwards one texel at a time, mirroring every tap around the center.
	float2 reach = float2(0, 0);
	for (int k = 1; k <= u_radius; k++) {
		reach += u_texelDelta;
		float tapWeight = kernel.SampleLevel(pointSampler, (float2(k, kernelRow) * kernelTexel), 0).r;
		float4 ahead = u_image.SampleLevel(pointSampler, vtx.uv + reach, 0);
		float4 behind = u_image.SampleLevel(pointSampler, vtx.uv - reach, 0);
		blurred += (ahead + behind) * tapWeight;
	}

	return blurred;
}

// Kernel-weighted blur, one point-sampled tap per texel.
technique Gaussian
{
	pass
	{
		vertex_shader = VSDefault(vtx);
		pixel_shader  = PSGaussianBlur(vtx);
	}
}

/// Blur: Gaussian Linear
// One-dimensional weighted blur along u_texelDelta that fetches texels in pairs.
// Weights come from the red channel of the kernel texture at
// (k, u_radius - 1) * kernelTexel, exactly as in PSGaussianBlur.
//
// For a pair of texels (k, k + 1) with weights g0 and g1, a single linear tap at
// distance k + g1 / (g0 + g1) returns
//   (g0 * texel[k] + g1 * texel[k + 1]) / (g0 + g1),
// so multiplying it by (g0 + g1) yields the same contribution as two point taps.
// A tap placed exactly halfway would instead give both texels the same weight.
float4 PSGaussianLinearBlur(VertDataOut vtx) : TARGET {
	// Tap layout ('S' = single point tap, '^-S-^' = one linear tap covering two texels).
	// The center texel is always fetched on its own.
	//
	// Radius 5 (Odd):
	// [-5, -4, -3, -2, -1,  0, +1, +2, +3, +4, +5]
	//   S   ^-S-^   ^-S-^   S   ^-S-^   ^-S-^   S
	// Total Samples: 7 (n+2)
	//
	// Radius 4 (Even):
	// [-4, -3, -2, -1,  0, +1, +2, +3, +4]
	//   ^-S-^   ^-S-^   S   ^-S-^   ^-S-^
	// Total Samples: 5 (n+1)
	//
	// Radius 3 (Odd):
	// [-3, -2, -1,  0, +1, +2, +3]
	//   S   ^-S-^   S   ^-S-^   S
	// Total Samples: 5 (n+2)
	//
	// Radius 2 (Even):
	// [-2, -1,  0, +1, +2]
	//   ^-S-^   S   ^-S-^
	// Total Samples: 3 (n+1)

	// Row of the kernel texture that belongs to the current radius.
	int kernelRow = u_radius - 1;

	float4 origin = u_image.SampleLevel(pointSampler, vtx.uv, 0);
	float4 final = origin * kernel.SampleLevel(pointSampler, (float2(0, kernelRow) * kernelTexel), 0).r;

	for (int k = 1; k < u_radius; k+=2) {
		float l_g0 = kernel.SampleLevel(pointSampler, (float2(k, kernelRow) * kernelTexel), 0).r;
		float l_g1 = kernel.SampleLevel(pointSampler, (float2(k + 1, kernelRow) * kernelTexel), 0).r;
		float l_gSum = l_g0 + l_g1;

		// Fraction of the way from texel k to texel k + 1 at which linear filtering
		// reproduces the ratio g0 : g1. Falls back to the midpoint when both weights
		// are zero (the pair then contributes nothing anyway) to avoid dividing by zero.
		float l_t = (l_gSum > 0.0) ? (l_g1 / l_gSum) : 0.5;
		float2 offset = (k + l_t) * u_texelDelta;

		float4 l_p = u_image.SampleLevel(linearSampler, vtx.uv + offset, 0);
		float4 l_n = u_image.SampleLevel(linearSampler, vtx.uv - offset, 0);
		final += (l_p + l_n) * l_gSum;
	}

	if (u_radius % 2 == 1) {
		// Odd radius: the outermost texel on each side has no partner and is fetched on its own.
		float4 l_endP = u_image.SampleLevel(pointSampler, vtx.uv + u_texelDelta * u_radius, 0);
		float4 l_endN = u_image.SampleLevel(pointSampler, vtx.uv - u_texelDelta * u_radius, 0);
		float krn = kernel.SampleLevel(pointSampler, (float2(u_radius, kernelRow) * kernelTexel), 0).r;
		final += (l_endP + l_endN) * krn;
	}

	return final;
}

// Kernel-weighted blur that fetches texel pairs through the linear sampler.
technique GaussianLinear
{
	pass
	{
		vertex_shader = VSDefault(vtx);
		pixel_shader  = PSGaussianLinearBlur(vtx);
	}
}

/// Blur: Bilateral
// Normal distribution density for a scalar distance x and standard deviation sigma.
// 0.39894 approximates 1 / sqrt(2 * pi).
float Bilateral(float x, float sigma) {
	float exponent = -0.5 * (x*x) / (sigma*sigma);
	return 0.39894 * exp(exponent) / sigma;
}

// Same density as Bilateral(), evaluated for the length of a three component
// vector: dot(v, v) takes the place of x * x.
// 0.39894 approximates 1 / sqrt(2 * pi).
float Bilateral3(float3 v, float sigma) {
	float exponent = -0.5 * dot(v,v) / (sigma*sigma);
	return 0.39894 * exp(exponent) / sigma;
}

// One-dimensional bilateral blur along u_texelDelta.
// Every tap is weighted by two terms:
//  - distance term: Bilateral(k, bilateralSmoothing), squared,
//  - color term:    Bilateral3(tap - center, bilateralSharpness), scaled by bZ so
//                   that an identical color yields exactly 1.
// The result is the weighted average of the RGB values; alpha is taken unchanged
// from the center texel.
//
// The center texel takes part in the average. Leaving it out would make the
// result 0 / 0 whenever u_radius is 0 or all neighbour weights underflow.
float4 PSBilateralBlur(VertDataOut vtx) : TARGET {
	float4 origin = u_image.SampleLevel(pointSampler, vtx.uv, 0);
	float2 uvOffset = float2(0, 0);

	// Normalizes the color term: Bilateral3(0, sharpness) * bZ == 1.
	float bZ = 1.0 / Bilateral(0.0, bilateralSharpness);

	// Center tap: distance 0 and color difference 0, so its weight is only the
	// (squared) distance term.
	float bCenter = Bilateral(0.0, bilateralSmoothing);
	bCenter *= bCenter;
	float Z = bCenter;
	float3 color = origin.rgb * bCenter;

	for (int k = 1; k <= u_radius; k++) {
		uvOffset += u_texelDelta;

		// Distance term, shared by the taps on both sides.
		float bKernel = Bilateral(float(k), bilateralSmoothing);
		bKernel *= bKernel;
		float bZKernel = bZ  * bKernel;

		// Sample Color
		float3 l_p = u_image.SampleLevel(pointSampler, vtx.uv + uvOffset, 0).rgb;
		float3 l_n = u_image.SampleLevel(pointSampler, vtx.uv - uvOffset, 0).rgb;

		// Final weight: distance term times color term.
		float l_factor_p = Bilateral3(l_p - origin.rgb, bilateralSharpness) * bZKernel;
		float l_factor_n = Bilateral3(l_n - origin.rgb, bilateralSharpness) * bZKernel;
		Z = Z + l_factor_p + l_factor_n;

		// Store Color
		color += l_p * l_factor_p;
		color += l_n * l_factor_n;
	}

	return float4(color.rgb / Z, origin.a);
}

// Bilateral blur: taps are weighted by distance and by color difference to the center texel.
technique Bilateral
{
	pass
	{
		vertex_shader = VSDefault(vtx);
		pixel_shader  = PSBilateralBlur(vtx);
	}
}