// Parameters:
/// OBS Default
// View-projection matrix; VSDefault multiplies each vertex position by it.
uniform float4x4 ViewProj;
/// Texture
// Source image that PSBlur1D samples.
uniform texture2d pImage;
// Multiplied with pStepScale and the tap offset to form the UV step between
// taps (presumably the size of one texel in UV space - confirm against the
// host code).
uniform float2 pImageTexel;
/// Blur
// Blur extent in taps on each side of the center texel; PSBlur1D compares the
// tap index against it and tests its rounded value for odd/even.
uniform float pSize;
// NOTE(review): pAngle and pCenter are not referenced by the shader code
// in this file.
uniform float pAngle;
uniform float2 pCenter;
// Per-axis scale applied to pImageTexel; together they give the blur direction
// and spacing.
uniform float2 pStepScale;
/// Gaussian
// 32 float4 values = 128 scalar kernel weights, read one scalar at a time
// through GetKernelAt(i).
uniform float4 pKernel[32];

// Upper bound of the tap loop in PSBlur1D.
#define MAX_BLUR_SIZE 128

// # Linear Optimization
// While the normal way is to sample every texel within pSize, linear
// optimization takes advantage of the fact that most people, especially after
// compression, will not be able to tell the difference between a linear
// approximation and the actual thing.
//
// Instead of sampling every texel like this:
//
//   |Tx|Tx|Tx|Tx|Tx|
// Tx|-2|-1| 0|+1|+2|
//
// Linear optimization will sample like this:
//
//   |Tx|Tx|Tx|Tx|Tx|
// Tx| -1  | 0| +1  |
//
// This effectively removes half the necessary samples and looks identical
// when used with box blur. However there is an edge case when the blur width
// is not a multiple of two, where two additional samples have to be spent on
// reading the outer edge:
//
//   |Tx|Tx|Tx|Tx|Tx|Tx|Tx|
// Tx|-2| -1  | 0| +1  |+2|
//
// or this alternative pattern that uses two fewer samples:
//
//   |Tx|Tx|Tx|Tx|Tx|Tx|Tx|
// Tx|  0  | +1  | +2  |+3|
//
// or this alternative pattern that also uses two fewer samples:
//
//   |Tx|Tx|Tx|Tx|Tx|Tx|Tx|
// Tx| -2  | -1~~+1 | +2  |
//
// With careful planning this can even be used for other types of Blur, such as
// Gaussian Blur, which suffers a larger hit - however there are better and
// faster alternatives than linear sampling with Gaussian Blur, such as
// Dual Filtering ("Dual Kawase").
// Sampler
// Linear filtering is required: PSBlur1D relies on the hardware blending two
// neighbouring texels in a single fetch. Clamp addressing keeps taps that fall
// outside the image on the edge texels.
sampler_state linearSampler {
	Filter   = Linear;
	AddressU = Clamp;
	AddressV = Clamp;
	MinLOD   = 0;
	MaxLOD   = 0;
};

// Default Vertex Shader and Data
struct VertDataIn {
	float4 pos : POSITION;
	float2 uv  : TEXCOORD0;
};

struct VertDataOut {
	float4 pos : POSITION;
	float2 uv  : TEXCOORD0;
};

// Transforms the vertex position by ViewProj (w forced to 1.0) and passes the
// texture coordinate through unchanged.
VertDataOut VSDefault(VertDataIn vtx) {
	VertDataOut vert_out;
	vert_out.pos = mul(float4(vtx.pos.xyz, 1.0), ViewProj);
	vert_out.uv  = vtx.uv;
	return vert_out;
}

// Functions

// Returns the i-th scalar kernel weight. The weights are packed four per
// float4 in pKernel, so weight i lives in component (i % 4) of element (i / 4).
// The index uses plain integer division; no float round-trip is needed.
float GetKernelAt(int i) {
	return ((float[4])(pKernel[i / 4]))[i % 4];
}

// Blur 1 Dimensional
//
// Computes a one-dimensional weighted blur along (pImageTexel * pStepScale)
// using linear sampling: taps n and n+1 are fetched with a single bilinear
// sample, and an odd blur size spends one extra sample per side on the
// outermost tap.
//
// vtx: interpolated vertex data; only vtx.uv is read.
// Returns the weighted sum of the sampled colors.
float4 PSBlur1D(VertDataOut vtx) : TARGET {
	// Round pSize once and use the same integer for the loop bound, the parity
	// test and the outer tap, so a fractional pSize cannot make them disagree.
	// pKernel holds MAX_BLUR_SIZE weights (indices 0..MAX_BLUR_SIZE-1) and the
	// highest index read below equals the size, hence the clamp.
	int size = min(int(round(pSize)), MAX_BLUR_SIZE - 1);
	bool is_odd = ((size % 2) == 1);

	// UV distance between two neighbouring taps.
	float2 texel_step = pImageTexel * pStepScale;

	// Center tap.
	float4 final = pImage.Sample(linearSampler, vtx.uv) * GetKernelAt(0);

	// y = yes, s = skip, b = break
	// Size-> | 1| 2| 3| 4| 5| 6| 7|
	// -------+--+--+--+--+--+--+--+
	//  n=1   | b| y| y| y| y| y| y|
	//  n=2   |  |bs| s| s| s| s| s|
	//  n=3   |  | b| b| y| y| y| y|
	//  n=4   |  |  |  |bs| s| s| s|
	//  n=5   |  |  |  | b| b| y| y|
	//  n=6   |  |  |  |  |  |bs| s|
	//  n=7   |  |  |  |  |  | b| b|
	//  n=8   |  |  |  |  |  |  |  |

	// Loop unrolling is only possible with a fixed known maximum.
	// Some compilers may unroll up to x iterations, but most will not.
	for (int n = 1; n <= MAX_BLUR_SIZE; n += 2) {
		// Different from normal box, early exit instead of late exit.
		if (n >= size) {
			break;
		}

		float weight_near = GetKernelAt(n);
		float weight_far  = GetKernelAt(n + 1);
		float kernel      = weight_near + weight_far;

		// One bilinear fetch at n + t returns (1-t)*T(n) + t*T(n+1). Choosing
		// t = weight_far / kernel and scaling by kernel reproduces
		// weight_near*T(n) + weight_far*T(n+1). Equal weights (box blur) give
		// t = 0.5. A zero weight sum contributes nothing, so any offset works;
		// 0.5 is used to avoid dividing by zero.
		float offset = (kernel > 0.0) ? (weight_far / kernel) : 0.5;

		float2 nstep = texel_step * (float(n) + offset);
		final += pImage.Sample(linearSampler, vtx.uv + nstep) * kernel;
		final += pImage.Sample(linearSampler, vtx.uv - nstep) * kernel;
	}

	// Odd sizes leave the outermost tap without a partner; sample it directly.
	if (is_odd) {
		float kernel = GetKernelAt(size);
		float2 nstep = texel_step * float(size);
		final += pImage.Sample(linearSampler, vtx.uv + nstep) * kernel;
		final += pImage.Sample(linearSampler, vtx.uv - nstep) * kernel;
	}

	return final;
}

technique Draw {
	pass {
		vertex_shader = VSDefault(vtx);
		pixel_shader  = PSBlur1D(vtx);
	}
}