filters/color-grade: Redesign for use with LUTs

There is hardly any reason for us to recalculate everything all the time. A LUT lets us do the work once, cache it, and reuse it whenever necessary, reducing the GPU cost of Color Grading by almost 60% (even more on some GPUs). Additionally, this fixes the negative gamma issue, which has plagued the filter for a while.

In the future, once PR 4199 (https://github.com/obsproject/obs-studio/pull/4199) has been merged, we can cut away one intermediate rendering step currently required to make the effect work. Hopefully this will be with the 27.x release of OBS Studio.
This commit is contained in:
Michael Fabian 'Xaymar' Dirks 2021-02-02 22:54:25 +01:00 committed by Xaymar
parent f396f52054
commit e436d22367
5 changed files with 400 additions and 261 deletions

View file

@ -1110,6 +1110,7 @@ if(T_CHECK)
list(APPEND PROJECT_DEFINITIONS list(APPEND PROJECT_DEFINITIONS
ENABLE_FILTER_COLOR_GRADE ENABLE_FILTER_COLOR_GRADE
) )
set(REQUIRE_LUT ON)
endif() endif()
# Filter/Displacement # Filter/Displacement

View file

@ -1,18 +1,38 @@
// Parameters #include "shared.effect"
uniform float4x4 ViewProj;
//------------------------------------------------------------------------------
// Uniforms
//------------------------------------------------------------------------------
// Texture to which the effect is applied.
uniform texture2d image; uniform texture2d image;
// Lift: (rgb + lift.rgb) + lift.a
uniform float4 pLift; uniform float4 pLift;
// Gamma: pow(pow(rgb, gamma.rgb), gamma.a)
uniform float4 pGamma; uniform float4 pGamma;
// Gain: mul(rgb, gain.rgb) * gain.a
uniform float4 pGain; uniform float4 pGain;
// Offset: (rgb + offset.rgb) + offset.a
uniform float4 pOffset; uniform float4 pOffset;
// Tinting
uniform int pTintDetection; // 0 = HSV, 1 = HSL, 2 = YUV HD SDR uniform int pTintDetection; // 0 = HSV, 1 = HSL, 2 = YUV HD SDR
uniform int pTintMode; // 0 = Linear, 1 = Exp, 2 = Exp2, 3 = Log, 4 = Log10 uniform int pTintMode; // 0 = Linear, 1 = Exp, 2 = Exp2, 3 = Log, 4 = Log10
uniform float pTintExponent; uniform float pTintExponent;
uniform float3 pTintLow; uniform float3 pTintLow;
uniform float3 pTintMid; uniform float3 pTintMid;
uniform float3 pTintHig; uniform float3 pTintHig;
// Color Correction
uniform float4 pCorrection; uniform float4 pCorrection;
//------------------------------------------------------------------------------
// Defines
//------------------------------------------------------------------------------
#define TINT_DETECTION_HSV 0 #define TINT_DETECTION_HSV 0
#define TINT_DETECTION_HSL 1 #define TINT_DETECTION_HSL 1
#define TINT_DETECTION_YUV_SDR 2 #define TINT_DETECTION_YUV_SDR 2
@ -26,146 +46,30 @@ uniform float4 pCorrection;
#define C_e 2,7182818284590452353602874713527 #define C_e 2,7182818284590452353602874713527
#define C_log2_e 1.4426950408889634073599246810019 // Windows calculator: log(e(1)) / log(2) #define C_log2_e 1.4426950408889634073599246810019 // Windows calculator: log(e(1)) / log(2)
// Data //------------------------------------------------------------------------------
sampler_state def_sampler { // Functionality
Filter = Point; //------------------------------------------------------------------------------
AddressU = Clamp;
AddressV = Clamp; float3 grade_lift(float3 v) { // same as grade_offset?
MinLOD = 0; return (v.rgb + pLift.rgb) + pLift.a;
MaxLOD = 0;
}; };
struct VertDataIn { float3 grade_gamma(float3 v) {
float4 pos : POSITION; float3 s = sign(v); // Store sign for later use.
float2 uv : TEXCOORD0; float3 u = abs(v); // Remove sign
u.rgb = pow(pow(u.rgb, pGamma.rgb), pGamma.a);
return u * s; // Restore sign.
}; };
struct VertDataOut { float3 grade_gain(float3 v) {
float4 pos : POSITION; return (v.rgb * pGain.rgb) * pGain.a;
float2 uv : TEXCOORD0;
}; };
VertDataOut VSDefault(VertDataIn v) float3 grade_offset(float3 v) {
{ return (v.rgb + pOffset.rgb) + pOffset.a;
VertDataOut ov; };
ov.pos = mul(float4(v.pos.xyz, 1.0), ViewProj);
ov.uv = v.uv;
return ov;
}
// Utility functions ----------------------------------------------------------- float3 grade_tint(float3 v) {
float4 RGBtoYUV(float4 rgba, float3x3 yuv) {
return float4(
rgba.r * yuv._m00 + rgba.g * yuv._m01 + rgba.b * yuv._m02,
rgba.r * yuv._m10 + rgba.g * yuv._m11 + rgba.b * yuv._m12,
rgba.r * yuv._m20 + rgba.g * yuv._m21 + rgba.b * yuv._m22,
rgba.a
) + float4(0,0.5,0.5,0);
}
float4 YUVtoRGB(float4 yuva, float3x3 yuvi) {
yuva.gb -= 0.5;
return float4(
yuva.r * yuvi._m00 + yuva.g * yuvi._m01 + yuva.b * yuvi._m02,
yuva.r * yuvi._m10 + yuva.g * yuvi._m11 + yuva.b * yuvi._m12,
yuva.r * yuvi._m20 + yuva.g * yuvi._m21 + yuva.b * yuvi._m22,
yuva.a);
}
float4 RGBtoHSV(float4 RGBA) {
const float4 K = float4(0.0, -1.0 / 3.0, 2.0 / 3.0, -1.0);
const float e = 1.0e-10;
float4 p = lerp(float4(RGBA.bg, K.wz), float4(RGBA.gb, K.xy), step(RGBA.b, RGBA.g));
float4 q = lerp(float4(p.xyw, RGBA.r), float4(RGBA.r, p.yzx), step(p.x, RGBA.r));
float d = q.x - min(q.w, q.y);
return float4(abs(q.z + (q.w - q.y) / (6.0 * d + e)), d / (q.x + e), q.x, RGBA.a);
}
float4 HSVtoRGB(float4 HSVA) {
const float4 K = float4(1.0, 2.0 / 3.0, 1.0 / 3.0, 3.0);
float4 v = float4(0,0,0,0);
v.rgb = HSVA.z * lerp(K.xxx, clamp(abs(frac(HSVA.xxx + K.xyz) * 6.0 - K.www) - K.xxx, 0.0, 1.0), HSVA.y);
v.a = HSVA.a;
return v;
}
float4 HSLtoRGB(float4 HSLA) {
float3 rgb = clamp(
abs(
fmod(
HSLA.x * 6.0 + float3(0.0, 4.0, 2.0),
6.0
) - 3.0
) - 1.0,
0.0,
1.0
);
return float4(HSLA.z + HSLA.y * (rgb - 0.5) * (1.0 - abs(2.0 * HSLA.z - 1.0)), HSLA.a);
}
float4 RGBtoHSL(float4 RGBA) {
float h = 0.0;
float s = 0.0;
float l = 0.0;
float r = RGBA.r;
float g = RGBA.g;
float b = RGBA.b;
float cMin = min( r, min( g, b ) );
float cMax = max( r, max( g, b ) );
l = ( cMax + cMin ) / 2.0;
if ( cMax > cMin ) {
float cDelta = cMax - cMin;
//s = l < .05 ? cDelta / ( cMax + cMin ) : cDelta / ( 2.0 - ( cMax + cMin ) ); Original
s = l < .0 ? cDelta / ( cMax + cMin ) : cDelta / ( 2.0 - ( cMax + cMin ) );
if ( r == cMax ) {
h = ( g - b ) / cDelta;
} else if ( g == cMax ) {
h = 2.0 + ( b - r ) / cDelta;
} else {
h = 4.0 + ( r - g ) / cDelta;
}
if ( h < 0.0) {
h += 6.0;
}
h = h / 6.0;
}
return float4( h, s, l, RGBA.a );
}
// Actual Code -----------------------------------------------------------------
float4 Lift(float4 v)
{
v.rgb = pLift.aaa + v.rgb;
v.rgb = pLift.rgb + v.rgb;
return v;
}
float4 Gamma(float4 v)
{
v.rgb = pow(pow(v.rgb, pGamma.rgb), pGamma.aaa);
return v;
}
float4 Gain(float4 v)
{
v.rgb *= pGain.rgb;
v.rgb *= pGain.a;
return v;
}
float4 Offset(float4 v)
{
v.rgb = pOffset.aaa + v.rgb;
v.rgb = pOffset.rgb + v.rgb;
return v;
}
float4 Tint(float4 v)
{
float value = 0.; float value = 0.;
if (pTintDetection == TINT_DETECTION_HSV) { // HSV if (pTintDetection == TINT_DETECTION_HSV) { // HSV
value = RGBtoHSV(v).z; value = RGBtoHSV(v).z;
@ -199,29 +103,38 @@ float4 Tint(float4 v)
} }
v.rgb *= tint; v.rgb *= tint;
return v; return v;
} };
float4 Correction(float4 v) float3 grade_colorcorrection(float3 v) {
{ float3 v1 = RGBtoHSV(v);
float4 v1 = RGBtoHSV(v); v1.r += pCorrection.r; // Hue Shift
v1.r += pCorrection.r; v1.g *= pCorrection.g; // Saturation Multiplier
v1.g *= pCorrection.g; v1.b *= pCorrection.b; // Lightness Multiplier
v1.b *= pCorrection.b; float3 v2 = HSVtoRGB(v1);
float4 v2 = HSVtoRGB(v1);
// Contrast
v2.rgb = ((v2.rgb - 0.5) * max(pCorrection.a, 0)) + 0.5; v2.rgb = ((v2.rgb - 0.5) * max(pCorrection.a, 0)) + 0.5;
return v2;
}
float4 PSColorGrade(VertDataOut v) : TARGET return v2;
{ };
return Correction(Tint(Offset(Gain(Gamma(Lift(image.Sample(def_sampler, v.uv)))))));
} float4 PSDraw(VertexData vtx) : TARGET {
float4 v1 = image.Sample(PointClampSampler, vtx.uv);
float3 v2 = grade_lift(v1.rgb);
float3 v3 = grade_gamma(v2);
float3 v4 = grade_gain(v3);
float3 v5 = grade_offset(v4);
float3 v6 = grade_tint(v5);
float3 v7 = grade_colorcorrection(v6);
float3 vf = v7;
return float4(vf, v1.a);
};
technique Draw technique Draw
{ {
pass pass
{ {
vertex_shader = VSDefault(v); vertex_shader = DefaultVertexShader(vtx);
pixel_shader = PSColorGrade(v); pixel_shader = PSDraw(vtx);
} };
} };

View file

@ -215,6 +215,14 @@ Filter.ColorGrade.Correction.Hue="Hue Shift"
Filter.ColorGrade.Correction.Saturation="Saturation" Filter.ColorGrade.Correction.Saturation="Saturation"
Filter.ColorGrade.Correction.Lightness="Lightness" Filter.ColorGrade.Correction.Lightness="Lightness"
Filter.ColorGrade.Correction.Contrast="Contrast" Filter.ColorGrade.Correction.Contrast="Contrast"
Filter.ColorGrade.RenderMode="Render Mode"
Filter.ColorGrade.RenderMode.Description="The color grading effect is an expensive operation on the GPU, so two rendering modes exist:\n- 'Direct Rendering' calculates the entire color grade for every single pixel.\n- '#-Bit Look-Up Table' calculates a LUT first, and then renders using said LUT instead, which\nis significantly faster but sacrifices some accuracy. A 2-Bit LUT will be super fast but it\nwill not be as accurate as an 8-Bit LUT would be."
Filter.ColorGrade.RenderMode.Direct="Direct Rendering"
Filter.ColorGrade.RenderMode.LUT.2Bit="2-Bit Look-Up Table"
Filter.ColorGrade.RenderMode.LUT.4Bit="4-Bit Look-Up Table"
Filter.ColorGrade.RenderMode.LUT.6Bit="6-Bit Look-Up Table"
Filter.ColorGrade.RenderMode.LUT.8Bit="8-Bit Look-Up Table"
Filter.ColorGrade.RenderMode.LUT.10Bit="10-Bit Look-Up Table"
# Filter - Displacement # Filter - Displacement
Filter.Displacement="Displacement Mapping" Filter.Displacement="Displacement Mapping"

View file

@ -54,6 +54,14 @@
#define ST_CORRECTION ST ".Correction" #define ST_CORRECTION ST ".Correction"
#define ST_CORRECTION_(x) ST_CORRECTION "." D_VSTR(x) #define ST_CORRECTION_(x) ST_CORRECTION "." D_VSTR(x)
#define ST_RENDERMODE ST ".RenderMode"
#define ST_RENDERMODE_DIRECT ST_RENDERMODE ".Direct"
#define ST_RENDERMODE_LUT_2BIT ST_RENDERMODE ".LUT.2Bit"
#define ST_RENDERMODE_LUT_4BIT ST_RENDERMODE ".LUT.4Bit"
#define ST_RENDERMODE_LUT_6BIT ST_RENDERMODE ".LUT.6Bit"
#define ST_RENDERMODE_LUT_8BIT ST_RENDERMODE ".LUT.8Bit"
#define ST_RENDERMODE_LUT_10BIT ST_RENDERMODE ".LUT.10Bit"
#define RED Red #define RED Red
#define GREEN Green #define GREEN Green
#define BLUE Blue #define BLUE Blue
@ -76,37 +84,64 @@
using namespace streamfx::filter::color_grade; using namespace streamfx::filter::color_grade;
// TODO: Figure out a way to merge _lut_rt, _lut_texture, _rt_source, _rt_grad, _tex_source, _tex_grade, _source_updated and _grade_updated.
// Seriously this is too much GPU space wasted on unused trash.
#define LOCAL_PREFIX "<filter::color-grade> "
color_grade_instance::~color_grade_instance() {} color_grade_instance::~color_grade_instance() {}
color_grade_instance::color_grade_instance(obs_data_t* data, obs_source_t* self) : obs::source_instance(data, self) color_grade_instance::color_grade_instance(obs_data_t* data, obs_source_t* self)
: obs::source_instance(data, self), _effect(),
_lift(), _gamma(), _gain(), _offset(), _tint_detection(), _tint_luma(), _tint_exponent(), _tint_low(),
_tint_mid(), _tint_hig(), _correction(), _lut_enabled(true), _lut_depth(),
_cache_rt(), _cache_texture(), _cache_fresh(false),
_lut_initialized(false), _lut_dirty(true), _lut_producer(), _lut_consumer()
{ {
{ // Load the color grading effect.
auto file = streamfx::data_file_path("effects/color-grade.effect").u8string(); auto path = streamfx::data_file_path("effects/color-grade.effect");
if (!std::filesystem::exists(path)) {
DLOG_ERROR(LOCAL_PREFIX "Failed to locate effect file '%s'.", path.u8string().c_str());
throw std::runtime_error("Failed to load color grade effect.");
} else {
try { try {
_effect = gs::effect::create(file); _effect = gs::effect::create(path.u8string());
} catch (std::runtime_error& ex) { } catch (std::exception const& ex) {
DLOG_ERROR("<filter-color-grade> Loading effect '%s' failed with error(s): %s", file.c_str(), ex.what()); DLOG_ERROR(LOCAL_PREFIX "Failed to load effect '%s': %s", path.u8string().c_str(), ex.what());
throw; throw;
} }
} }
{
_rt_source = std::make_unique<gs::rendertarget>(GS_RGBA, GS_ZS_NONE); // Initialize LUT work flow.
{ try {
auto op = _rt_source->render(1, 1); _lut_producer = std::make_shared<gfx::lut::producer>();
_lut_consumer = std::make_shared<gfx::lut::consumer>();
_lut_initialized = true;
} catch (std::exception const& ex) {
DLOG_WARNING(LOCAL_PREFIX "Failed to initialize LUT rendering, falling back to direct rendering.\n%s",
ex.what());
_lut_initialized = false;
} }
_tex_source = _rt_source->get_texture();
} // Allocate render target for rendering.
{ try {
_rt_grade = std::make_unique<gs::rendertarget>(GS_RGBA, GS_ZS_NONE); allocate_rendertarget(GS_RGBA);
{ } catch (std::exception const& ex) {
auto op = _rt_grade->render(1, 1); DLOG_ERROR(LOCAL_PREFIX "Failed to acquire render target for rendering: %s", ex.what());
} throw;
_tex_grade = _rt_grade->get_texture();
} }
update(data); update(data);
} }
// (Re)creates the render cache target with the requested color format.
// The target is created without a depth/stencil buffer (GS_ZS_NONE).
void color_grade_instance::allocate_rendertarget(gs_color_format format)
{
	// Build the new target first; the member is only replaced once construction succeeded.
	auto fresh_target = std::make_unique<gs::rendertarget>(format, GS_ZS_NONE);
	_cache_rt         = std::move(fresh_target);
}
float_t fix_gamma_value(double_t v) float_t fix_gamma_value(double_t v)
{ {
if (v < 0.0) { if (v < 0.0) {
@ -157,86 +192,110 @@ void color_grade_instance::update(obs_data_t* data)
_correction.y = static_cast<float_t>(obs_data_get_double(data, ST_CORRECTION_(SATURATION)) / 100.0); _correction.y = static_cast<float_t>(obs_data_get_double(data, ST_CORRECTION_(SATURATION)) / 100.0);
_correction.z = static_cast<float_t>(obs_data_get_double(data, ST_CORRECTION_(LIGHTNESS)) / 100.0); _correction.z = static_cast<float_t>(obs_data_get_double(data, ST_CORRECTION_(LIGHTNESS)) / 100.0);
_correction.w = static_cast<float_t>(obs_data_get_double(data, ST_CORRECTION_(CONTRAST)) / 100.0); _correction.w = static_cast<float_t>(obs_data_get_double(data, ST_CORRECTION_(CONTRAST)) / 100.0);
}
void color_grade_instance::video_tick(float)
{
_source_updated = false;
_grade_updated = false;
}
void color_grade_instance::video_render(gs_effect_t* effect)
{
// Grab initial values.
obs_source_t* parent = obs_filter_get_parent(_self);
obs_source_t* target = obs_filter_get_target(_self);
uint32_t width = obs_source_get_base_width(target);
uint32_t height = obs_source_get_base_height(target);
gs_effect_t* effect_default = obs_get_base_effect(obs_base_effect::OBS_EFFECT_DEFAULT);
// Skip filter if anything is wrong.
if (!parent || !target || !width || !height || !effect_default) {
obs_source_skip_video_filter(_self);
return;
}
#ifdef ENABLE_PROFILING
gs::debug_marker gdmp{gs::debug_color_source, "Color Grading '%s'", obs_source_get_name(_self)};
#endif
if (!_source_updated) {
#ifdef ENABLE_PROFILING
gs::debug_marker gdm{gs::debug_color_cache, "Cache"};
#endif
if (obs_source_process_filter_begin(_self, GS_RGBA, OBS_ALLOW_DIRECT_RENDERING)) {
auto op = _rt_source->render(width, height);
gs_blend_state_push();
gs_reset_blend_state();
gs_set_cull_mode(GS_NEITHER);
gs_enable_color(true, true, true, true);
gs_enable_blending(false);
gs_enable_depth_test(false);
gs_enable_stencil_test(false);
gs_enable_stencil_write(false);
gs_ortho(0, static_cast<float_t>(width), 0, static_cast<float_t>(height), -1., 1.);
obs_source_process_filter_end(_self, effect ? effect : effect_default, width, height);
gs_blend_state_pop();
}
_tex_source = _rt_source->get_texture();
_source_updated = true;
}
if (!_grade_updated) {
#ifdef ENABLE_PROFILING
gs::debug_marker gdm{gs::debug_color_convert, "Calculate"};
#endif
{ {
auto op = _rt_grade->render(width, height); int64_t v = obs_data_get_int(data, ST_RENDERMODE);
// LUT status depends on selected option.
_lut_enabled = v != 0; // 0 (Direct)
if (v == -1) {
_lut_depth = gfx::lut::color_depth::_8;
} else if (v > 0) {
_lut_depth = static_cast<gfx::lut::color_depth>(v);
}
}
if (_lut_enabled && _lut_initialized)
_lut_dirty = true;
}
// Uploads the current user configuration to the color grade effect.
//
// Each uniform is looked up by name and only assigned when the returned
// parameter handle tests true, so a uniform that the effect does not expose
// is skipped instead of being written to.
//
// The "image" texture is intentionally not set here; callers bind it
// themselves before drawing.
void color_grade_instance::prepare_effect()
{
	// Lift, Gamma, Gain, Offset.
	if (auto p = _effect.get_parameter("pLift"); p) {
		p.set_float4(_lift);
	}
	if (auto p = _effect.get_parameter("pGamma"); p) {
		p.set_float4(_gamma);
	}
	if (auto p = _effect.get_parameter("pGain"); p) {
		p.set_float4(_gain);
	}
	if (auto p = _effect.get_parameter("pOffset"); p) {
		p.set_float4(_offset);
	}

	// Tinting.
	if (auto p = _effect.get_parameter("pTintDetection"); p) {
		p.set_int(static_cast<int32_t>(_tint_detection));
	}
	if (auto p = _effect.get_parameter("pTintMode"); p) {
		p.set_int(static_cast<int32_t>(_tint_luma));
	}
	if (auto p = _effect.get_parameter("pTintExponent"); p) {
		p.set_float(_tint_exponent);
	}
	if (auto p = _effect.get_parameter("pTintLow"); p) {
		p.set_float3(_tint_low);
	}
	if (auto p = _effect.get_parameter("pTintMid"); p) {
		p.set_float3(_tint_mid);
	}
	if (auto p = _effect.get_parameter("pTintHig"); p) {
		p.set_float3(_tint_hig);
	}

	// Color Correction.
	if (auto p = _effect.get_parameter("pCorrection"); p) {
		p.set_float4(_correction);
	}
}
void color_grade_instance::rebuild_lut()
{
#ifdef ENABLE_PROFILING
gs::debug_marker gdm{gs::debug_color_cache, "Rebuild LUT"};
#endif
// Generate a fresh LUT texture.
auto lut_texture = _lut_producer->produce(_lut_depth);
// Modify the LUT with our color grade.
if (lut_texture) {
// Check if we have a render target to work with and if it's the correct format.
if (!_lut_rt || (lut_texture->get_color_format() != _lut_rt->get_color_format())) {
// Create a new render target with new format.
_lut_rt = std::make_unique<gs::rendertarget>(lut_texture->get_color_format(), GS_ZS_NONE);
}
// Prepare our color grade effect.
prepare_effect();
// Assign texture.
if (auto p = _effect.get_parameter("image"); p) {
p.set_texture(lut_texture);
}
{ // Begin rendering.
auto op = _lut_rt->render(lut_texture->get_width(), lut_texture->get_height());
// Set up graphics context.
gs_ortho(0, 1, 0, 1, 0, 1);
gs_blend_state_push(); gs_blend_state_push();
gs_reset_blend_state();
gs_set_cull_mode(GS_NEITHER);
gs_enable_color(true, true, true, true);
gs_enable_blending(false); gs_enable_blending(false);
gs_enable_depth_test(false); gs_enable_color(true, true, true, true);
gs_enable_stencil_test(false); gs_enable_stencil_test(false);
gs_enable_stencil_write(false); gs_enable_stencil_write(false);
gs_ortho(0, 1, 0, 1, -1., 1.);
_effect.get_parameter("image").set_texture(_tex_source);
_effect.get_parameter("pLift").set_float4(_lift);
_effect.get_parameter("pGamma").set_float4(_gamma);
_effect.get_parameter("pGain").set_float4(_gain);
_effect.get_parameter("pOffset").set_float4(_offset);
_effect.get_parameter("pTintDetection").set_int(static_cast<int32_t>(_tint_detection));
_effect.get_parameter("pTintMode").set_int(static_cast<int32_t>(_tint_luma));
_effect.get_parameter("pTintExponent").set_float(_tint_exponent);
_effect.get_parameter("pTintLow").set_float3(_tint_low);
_effect.get_parameter("pTintMid").set_float3(_tint_mid);
_effect.get_parameter("pTintHig").set_float3(_tint_hig);
_effect.get_parameter("pCorrection").set_float4(_correction);
while (gs_effect_loop(_effect.get_object(), "Draw")) { while (gs_effect_loop(_effect.get_object(), "Draw")) {
streamfx::gs_draw_fullscreen_tri(); streamfx::gs_draw_fullscreen_tri();
@ -245,21 +304,140 @@ void color_grade_instance::video_render(gs_effect_t* effect)
gs_blend_state_pop(); gs_blend_state_pop();
} }
_tex_grade = _rt_grade->get_texture(); _lut_rt->get_texture(_lut_texture);
_source_updated = true; if (!_lut_texture) {
throw std::runtime_error("Failed to produce modified LUT texture.");
}
} else {
throw std::runtime_error("Failed to produce LUT texture.");
}
_lut_dirty = false;
}
// Per-tick reset: marks both the capture cache and the render cache as stale.
// video_render() checks these flags before re-rendering either cache.
void color_grade_instance::video_tick(float)
{
	_ccache_fresh = _cache_fresh = false;
}
void color_grade_instance::video_render(gs_effect_t*)
{
// Grab initial values.
obs_source_t* parent = obs_filter_get_parent(_self);
obs_source_t* target = obs_filter_get_target(_self);
uint32_t width = obs_source_get_base_width(target);
uint32_t height = obs_source_get_base_height(target);
// Skip filter if anything is wrong.
if (!parent || !target || !width || !height) {
obs_source_skip_video_filter(_self);
return;
} }
// Render final result.
{
#ifdef ENABLE_PROFILING #ifdef ENABLE_PROFILING
gs::debug_marker gdm{gs::debug_color_render, "Render"}; gs::debug_marker gdmp{gs::debug_color_source, "Color Grading '%s'", obs_source_get_name(_self)};
#endif #endif
// TODO: Optimize this once (https://github.com/obsproject/obs-studio/pull/4199) is merged.
// - We can skip the original capture and reduce the overall impact of this.
// 1. Capture the filter/source rendered above this.
if (!_ccache_fresh) {
#ifdef ENABLE_PROFILING
gs::debug_marker gdmp{gs::debug_color_cache, "Cache '%s'", obs_source_get_name(target)};
#endif
if (!_ccache_rt) {
_ccache_rt = std::make_shared<gs::rendertarget>(GS_RGBA, GS_ZS_NONE);
}
{
auto op = _ccache_rt->render(width, height);
gs_ortho(0, static_cast<float_t>(width), 0, static_cast<float_t>(height), 0, 1);
obs_source_process_filter_begin(_self, GS_RGBA, OBS_ALLOW_DIRECT_RENDERING);
obs_source_process_filter_end(_self, obs_get_base_effect(OBS_EFFECT_DEFAULT), width, height);
}
_ccache_rt->get_texture(_ccache_texture);
if (!_ccache_texture) {
throw std::runtime_error("Failed to cache original source.");
}
_ccache_fresh = true;
}
// 2. Apply one of the two rendering methods (LUT or Direct).
if (_lut_initialized && _lut_enabled) {
try {
#ifdef ENABLE_PROFILING
gs::debug_marker gdm{gs::debug_color_convert, "LUT Rendering"};
#endif
if (_lut_dirty) {
rebuild_lut();
_cache_fresh = false;
}
if (!_cache_fresh) {
{ // Render the source to the cache.
auto op = _cache_rt->render(width, height);
gs_ortho(0, 1., 0, 1., 0, 1);
auto effect = _lut_consumer->prepare(_lut_depth, _lut_texture);
effect->get_parameter("image").set_texture(_ccache_texture);
while (gs_effect_loop(effect->get_object(), "Draw")) {
streamfx::gs_draw_fullscreen_tri();
}
}
_cache_rt->get_texture(_cache_texture);
_cache_fresh = true;
}
} catch (std::exception const& ex) {
_lut_rt.reset();
_lut_texture.reset();
_lut_enabled = false;
DLOG_WARNING(LOCAL_PREFIX "Reverting to direct rendering due to error: %s", ex.what());
}
}
if ((!_lut_initialized || !_lut_enabled) && !_cache_fresh) {
#ifdef ENABLE_PROFILING
gs::debug_marker gdm{gs::debug_color_convert, "Direct Rendering"};
#endif
// Reallocate the rendertarget if necessary.
if (_cache_rt->get_color_format() != GS_RGBA) {
allocate_rendertarget(GS_RGBA);
}
{ // Render the source to the cache.
auto op = _cache_rt->render(width, height);
gs_ortho(0, static_cast<float_t>(width), 0, static_cast<float_t>(height), 0, 1);
// TODO: Check if clearing things is required.
prepare_effect();
obs_source_process_filter_begin(_self, GS_RGBA, OBS_ALLOW_DIRECT_RENDERING);
obs_source_process_filter_end(_self, _effect.get_object(), width, height);
}
_cache_rt->get_texture(_cache_texture);
_cache_fresh = true;
}
if (!_cache_texture) {
throw std::runtime_error("Failed to cache processed source.");
}
// 3. Render the output cache.
{
#ifdef ENABLE_PROFILING
gs::debug_marker gdm{gs::debug_color_cache_render, "Draw Cache"};
#endif
auto shader = obs_get_base_effect(OBS_EFFECT_DEFAULT); auto shader = obs_get_base_effect(OBS_EFFECT_DEFAULT);
gs_enable_depth_test(false); gs_enable_depth_test(false);
while (gs_effect_loop(shader, "Draw")) { while (gs_effect_loop(shader, "Draw")) {
gs_effect_set_texture(gs_effect_get_param_by_name(shader, "image"), gs_effect_set_texture(gs_effect_get_param_by_name(shader, "image"),
_tex_grade ? _tex_grade->get_object() : nullptr); _cache_texture ? _cache_texture->get_object() : nullptr);
gs_draw_sprite(nullptr, 0, width, height); gs_draw_sprite(nullptr, 0, width, height);
} }
} }
@ -269,7 +447,7 @@ color_grade_factory::color_grade_factory()
{ {
_info.id = PREFIX "filter-color-grade"; _info.id = PREFIX "filter-color-grade";
_info.type = OBS_SOURCE_TYPE_FILTER; _info.type = OBS_SOURCE_TYPE_FILTER;
_info.output_flags = OBS_SOURCE_VIDEO; _info.output_flags = OBS_SOURCE_VIDEO | OBS_SOURCE_CUSTOM_DRAW;
set_resolution_enabled(false); set_resolution_enabled(false);
finish_setup(); finish_setup();
@ -318,6 +496,8 @@ void color_grade_factory::get_defaults2(obs_data_t* data)
obs_data_set_default_double(data, ST_CORRECTION_(SATURATION), 100.0); obs_data_set_default_double(data, ST_CORRECTION_(SATURATION), 100.0);
obs_data_set_default_double(data, ST_CORRECTION_(LIGHTNESS), 100.0); obs_data_set_default_double(data, ST_CORRECTION_(LIGHTNESS), 100.0);
obs_data_set_default_double(data, ST_CORRECTION_(CONTRAST), 100.0); obs_data_set_default_double(data, ST_CORRECTION_(CONTRAST), 100.0);
obs_data_set_default_int(data, ST_RENDERMODE, -1);
} }
obs_properties_t* color_grade_factory::get_properties2(color_grade_instance* data) obs_properties_t* color_grade_factory::get_properties2(color_grade_instance* data)
@ -433,6 +613,24 @@ obs_properties_t* color_grade_factory::get_properties2(color_grade_instance* dat
} }
obs_properties_add_float_slider(grp, ST_TINT_EXPONENT, D_TRANSLATE(ST_TINT_EXPONENT), 0., 10., 0.01); obs_properties_add_float_slider(grp, ST_TINT_EXPONENT, D_TRANSLATE(ST_TINT_EXPONENT), 0., 10., 0.01);
{
auto p = obs_properties_add_list(grp, ST_RENDERMODE, D_TRANSLATE(ST_RENDERMODE), OBS_COMBO_TYPE_LIST,
OBS_COMBO_FORMAT_INT);
obs_property_set_long_description(p, D_TRANSLATE(D_DESC(ST_RENDERMODE)));
std::pair<const char*, int64_t> els[] = {
{S_STATE_AUTOMATIC, -1},
{ST_RENDERMODE_DIRECT, 0},
{ST_RENDERMODE_LUT_2BIT, static_cast<int64_t>(gfx::lut::color_depth::_2)},
{ST_RENDERMODE_LUT_4BIT, static_cast<int64_t>(gfx::lut::color_depth::_4)},
{ST_RENDERMODE_LUT_6BIT, static_cast<int64_t>(gfx::lut::color_depth::_6)},
{ST_RENDERMODE_LUT_8BIT, static_cast<int64_t>(gfx::lut::color_depth::_8)},
//{ST_RENDERMODE_LUT_10BIT, static_cast<int64_t>(gfx::lut::color_depth::_10)},
};
for (auto kv : els) {
obs_property_list_add_int(p, D_TRANSLATE(kv.first), kv.second);
}
}
} }
return pr; return pr;

View file

@ -19,6 +19,9 @@
#pragma once #pragma once
#include <vector> #include <vector>
#include "gfx/lut/gfx-lut-consumer.hpp"
#include "gfx/lut/gfx-lut-producer.hpp"
#include "gfx/lut/gfx-lut.hpp"
#include "obs/gs/gs-mipmapper.hpp" #include "obs/gs/gs-mipmapper.hpp"
#include "obs/gs/gs-rendertarget.hpp" #include "obs/gs/gs-rendertarget.hpp"
#include "obs/gs/gs-texture.hpp" #include "obs/gs/gs-texture.hpp"
@ -44,17 +47,7 @@ namespace streamfx::filter::color_grade {
class color_grade_instance : public obs::source_instance { class color_grade_instance : public obs::source_instance {
gs::effect _effect; gs::effect _effect;
// Source // User Configuration
std::unique_ptr<gs::rendertarget> _rt_source;
std::shared_ptr<gs::texture> _tex_source;
bool _source_updated;
// Grading
std::unique_ptr<gs::rendertarget> _rt_grade;
std::shared_ptr<gs::texture> _tex_grade;
bool _grade_updated;
// Parameters
vec4 _lift; vec4 _lift;
vec4 _gamma; vec4 _gamma;
vec4 _gain; vec4 _gain;
@ -66,15 +59,41 @@ namespace streamfx::filter::color_grade {
vec3 _tint_mid; vec3 _tint_mid;
vec3 _tint_hig; vec3 _tint_hig;
vec4 _correction; vec4 _correction;
bool _lut_enabled;
gfx::lut::color_depth _lut_depth;
// Capture Cache
std::shared_ptr<gs::rendertarget> _ccache_rt;
std::shared_ptr<gs::texture> _ccache_texture;
bool _ccache_fresh;
// LUT work flow
bool _lut_initialized;
bool _lut_dirty;
std::shared_ptr<gfx::lut::producer> _lut_producer;
std::shared_ptr<gfx::lut::consumer> _lut_consumer;
std::shared_ptr<gs::rendertarget> _lut_rt;
std::shared_ptr<gs::texture> _lut_texture;
// Render Cache
std::shared_ptr<gs::rendertarget> _cache_rt;
std::shared_ptr<gs::texture> _cache_texture;
bool _cache_fresh;
public: public:
color_grade_instance(obs_data_t* data, obs_source_t* self); color_grade_instance(obs_data_t* data, obs_source_t* self);
virtual ~color_grade_instance(); virtual ~color_grade_instance();
void allocate_rendertarget(gs_color_format format);
virtual void load(obs_data_t* data) override; virtual void load(obs_data_t* data) override;
virtual void migrate(obs_data_t* data, uint64_t version) override; virtual void migrate(obs_data_t* data, uint64_t version) override;
virtual void update(obs_data_t* data) override; virtual void update(obs_data_t* data) override;
void prepare_effect();
void rebuild_lut();
virtual void video_tick(float_t time) override; virtual void video_tick(float_t time) override;
virtual void video_render(gs_effect_t* effect) override; virtual void video_render(gs_effect_t* effect) override;
}; };