glsl: Implement Load/WriteGlobal

along with some other misc changes and fixes
This commit is contained in:
ameerj 2021-06-03 19:15:36 -04:00
parent af9696059c
commit 8d8ce24f20
9 changed files with 185 additions and 98 deletions

View file

@ -9,6 +9,14 @@
namespace Shader::Backend::GLSL {
namespace {
// Maps a byte offset to the index (0-3) of the 4-byte word it occupies inside its 16-byte group.
u32 CbufIndex(u32 offset) {
    const u32 word_index{offset >> 2};
    return word_index & 3;
}
// Returns the GLSL component letter ('x', 'y', 'z' or 'w') selected by CbufIndex(offset).
char OffsetSwizzle(u32 offset) {
    constexpr char components[]{'x', 'y', 'z', 'w'};
    return components[CbufIndex(offset)];
}
std::string_view InterpDecorator(Interpolation interp) {
switch (interp) {
case Interpolation::Smooth:
@ -382,6 +390,8 @@ void EmitContext::DefineGenericOutput(size_t index, u32 invocations) {
}
void EmitContext::DefineHelperFunctions() {
header += "\n#define ftoi floatBitsToInt\n#define ftou floatBitsToUint\n"
"#define itof intBitsToFloat\n#define utof uintBitsToFloat\n";
if (info.uses_global_increment || info.uses_shared_increment) {
header += "uint CasIncrement(uint op_a,uint op_b){return(op_a>=op_b)?0u:(op_a+1u);}\n";
}
@ -391,7 +401,7 @@ void EmitContext::DefineHelperFunctions() {
}
if (info.uses_atomic_f32_add) {
header += "uint CasFloatAdd(uint op_a,float op_b){return "
"floatBitsToUint(uintBitsToFloat(op_a)+op_b);}\n";
"ftou(utof(op_a)+op_b);}\n";
}
if (info.uses_atomic_f32x2_add) {
header += "uint CasFloatAdd32x2(uint op_a,vec2 op_b){return "
@ -423,6 +433,80 @@ void EmitContext::DefineHelperFunctions() {
if (info.uses_atomic_s32_max) {
header += "uint CasMaxS32(uint op_a,uint op_b){return uint(max(int(op_a),int(op_b)));}";
}
if (info.uses_global_memory) {
    // Builds six GLSL helpers (WriteGlobal32/64/128 and LoadGlobal32/64/128). Each helper tests
    // the 64-bit address against every used storage buffer and redirects the access to the
    // matching "<stage>_ssbo<N>" array, indexed in 32-bit words.
    std::string store_32{"void WriteGlobal32(uint64_t addr,uint data){\n"};
    std::string store_64{"void WriteGlobal64(uint64_t addr,uvec2 data){\n"};
    std::string store_128{"void WriteGlobal128(uint64_t addr,uvec4 data){\n"};
    std::string fetch_32{"uint LoadGlobal32(uint64_t addr){\n"};
    std::string fetch_64{"uvec2 LoadGlobal64(uint64_t addr){\n"};
    std::string fetch_128{"uvec4 LoadGlobal128(uint64_t addr){\n"};
    // Appends text that is identical in all six helpers.
    const auto append_common{[&](const std::string& text) {
        store_32 += text;
        store_64 += text;
        store_128 += text;
        fetch_32 += text;
        fetch_64 += text;
        fetch_128 += text;
    }};
    const size_t buffer_count{info.storage_buffers_descriptors.size()};
    for (size_t slot = 0; slot < buffer_count; ++slot) {
        if (!info.nvn_buffer_used[slot]) {
            continue;
        }
        const auto& desc{info.storage_buffers_descriptors[slot]};
        const auto cbuf_name{fmt::format("{}_cbuf{}", stage_name, desc.cbuf_index)};
        // GLSL expression reading the 32-bit word at a byte offset of the constant buffer.
        const auto cbuf_word{[&](u32 byte_offset) {
            return fmt::format("ftou({}[{}].{})", cbuf_name, byte_offset / 16,
                               OffsetSwizzle(byte_offset));
        }};
        const u32 addr_offset{desc.cbuf_offset};
        // The size words are read 8 bytes after the address words.
        const u32 size_offset{desc.cbuf_offset + 8};

        // Base address of this buffer, packed from two consecutive constant-buffer words.
        const auto base_var{fmt::format("ssbo_addr{}", slot)};
        append_common(fmt::format("uint64_t {}=packUint2x32(uvec2({},{}));", base_var,
                                  cbuf_word(addr_offset), cbuf_word(addr_offset + 4)));

        // Range check: base <= addr < base + size.
        const auto size_words{cbuf_word(size_offset) + "," + cbuf_word(size_offset + 4)};
        append_common(fmt::format("if((addr>={})&&(addr<({}+\nuint64_t(uvec2({}))))){{",
                                  base_var, base_var, size_words));

        const auto buffer_name{fmt::format("{}_ssbo{}", stage_name, slot)};
        // GLSL expression for the buffer word located byte_offset bytes after addr.
        const auto word{[&](u32 byte_offset) {
            if (byte_offset == 0) {
                return fmt::format("{}[uint(addr-{})>>2]", buffer_name, base_var);
            }
            return fmt::format("{}[uint(addr-{}+{})>>2]", buffer_name, base_var, byte_offset);
        }};
        store_32 += word(0) + "=data;return;}";
        store_64 += word(0) + "=data.x;" + word(4) + "=data.y;return;}";
        store_128 += word(0) + "=data.x;" + word(4) + "=data.y;" + word(8) + "=data.z;" +
                     word(12) + "=data.w;return;}";
        fetch_32 += "return " + word(0) + ";}";
        fetch_64 += "return uvec2(" + word(0) + "," + word(4) + ");}";
        fetch_128 += "return uvec4(" + word(0) + "," + word(4) + "," + word(8) + "," +
                     word(12) + ");}";
    }
    // No buffer matched: stores fall through silently, loads return zero.
    store_32 += "}\n";
    store_64 += "}\n";
    store_128 += "}\n";
    fetch_32 += "return 0u;}\n";
    fetch_64 += "return uvec2(0);}\n";
    fetch_128 += "return uvec4(0);}\n";
    header += store_32;
    header += store_64;
    header += store_128;
    header += fetch_32;
    header += fetch_64;
    header += fetch_128;
}
}
void EmitContext::SetupImages(Bindings& bindings) {

View file

@ -39,7 +39,7 @@ void SsboCasFunctionF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& bindi
ctx.var_alloc.Consume(offset))};
const auto ret{ctx.var_alloc.Define(inst, GlslVarType::U32)};
ctx.Add(cas_loop.data(), ssbo, ret, ssbo, function, ssbo, value, ret);
ctx.AddF32("{}=uintBitsToFloat({});", inst, ret);
ctx.AddF32("{}=utof({});", inst, ret);
}
} // namespace

View file

@ -40,7 +40,7 @@ void EmitBitCastU16F16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I
}
// Emits, through ctx.AddU32, a GLSL assignment that sets the result of `inst` to the raw bits of
// the float expression `value`.
// NOTE(review): two assignments for the same `inst` appear here, one spelling out
// floatBitsToUint and one using the ftou shorthand. This looks like the before/after pair of a
// diff; confirm that only the ftou form is meant to remain.
void EmitBitCastU32F32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
ctx.AddU32("{}=floatBitsToUint({});", inst, value);
ctx.AddU32("{}=ftou({});", inst, value);
}
void EmitBitCastU64F64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
@ -52,7 +52,7 @@ void EmitBitCastF16U16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::I
}
// Emits, through ctx.AddF32, a GLSL assignment that sets the result of `inst` to the float whose
// bit pattern is the unsigned expression `value`.
// NOTE(review): two assignments for the same `inst` appear here, one spelling out
// uintBitsToFloat and one using the utof shorthand. This looks like the before/after pair of a
// diff; confirm that only the utof form is meant to remain.
void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) {
ctx.AddF32("{}=uintBitsToFloat({});", inst, value);
ctx.AddF32("{}=utof({});", inst, value);
}
void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value) {

View file

@ -45,14 +45,13 @@ void EmitGetCbufU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst&
[[maybe_unused]] const IR::Value& binding,
[[maybe_unused]] const IR::Value& offset) {
if (offset.IsImmediate()) {
ctx.AddU32("{}=bitfieldExtract(floatBitsToUint({}_cbuf{}[{}].{}),int({}),8);", inst,
ctx.stage_name, binding.U32(), offset.U32() / 16, OffsetSwizzle(offset.U32()),
ctx.AddU32("{}=bitfieldExtract(ftou({}_cbuf{}[{}].{}),int({}),8);", inst, ctx.stage_name,
binding.U32(), offset.U32() / 16, OffsetSwizzle(offset.U32()),
(offset.U32() % 4) * 8);
} else {
const auto offset_var{ctx.var_alloc.Consume(offset)};
ctx.AddU32(
"{}=bitfieldExtract(floatBitsToUint({}_cbuf{}[{}/16][({}>>2)%4]),int(({}%4)*8),8);",
inst, ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var);
ctx.AddU32("{}=bitfieldExtract(ftou({}_cbuf{}[{}/16][({}>>2)%4]),int(({}%4)*8),8);", inst,
ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var);
}
}
@ -60,14 +59,13 @@ void EmitGetCbufS8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst&
[[maybe_unused]] const IR::Value& binding,
[[maybe_unused]] const IR::Value& offset) {
if (offset.IsImmediate()) {
ctx.AddU32("{}=bitfieldExtract(floatBitsToInt({}_cbuf{}[{}].{}),int({}),8);", inst,
ctx.stage_name, binding.U32(), offset.U32() / 16, OffsetSwizzle(offset.U32()),
ctx.AddU32("{}=bitfieldExtract(ftoi({}_cbuf{}[{}].{}),int({}),8);", inst, ctx.stage_name,
binding.U32(), offset.U32() / 16, OffsetSwizzle(offset.U32()),
(offset.U32() % 4) * 8);
} else {
const auto offset_var{ctx.var_alloc.Consume(offset)};
ctx.AddU32(
"{}=bitfieldExtract(floatBitsToInt({}_cbuf{}[{}/16][({}>>2)%4]),int(({}%4)*8),8);",
inst, ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var);
ctx.AddU32("{}=bitfieldExtract(ftoi({}_cbuf{}[{}/16][({}>>2)%4]),int(({}%4)*8),8);", inst,
ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var);
}
}
@ -75,12 +73,12 @@ void EmitGetCbufU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst
[[maybe_unused]] const IR::Value& binding,
[[maybe_unused]] const IR::Value& offset) {
if (offset.IsImmediate()) {
ctx.AddU32("{}=bitfieldExtract(floatBitsToUint({}_cbuf{}[{}].{}),int({}),16);", inst,
ctx.stage_name, binding.U32(), offset.U32() / 16, OffsetSwizzle(offset.U32()),
ctx.AddU32("{}=bitfieldExtract(ftou({}_cbuf{}[{}].{}),int({}),16);", inst, ctx.stage_name,
binding.U32(), offset.U32() / 16, OffsetSwizzle(offset.U32()),
((offset.U32() / 2) % 2) * 16);
} else {
const auto offset_var{ctx.var_alloc.Consume(offset)};
ctx.AddU32("{}=bitfieldExtract(floatBitsToUint({}_cbuf{}[{}/16][({}>>2)%4]),int((({}/"
ctx.AddU32("{}=bitfieldExtract(ftou({}_cbuf{}[{}/16][({}>>2)%4]),int((({}/"
"2)%2)*16),16);",
inst, ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var);
}
@ -90,12 +88,12 @@ void EmitGetCbufS16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst
[[maybe_unused]] const IR::Value& binding,
[[maybe_unused]] const IR::Value& offset) {
if (offset.IsImmediate()) {
ctx.AddU32("{}=bitfieldExtract(floatBitsToInt({}_cbuf{}[{}].{}),int({}),16);", inst,
ctx.stage_name, binding.U32(), offset.U32() / 16, OffsetSwizzle(offset.U32()),
ctx.AddU32("{}=bitfieldExtract(ftoi({}_cbuf{}[{}].{}),int({}),16);", inst, ctx.stage_name,
binding.U32(), offset.U32() / 16, OffsetSwizzle(offset.U32()),
((offset.U32() / 2) % 2) * 16);
} else {
const auto offset_var{ctx.var_alloc.Consume(offset)};
ctx.AddU32("{}=bitfieldExtract(floatBitsToInt({}_cbuf{}[{}/16][({}>>2)%4]),int((({}/"
ctx.AddU32("{}=bitfieldExtract(ftoi({}_cbuf{}[{}/16][({}>>2)%4]),int((({}/"
"2)%2)*16),16);",
inst, ctx.stage_name, binding.U32(), offset_var, offset_var, offset_var);
}
@ -104,12 +102,12 @@ void EmitGetCbufS16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst
// Emits GLSL that reads one 32-bit unsigned word from the stage's constant buffer `binding`.
// An immediate `offset` is resolved here: element index offset/16, component chosen by
// OffsetSwizzle. A dynamic offset emits the same arithmetic in GLSL ([off/16][(off>>2)%4]).
// NOTE(review): each branch contains both a floatBitsToUint call line and an ftou call line,
// sharing one argument tail, so the text is not well-formed as it stands. This looks like the
// old/new lines of a diff; confirm that only the ftou form is meant to remain.
void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset) {
if (offset.IsImmediate()) {
ctx.AddU32("{}=floatBitsToUint({}_cbuf{}[{}].{});", inst, ctx.stage_name, binding.U32(),
ctx.AddU32("{}=ftou({}_cbuf{}[{}].{});", inst, ctx.stage_name, binding.U32(),
offset.U32() / 16, OffsetSwizzle(offset.U32()));
} else {
// Consume the variable holding the runtime offset; it is substituted twice below.
const auto offset_var{ctx.var_alloc.Consume(offset)};
ctx.AddU32("{}=floatBitsToUint({}_cbuf{}[{}/16][({}>>2)%4]);", inst, ctx.stage_name,
binding.U32(), offset_var, offset_var);
ctx.AddU32("{}=ftou({}_cbuf{}[{}/16][({}>>2)%4]);", inst, ctx.stage_name, binding.U32(),
offset_var, offset_var);
}
}
@ -128,15 +126,14 @@ void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
// Emits GLSL that reads two consecutive 32-bit unsigned words (at `offset` and `offset` + 4) from
// the stage's constant buffer `binding` and combines them into a uvec2.
// An immediate offset is resolved here (element index /16, component via OffsetSwizzle); a
// dynamic offset emits the equivalent index arithmetic in GLSL.
// NOTE(review): both floatBitsToUint and ftou variants of each call are present and interleaved,
// so the text is not well-formed as it stands. This looks like the old/new lines of a diff;
// confirm that only the ftou form is meant to remain.
void EmitGetCbufU32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding,
const IR::Value& offset) {
if (offset.IsImmediate()) {
ctx.AddU32x2(
"{}=uvec2(floatBitsToUint({}_cbuf{}[{}].{}),floatBitsToUint({}_cbuf{}[{}].{}));", inst,
ctx.stage_name, binding.U32(), offset.U32() / 16, OffsetSwizzle(offset.U32()),
ctx.stage_name, binding.U32(), (offset.U32() + 4) / 16,
OffsetSwizzle(offset.U32() + 4));
ctx.AddU32x2("{}=uvec2(ftou({}_cbuf{}[{}].{}),ftou({}_cbuf{}[{}].{}));", inst,
ctx.stage_name, binding.U32(), offset.U32() / 16, OffsetSwizzle(offset.U32()),
ctx.stage_name, binding.U32(), (offset.U32() + 4) / 16,
OffsetSwizzle(offset.U32() + 4));
} else {
// Consume the variable holding the runtime offset; it is substituted four times below.
const auto offset_var{ctx.var_alloc.Consume(offset)};
ctx.AddU32x2("{}=uvec2(floatBitsToUint({}_cbuf{}[{}/16][({}/"
"4)%4]),floatBitsToUint({}_cbuf{}[({}+4)/16][(({}+4)>>2)%4]));",
ctx.AddU32x2("{}=uvec2(ftou({}_cbuf{}[{}/16][({}/"
"4)%4]),ftou({}_cbuf{}[({}+4)/16][(({}+4)>>2)%4]));",
inst, ctx.stage_name, binding.U32(), offset_var, offset_var, ctx.stage_name,
binding.U32(), offset_var, offset_var);
}
}
@ -180,13 +177,13 @@ void EmitGetAttribute(EmitContext& ctx, IR::Inst& inst, IR::Attribute attr,
ctx.AddF32("{}=gl_PointCoord.{};", inst, swizzle);
break;
case IR::Attribute::InstanceId:
ctx.AddF32("{}=intBitsToFloat(gl_InstanceID);", inst);
ctx.AddF32("{}=itof(gl_InstanceID);", inst);
break;
case IR::Attribute::VertexId:
ctx.AddF32("{}=intBitsToFloat(gl_VertexID);", inst);
ctx.AddF32("{}=itof(gl_VertexID);", inst);
break;
case IR::Attribute::FrontFace:
ctx.AddF32("{}=intBitsToFloat(gl_FrontFacing?-1:0);", inst);
ctx.AddF32("{}=itof(gl_FrontFacing?-1:0);", inst);
break;
case IR::Attribute::TessellationEvaluationPointU:
case IR::Attribute::TessellationEvaluationPointV:
@ -231,7 +228,7 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view val
// layer extension");
break;
}
ctx.Add("gl_ViewportIndex=floatBitsToInt({});", value);
ctx.Add("gl_ViewportIndex=ftoi({});", value);
break;
case IR::Attribute::ClipDistance0:
case IR::Attribute::ClipDistance1:

View file

@ -212,7 +212,11 @@ void EmitImageSampleDrefImplicitLod([[maybe_unused]] EmitContext& ctx,
}
} else {
if (ctx.stage == Stage::Fragment) {
ctx.AddF32("{}=texture({},{}({},{}){});", inst, texture, cast, coords, dref, bias);
if (info.type == TextureType::ColorArrayCube) {
ctx.AddF32("{}=texture({},vec4({}),{});", inst, texture, coords, dref);
} else {
ctx.AddF32("{}=texture({},{}({},{}){});", inst, texture, cast, coords, dref, bias);
}
} else {
ctx.AddF32("{}=textureLod({},{}({},{}),0.0);", inst, texture, cast, coords, dref);
}
@ -238,6 +242,7 @@ void EmitImageSampleDrefExplicitLod([[maybe_unused]] EmitContext& ctx,
throw NotImplementedException("EmitImageSampleDrefExplicitLod Lod clamp samples");
}
const auto texture{Texture(ctx, info, index)};
const auto cast{ShadowSamplerVecCast(info.type)};
if (!offset.IsEmpty()) {
const auto offset_str{CastToIntVec(ctx.var_alloc.Consume(offset), info)};
if (info.type == TextureType::ColorArrayCube) {
@ -251,7 +256,8 @@ void EmitImageSampleDrefExplicitLod([[maybe_unused]] EmitContext& ctx,
if (info.type == TextureType::ColorArrayCube) {
ctx.AddF32("{}=textureLod({},{},{},{});", inst, texture, coords, dref, lod_lc);
} else {
ctx.AddF32("{}=textureLod({},vec3({},{}),{});", inst, texture, coords, dref, lod_lc);
ctx.AddF32("{}=textureLod({},{}({},{}),{});", inst, texture, cast, coords, dref,
lod_lc);
}
}
}

View file

@ -105,9 +105,9 @@ void EmitLoadGlobalU8(EmitContext& ctx);
void EmitLoadGlobalS8(EmitContext& ctx);
void EmitLoadGlobalU16(EmitContext& ctx);
void EmitLoadGlobalS16(EmitContext& ctx);
void EmitLoadGlobal32(EmitContext& ctx, std::string_view address);
void EmitLoadGlobal64(EmitContext& ctx, std::string_view address);
void EmitLoadGlobal128(EmitContext& ctx, std::string_view address);
void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, std::string_view address);
void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, std::string_view address);
void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, std::string_view address);
void EmitWriteGlobalU8(EmitContext& ctx);
void EmitWriteGlobalS8(EmitContext& ctx);
void EmitWriteGlobalU16(EmitContext& ctx);

View file

@ -9,6 +9,62 @@
#include "shader_recompiler/frontend/ir/value.h"
namespace Shader::Backend::GLSL {
// Stub emitter: produces no GLSL; the body only calls NotImplemented() (defined outside this
// view). `ctx` is unused.
void EmitLoadGlobalU8([[maybe_unused]] EmitContext& ctx) {
NotImplemented();
}
// Stub emitter: produces no GLSL; the body only calls NotImplemented() (defined outside this
// view). `ctx` is unused.
void EmitLoadGlobalS8([[maybe_unused]] EmitContext& ctx) {
NotImplemented();
}
// Stub emitter: produces no GLSL; the body only calls NotImplemented() (defined outside this
// view). `ctx` is unused.
void EmitLoadGlobalU16([[maybe_unused]] EmitContext& ctx) {
NotImplemented();
}
// Stub emitter: produces no GLSL; the body only calls NotImplemented() (defined outside this
// view). `ctx` is unused.
void EmitLoadGlobalS16([[maybe_unused]] EmitContext& ctx) {
NotImplemented();
}
// Emits a call to the generated LoadGlobal32 GLSL helper with `address` as its argument and
// assigns the returned value to the U32 result of `inst`.
void EmitLoadGlobal32(EmitContext& ctx, IR::Inst& inst, std::string_view address) {
    constexpr const char* glsl_statement{"{}=LoadGlobal32({});"};
    ctx.AddU32(glsl_statement, inst, address);
}
// Emits a call to the generated LoadGlobal64 GLSL helper with `address` as its argument and
// assigns the returned value to the U32x2 result of `inst`.
void EmitLoadGlobal64(EmitContext& ctx, IR::Inst& inst, std::string_view address) {
    constexpr const char* glsl_statement{"{}=LoadGlobal64({});"};
    ctx.AddU32x2(glsl_statement, inst, address);
}
// Emits a call to the generated LoadGlobal128 GLSL helper with `address` as its argument and
// assigns the returned value to the U32x4 result of `inst`.
void EmitLoadGlobal128(EmitContext& ctx, IR::Inst& inst, std::string_view address) {
    constexpr const char* glsl_statement{"{}=LoadGlobal128({});"};
    ctx.AddU32x4(glsl_statement, inst, address);
}
// Stub emitter: produces no GLSL; the body only calls NotImplemented() (defined outside this
// view). `ctx` is unused.
void EmitWriteGlobalU8([[maybe_unused]] EmitContext& ctx) {
NotImplemented();
}
// Stub emitter: produces no GLSL; the body only calls NotImplemented() (defined outside this
// view). `ctx` is unused.
void EmitWriteGlobalS8([[maybe_unused]] EmitContext& ctx) {
NotImplemented();
}
// Stub emitter: produces no GLSL; the body only calls NotImplemented() (defined outside this
// view). `ctx` is unused.
void EmitWriteGlobalU16([[maybe_unused]] EmitContext& ctx) {
NotImplemented();
}
// Stub emitter: produces no GLSL; the body only calls NotImplemented() (defined outside this
// view). `ctx` is unused.
void EmitWriteGlobalS16([[maybe_unused]] EmitContext& ctx) {
NotImplemented();
}
// Emits a statement calling the generated WriteGlobal32 GLSL helper with `address` and `value`.
void EmitWriteGlobal32(EmitContext& ctx, std::string_view address, std::string_view value) {
    constexpr const char* glsl_statement{"WriteGlobal32({},{});"};
    ctx.Add(glsl_statement, address, value);
}
// Emits a statement calling the generated WriteGlobal64 GLSL helper with `address` and `value`.
void EmitWriteGlobal64(EmitContext& ctx, std::string_view address, std::string_view value) {
    constexpr const char* glsl_statement{"WriteGlobal64({},{});"};
    ctx.Add(glsl_statement, address, value);
}
// Emits a statement calling the generated WriteGlobal128 GLSL helper with `address` and `value`.
void EmitWriteGlobal128(EmitContext& ctx, std::string_view address, std::string_view value) {
    constexpr const char* glsl_statement{"WriteGlobal128({},{});"};
    ctx.Add(glsl_statement, address, value);
}
void EmitLoadStorageU8([[maybe_unused]] EmitContext& ctx, IR::Inst& inst,
[[maybe_unused]] const IR::Value& binding,
[[maybe_unused]] const IR::Value& offset) {

View file

@ -232,62 +232,6 @@ void EmitUndefU64(EmitContext& ctx, IR::Inst& inst) {
NotImplemented();
}
// Placeholder emitter: ignores `ctx` and only calls NotImplemented() (defined outside this view).
void EmitLoadGlobalU8(EmitContext& ctx) {
NotImplemented();
}
// Placeholder emitter: ignores `ctx` and only calls NotImplemented() (defined outside this view).
void EmitLoadGlobalS8(EmitContext& ctx) {
NotImplemented();
}
// Placeholder emitter: ignores `ctx` and only calls NotImplemented() (defined outside this view).
void EmitLoadGlobalU16(EmitContext& ctx) {
NotImplemented();
}
// Placeholder emitter: ignores `ctx` and only calls NotImplemented() (defined outside this view).
void EmitLoadGlobalS16(EmitContext& ctx) {
NotImplemented();
}
// Placeholder emitter: ignores `ctx` and `address` and only calls NotImplemented() (defined
// outside this view).
void EmitLoadGlobal32(EmitContext& ctx, std::string_view address) {
NotImplemented();
}
// Placeholder emitter: ignores `ctx` and `address` and only calls NotImplemented() (defined
// outside this view).
void EmitLoadGlobal64(EmitContext& ctx, std::string_view address) {
NotImplemented();
}
// Placeholder emitter: ignores `ctx` and `address` and only calls NotImplemented() (defined
// outside this view).
void EmitLoadGlobal128(EmitContext& ctx, std::string_view address) {
NotImplemented();
}
// Placeholder emitter: ignores `ctx` and only calls NotImplemented() (defined outside this view).
void EmitWriteGlobalU8(EmitContext& ctx) {
NotImplemented();
}
// Placeholder emitter: ignores `ctx` and only calls NotImplemented() (defined outside this view).
void EmitWriteGlobalS8(EmitContext& ctx) {
NotImplemented();
}
// Placeholder emitter: ignores `ctx` and only calls NotImplemented() (defined outside this view).
void EmitWriteGlobalU16(EmitContext& ctx) {
NotImplemented();
}
// Placeholder emitter: ignores `ctx` and only calls NotImplemented() (defined outside this view).
void EmitWriteGlobalS16(EmitContext& ctx) {
NotImplemented();
}
// Placeholder emitter: ignores `ctx`, `address` and `value` and only calls NotImplemented()
// (defined outside this view).
void EmitWriteGlobal32(EmitContext& ctx, std::string_view address, std::string_view value) {
NotImplemented();
}
// Placeholder emitter: ignores `ctx`, `address` and `value` and only calls NotImplemented()
// (defined outside this view).
void EmitWriteGlobal64(EmitContext& ctx, std::string_view address, std::string_view value) {
NotImplemented();
}
// Placeholder emitter: ignores `ctx`, `address` and `value` and only calls NotImplemented()
// (defined outside this view).
void EmitWriteGlobal128(EmitContext& ctx, std::string_view address, std::string_view value) {
NotImplemented();
}
// Placeholder emitter: ignores `ctx` and only calls NotImplemented() (defined outside this view).
void EmitGetZeroFromOp(EmitContext& ctx) {
NotImplemented();
}

View file

@ -58,13 +58,13 @@ std::string FormatFloat(std::string_view value, IR::Type type) {
// TODO: Confirm FP64 nan/inf
if (type == IR::Type::F32) {
if (value == "nan") {
return "uintBitsToFloat(0x7fc00000)";
return "utof(0x7fc00000)";
}
if (value == "inf") {
return "uintBitsToFloat(0x7f800000)";
return "utof(0x7f800000)";
}
if (value == "-inf") {
return "uintBitsToFloat(0xff800000)";
return "utof(0xff800000)";
}
}
if (value.find_first_of('e') != std::string_view::npos) {