shader_decode: Implement LDG and basic cbuf tracking
This commit is contained in:
parent
ba38d91fe2
commit
3b84e04af1
7 changed files with 240 additions and 10 deletions
|
@ -87,6 +87,7 @@ add_library(video_core STATIC
|
||||||
shader/decode.cpp
|
shader/decode.cpp
|
||||||
shader/shader_ir.cpp
|
shader/shader_ir.cpp
|
||||||
shader/shader_ir.h
|
shader/shader_ir.h
|
||||||
|
shader/track.cpp
|
||||||
surface.cpp
|
surface.cpp
|
||||||
surface.h
|
surface.h
|
||||||
textures/astc.cpp
|
textures/astc.cpp
|
||||||
|
|
|
@ -208,6 +208,8 @@ enum class UniformType : u64 {
|
||||||
SignedShort = 3,
|
SignedShort = 3,
|
||||||
Single = 4,
|
Single = 4,
|
||||||
Double = 5,
|
Double = 5,
|
||||||
|
Quad = 6,
|
||||||
|
UnsignedQuad = 7,
|
||||||
};
|
};
|
||||||
|
|
||||||
enum class StoreType : u64 {
|
enum class StoreType : u64 {
|
||||||
|
@ -784,6 +786,12 @@ union Instruction {
|
||||||
BitField<44, 2, u64> unknown;
|
BitField<44, 2, u64> unknown;
|
||||||
} st_l;
|
} st_l;
|
||||||
|
|
||||||
|
union {
|
||||||
|
BitField<48, 3, UniformType> type;
|
||||||
|
BitField<46, 2, u64> cache_mode;
|
||||||
|
BitField<20, 24, s64> immediate_offset;
|
||||||
|
} ldg;
|
||||||
|
|
||||||
union {
|
union {
|
||||||
BitField<0, 3, u64> pred0;
|
BitField<0, 3, u64> pred0;
|
||||||
BitField<3, 3, u64> pred3;
|
BitField<3, 3, u64> pred3;
|
||||||
|
|
|
@ -34,6 +34,8 @@ using Operation = const OperationNode&;
|
||||||
enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 };
|
enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 };
|
||||||
constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
|
constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
|
||||||
static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float));
|
static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float));
|
||||||
|
constexpr u32 MAX_GLOBALMEMORY_ELEMENTS =
|
||||||
|
static_cast<u32>(RasterizerOpenGL::MaxGlobalMemorySize) / sizeof(float);
|
||||||
|
|
||||||
enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat };
|
enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat };
|
||||||
|
|
||||||
|
@ -143,6 +145,7 @@ public:
|
||||||
DeclareInputAttributes();
|
DeclareInputAttributes();
|
||||||
DeclareOutputAttributes();
|
DeclareOutputAttributes();
|
||||||
DeclareConstantBuffers();
|
DeclareConstantBuffers();
|
||||||
|
DeclareGlobalMemory();
|
||||||
DeclareSamplers();
|
DeclareSamplers();
|
||||||
|
|
||||||
code.AddLine("void execute_" + suffix + "() {");
|
code.AddLine("void execute_" + suffix + "() {");
|
||||||
|
@ -190,12 +193,15 @@ public:
|
||||||
ShaderEntries GetShaderEntries() const {
|
ShaderEntries GetShaderEntries() const {
|
||||||
ShaderEntries entries;
|
ShaderEntries entries;
|
||||||
for (const auto& cbuf : ir.GetConstantBuffers()) {
|
for (const auto& cbuf : ir.GetConstantBuffers()) {
|
||||||
ConstBufferEntry desc(cbuf.second, stage, GetConstBufferBlock(cbuf.first), cbuf.first);
|
entries.const_buffers.emplace_back(cbuf.second, stage, GetConstBufferBlock(cbuf.first),
|
||||||
entries.const_buffers.push_back(desc);
|
cbuf.first);
|
||||||
}
|
}
|
||||||
for (const auto& sampler : ir.GetSamplers()) {
|
for (const auto& sampler : ir.GetSamplers()) {
|
||||||
SamplerEntry desc(sampler, stage, GetSampler(sampler));
|
entries.samplers.emplace_back(sampler, stage, GetSampler(sampler));
|
||||||
entries.samplers.push_back(desc);
|
}
|
||||||
|
for (const auto& gmem : ir.GetGlobalMemoryBases()) {
|
||||||
|
entries.global_memory_entries.emplace_back(gmem.cbuf_index, gmem.cbuf_offset, stage,
|
||||||
|
GetGlobalMemoryBlock(gmem));
|
||||||
}
|
}
|
||||||
entries.clip_distances = ir.GetClipDistances();
|
entries.clip_distances = ir.GetClipDistances();
|
||||||
entries.shader_length = ir.GetLength();
|
entries.shader_length = ir.GetLength();
|
||||||
|
@ -375,6 +381,15 @@ private:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void DeclareGlobalMemory() {
|
||||||
|
for (const auto& entry : ir.GetGlobalMemoryBases()) {
|
||||||
|
code.AddLine("layout (std430) buffer " + GetGlobalMemoryBlock(entry) + " {");
|
||||||
|
code.AddLine(" float " + GetGlobalMemory(entry) + "[MAX_GLOBALMEMORY_ELEMENTS];");
|
||||||
|
code.AddLine("};");
|
||||||
|
code.AddNewLine();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void DeclareSamplers() {
|
void DeclareSamplers() {
|
||||||
const auto& samplers = ir.GetSamplers();
|
const auto& samplers = ir.GetSamplers();
|
||||||
for (const auto& sampler : samplers) {
|
for (const auto& sampler : samplers) {
|
||||||
|
@ -538,6 +553,12 @@ private:
|
||||||
UNREACHABLE_MSG("Unmanaged offset node type");
|
UNREACHABLE_MSG("Unmanaged offset node type");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} else if (const auto gmem = std::get_if<GmemNode>(node)) {
|
||||||
|
const std::string real = Visit(gmem->GetRealAddress());
|
||||||
|
const std::string base = Visit(gmem->GetBaseAddress());
|
||||||
|
const std::string final_offset = "(ftou(" + real + ") - ftou(" + base + ")) / 4";
|
||||||
|
return fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset);
|
||||||
|
|
||||||
} else if (const auto lmem = std::get_if<LmemNode>(node)) {
|
} else if (const auto lmem = std::get_if<LmemNode>(node)) {
|
||||||
return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress()));
|
return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress()));
|
||||||
|
|
||||||
|
@ -1471,6 +1492,15 @@ private:
|
||||||
return GetDeclarationWithSuffix(index, "cbuf");
|
return GetDeclarationWithSuffix(index, "cbuf");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const {
|
||||||
|
return fmt::format("gmem_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset, suffix);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string GetGlobalMemoryBlock(const GlobalMemoryBase& descriptor) const {
|
||||||
|
return fmt::format("gmem_block_{}_{}_{}", descriptor.cbuf_index, descriptor.cbuf_offset,
|
||||||
|
suffix);
|
||||||
|
}
|
||||||
|
|
||||||
std::string GetConstBufferBlock(u32 index) const {
|
std::string GetConstBufferBlock(u32 index) const {
|
||||||
return GetDeclarationWithSuffix(index, "cbuf_block");
|
return GetDeclarationWithSuffix(index, "cbuf_block");
|
||||||
}
|
}
|
||||||
|
@ -1505,8 +1535,10 @@ private:
|
||||||
};
|
};
|
||||||
|
|
||||||
std::string GetCommonDeclarations() {
|
std::string GetCommonDeclarations() {
|
||||||
return "#define MAX_CONSTBUFFER_ELEMENTS " + std::to_string(MAX_CONSTBUFFER_ELEMENTS) +
|
const auto cbuf = std::to_string(MAX_CONSTBUFFER_ELEMENTS);
|
||||||
"\n"
|
const auto gmem = std::to_string(MAX_GLOBALMEMORY_ELEMENTS);
|
||||||
|
return "#define MAX_CONSTBUFFER_ELEMENTS " + cbuf + "\n" +
|
||||||
|
"#define MAX_GLOBALMEMORY_ELEMENTS " + gmem + "\n" +
|
||||||
"#define ftoi floatBitsToInt\n"
|
"#define ftoi floatBitsToInt\n"
|
||||||
"#define ftou floatBitsToUint\n"
|
"#define ftou floatBitsToUint\n"
|
||||||
"#define itof intBitsToFloat\n"
|
"#define itof intBitsToFloat\n"
|
||||||
|
|
|
@ -71,9 +71,43 @@ private:
|
||||||
Maxwell::ShaderStage stage{};
|
Maxwell::ShaderStage stage{};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class GlobalMemoryEntry {
|
||||||
|
public:
|
||||||
|
explicit GlobalMemoryEntry(u32 cbuf_index, u32 cbuf_offset, Maxwell::ShaderStage stage,
|
||||||
|
std::string name)
|
||||||
|
: cbuf_index{cbuf_index}, cbuf_offset{cbuf_offset}, stage{stage}, name{std::move(name)} {}
|
||||||
|
|
||||||
|
u32 GetCbufIndex() const {
|
||||||
|
return cbuf_index;
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 GetCbufOffset() const {
|
||||||
|
return cbuf_offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
const std::string& GetName() const {
|
||||||
|
return name;
|
||||||
|
}
|
||||||
|
|
||||||
|
Maxwell::ShaderStage GetStage() const {
|
||||||
|
return stage;
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 GetHash() const {
|
||||||
|
return (static_cast<u32>(stage) << 24) | (cbuf_index << 16) | cbuf_offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
u32 cbuf_index{};
|
||||||
|
u32 cbuf_offset{};
|
||||||
|
Maxwell::ShaderStage stage{};
|
||||||
|
std::string name;
|
||||||
|
};
|
||||||
|
|
||||||
struct ShaderEntries {
|
struct ShaderEntries {
|
||||||
std::vector<ConstBufferEntry> const_buffers;
|
std::vector<ConstBufferEntry> const_buffers;
|
||||||
std::vector<SamplerEntry> samplers;
|
std::vector<SamplerEntry> samplers;
|
||||||
|
std::vector<GlobalMemoryEntry> global_memory_entries;
|
||||||
std::array<bool, Maxwell::NumClipDistances> clip_distances{};
|
std::array<bool, Maxwell::NumClipDistances> clip_distances{};
|
||||||
std::size_t shader_length{};
|
std::size_t shader_length{};
|
||||||
};
|
};
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
#include <fmt/format.h>
|
||||||
|
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
|
@ -119,6 +120,54 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case OpCode::Id::LDG: {
|
||||||
|
const u32 count = [&]() {
|
||||||
|
switch (instr.ldg.type) {
|
||||||
|
case Tegra::Shader::UniformType::Single:
|
||||||
|
return 1;
|
||||||
|
case Tegra::Shader::UniformType::Double:
|
||||||
|
return 2;
|
||||||
|
case Tegra::Shader::UniformType::Quad:
|
||||||
|
case Tegra::Shader::UniformType::UnsignedQuad:
|
||||||
|
return 4;
|
||||||
|
default:
|
||||||
|
UNIMPLEMENTED_MSG("Unimplemented LDG size!");
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}();
|
||||||
|
|
||||||
|
const Node addr_register = GetRegister(instr.gpr8);
|
||||||
|
const Node base_address = TrackCbuf(addr_register, code, static_cast<s64>(code.size()));
|
||||||
|
const auto cbuf = std::get_if<CbufNode>(base_address);
|
||||||
|
ASSERT(cbuf != nullptr);
|
||||||
|
const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset());
|
||||||
|
ASSERT(cbuf_offset_imm != nullptr);
|
||||||
|
const auto cbuf_offset = cbuf_offset_imm->GetValue() * 4;
|
||||||
|
|
||||||
|
bb.push_back(Comment(
|
||||||
|
fmt::format("Base address is c[0x{:x}][0x{:x}]", cbuf->GetIndex(), cbuf_offset)));
|
||||||
|
|
||||||
|
const GlobalMemoryBase descriptor{cbuf->GetIndex(), cbuf_offset};
|
||||||
|
used_global_memory_bases.insert(descriptor);
|
||||||
|
|
||||||
|
const Node immediate_offset =
|
||||||
|
Immediate(static_cast<u32>(instr.ldg.immediate_offset.Value()));
|
||||||
|
const Node base_real_address =
|
||||||
|
Operation(OperationCode::UAdd, NO_PRECISE, immediate_offset, addr_register);
|
||||||
|
|
||||||
|
for (u32 i = 0; i < count; ++i) {
|
||||||
|
const Node it_offset = Immediate(i * 4);
|
||||||
|
const Node real_address =
|
||||||
|
Operation(OperationCode::UAdd, NO_PRECISE, base_real_address, it_offset);
|
||||||
|
const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor));
|
||||||
|
|
||||||
|
SetTemporal(bb, i, gmem);
|
||||||
|
}
|
||||||
|
for (u32 i = 0; i < count; ++i) {
|
||||||
|
SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
case OpCode::Id::ST_A: {
|
case OpCode::Id::ST_A: {
|
||||||
UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
|
UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
|
||||||
"Indirect attribute loads are not supported");
|
"Indirect attribute loads are not supported");
|
||||||
|
|
|
@ -257,6 +257,15 @@ private:
|
||||||
bool is_indirect{};
|
bool is_indirect{};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct GlobalMemoryBase {
|
||||||
|
u32 cbuf_index{};
|
||||||
|
u32 cbuf_offset{};
|
||||||
|
|
||||||
|
bool operator<(const GlobalMemoryBase& rhs) const {
|
||||||
|
return std::tie(cbuf_index, cbuf_offset) < std::tie(rhs.cbuf_index, rhs.cbuf_offset);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
struct MetaArithmetic {
|
struct MetaArithmetic {
|
||||||
bool precise{};
|
bool precise{};
|
||||||
};
|
};
|
||||||
|
@ -478,14 +487,26 @@ private:
|
||||||
/// Global memory node
|
/// Global memory node
|
||||||
class GmemNode final {
|
class GmemNode final {
|
||||||
public:
|
public:
|
||||||
explicit constexpr GmemNode(Node address) : address{address} {}
|
explicit constexpr GmemNode(Node real_address, Node base_address,
|
||||||
|
const GlobalMemoryBase& descriptor)
|
||||||
|
: real_address{real_address}, base_address{base_address}, descriptor{descriptor} {}
|
||||||
|
|
||||||
Node GetAddress() const {
|
Node GetRealAddress() const {
|
||||||
return address;
|
return real_address;
|
||||||
|
}
|
||||||
|
|
||||||
|
Node GetBaseAddress() const {
|
||||||
|
return base_address;
|
||||||
|
}
|
||||||
|
|
||||||
|
const GlobalMemoryBase& GetDescriptor() const {
|
||||||
|
return descriptor;
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
const Node address;
|
const Node real_address;
|
||||||
|
const Node base_address;
|
||||||
|
const GlobalMemoryBase descriptor;
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Commentary, can be dropped
|
/// Commentary, can be dropped
|
||||||
|
@ -543,6 +564,10 @@ public:
|
||||||
return used_clip_distances;
|
return used_clip_distances;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const std::set<GlobalMemoryBase>& GetGlobalMemoryBases() const {
|
||||||
|
return used_global_memory_bases;
|
||||||
|
}
|
||||||
|
|
||||||
std::size_t GetLength() const {
|
std::size_t GetLength() const {
|
||||||
return static_cast<std::size_t>(coverage_end * sizeof(u64));
|
return static_cast<std::size_t>(coverage_end * sizeof(u64));
|
||||||
}
|
}
|
||||||
|
@ -734,6 +759,10 @@ private:
|
||||||
void WriteLop3Instruction(BasicBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b,
|
void WriteLop3Instruction(BasicBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b,
|
||||||
Node op_c, Node imm_lut, bool sets_cc);
|
Node op_c, Node imm_lut, bool sets_cc);
|
||||||
|
|
||||||
|
Node TrackCbuf(Node tracked, const BasicBlock& code, s64 cursor);
|
||||||
|
|
||||||
|
std::pair<Node, s64> TrackRegister(const GprNode* tracked, const BasicBlock& code, s64 cursor);
|
||||||
|
|
||||||
template <typename... T>
|
template <typename... T>
|
||||||
Node Operation(OperationCode code, const T*... operands) {
|
Node Operation(OperationCode code, const T*... operands) {
|
||||||
return StoreNode(OperationNode(code, operands...));
|
return StoreNode(OperationNode(code, operands...));
|
||||||
|
@ -786,6 +815,7 @@ private:
|
||||||
std::map<u32, ConstBuffer> used_cbufs;
|
std::map<u32, ConstBuffer> used_cbufs;
|
||||||
std::set<Sampler> used_samplers;
|
std::set<Sampler> used_samplers;
|
||||||
std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{};
|
std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{};
|
||||||
|
std::set<GlobalMemoryBase> used_global_memory_bases;
|
||||||
|
|
||||||
Tegra::Shader::Header header;
|
Tegra::Shader::Header header;
|
||||||
};
|
};
|
||||||
|
|
76
src/video_core/shader/track.cpp
Normal file
76
src/video_core/shader/track.cpp
Normal file
|
@ -0,0 +1,76 @@
|
||||||
|
// Copyright 2018 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <utility>
|
||||||
|
#include <variant>
|
||||||
|
|
||||||
|
#include "video_core/shader/shader_ir.h"
|
||||||
|
|
||||||
|
namespace VideoCommon::Shader {
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
std::pair<Node, s64> FindOperation(const BasicBlock& code, s64 cursor,
|
||||||
|
OperationCode operation_code) {
|
||||||
|
for (; cursor >= 0; --cursor) {
|
||||||
|
const Node node = code[cursor];
|
||||||
|
if (const auto operation = std::get_if<OperationNode>(node)) {
|
||||||
|
if (operation->GetCode() == operation_code)
|
||||||
|
return {node, cursor};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
/// Tries to resolve `tracked` to the constant buffer node it originates from.
/// - A constant buffer node is returned as-is when its offset is an immediate, otherwise the
///   search fails.
/// - A register node is followed backwards through `code` to the value last assigned to it.
/// - An operation node is searched operand by operand; the first hit wins.
/// @param cursor Index in `code` from which backwards searches start.
/// @returns The constant buffer node found, or nullptr when tracking fails.
Node ShaderIR::TrackCbuf(Node tracked, const BasicBlock& code, s64 cursor) {
    if (const auto* const cbuf = std::get_if<CbufNode>(tracked)) {
        // Only constant buffer reads with an immediate offset are accepted
        if (!std::holds_alternative<ImmediateNode>(*cbuf->GetOffset())) {
            return nullptr;
        }
        return tracked;
    }

    if (const auto* const gpr = std::get_if<GprNode>(tracked)) {
        if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) {
            return nullptr;
        }
        // Begin one entry earlier: an instruction that writes the very register it reads as an
        // operand would otherwise be found again and again, looping forever
        const auto [source, source_cursor] = TrackRegister(gpr, code, cursor - 1);
        return source ? TrackCbuf(source, code, source_cursor) : nullptr;
    }

    const auto* const operation = std::get_if<OperationNode>(tracked);
    if (operation == nullptr) {
        return nullptr;
    }
    for (std::size_t index = 0; index < operation->GetOperandsCount(); ++index) {
        // The first operand that leads to a constant buffer decides the result
        if (const Node found = TrackCbuf((*operation)[index], code, cursor)) {
            return found;
        }
    }
    return nullptr;
}
|
||||||
|
|
||||||
|
std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const BasicBlock& code,
|
||||||
|
s64 cursor) {
|
||||||
|
for (; cursor >= 0; --cursor) {
|
||||||
|
const auto [found_node, new_cursor] = FindOperation(code, cursor, OperationCode::Assign);
|
||||||
|
if (!found_node) {
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
const auto operation = std::get_if<OperationNode>(found_node);
|
||||||
|
ASSERT(operation);
|
||||||
|
|
||||||
|
const auto& target = (*operation)[0];
|
||||||
|
if (const auto gpr_target = std::get_if<GprNode>(target)) {
|
||||||
|
if (gpr_target->GetIndex() == tracked->GetIndex()) {
|
||||||
|
return {(*operation)[1], new_cursor};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace VideoCommon::Shader
|
Loading…
Reference in a new issue