gl_rasterizer: Implement a VAO cache.
This patch caches VAO objects instead of re-emitting all vertex attribute pointers on every draw call. Configuring these pointers is known to be a fast task, but it results in too many GL calls. So, for better performance, just bind the cached VAO instead of configuring 16 pointers.
This commit is contained in:
parent
527e362a83
commit
d3ad9469a1
3 changed files with 60 additions and 53 deletions
|
@ -127,6 +127,7 @@ public:
|
||||||
BitField<21, 6, Size> size;
|
BitField<21, 6, Size> size;
|
||||||
BitField<27, 3, Type> type;
|
BitField<27, 3, Type> type;
|
||||||
BitField<31, 1, u32> bgra;
|
BitField<31, 1, u32> bgra;
|
||||||
|
u32 hex;
|
||||||
};
|
};
|
||||||
|
|
||||||
u32 ComponentCount() const {
|
u32 ComponentCount() const {
|
||||||
|
@ -262,6 +263,10 @@ public:
|
||||||
bool IsValid() const {
|
bool IsValid() const {
|
||||||
return size != Size::Invalid;
|
return size != Size::Invalid;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool operator<(const VertexAttribute& other) const {
|
||||||
|
return hex < other.hex;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
enum class PrimitiveTopology : u32 {
|
enum class PrimitiveTopology : u32 {
|
||||||
|
@ -545,7 +550,7 @@ public:
|
||||||
|
|
||||||
INSERT_PADDING_WORDS(0x5B);
|
INSERT_PADDING_WORDS(0x5B);
|
||||||
|
|
||||||
VertexAttribute vertex_attrib_format[NumVertexAttributes];
|
std::array<VertexAttribute, NumVertexAttributes> vertex_attrib_format;
|
||||||
|
|
||||||
INSERT_PADDING_WORDS(0xF);
|
INSERT_PADDING_WORDS(0xF);
|
||||||
|
|
||||||
|
@ -964,7 +969,7 @@ ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5);
|
||||||
ASSERT_REG_POSITION(stencil_back_mask, 0x3D6);
|
ASSERT_REG_POSITION(stencil_back_mask, 0x3D6);
|
||||||
ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7);
|
ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7);
|
||||||
ASSERT_REG_POSITION(zeta, 0x3F8);
|
ASSERT_REG_POSITION(zeta, 0x3F8);
|
||||||
ASSERT_REG_POSITION(vertex_attrib_format[0], 0x458);
|
ASSERT_REG_POSITION(vertex_attrib_format, 0x458);
|
||||||
ASSERT_REG_POSITION(rt_control, 0x487);
|
ASSERT_REG_POSITION(rt_control, 0x487);
|
||||||
ASSERT_REG_POSITION(zeta_width, 0x48a);
|
ASSERT_REG_POSITION(zeta_width, 0x48a);
|
||||||
ASSERT_REG_POSITION(zeta_height, 0x48b);
|
ASSERT_REG_POSITION(zeta_height, 0x48b);
|
||||||
|
|
|
@ -70,28 +70,13 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo
|
||||||
// Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0
|
// Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0
|
||||||
state.clip_distance[0] = true;
|
state.clip_distance[0] = true;
|
||||||
|
|
||||||
// Generate VAO and UBO
|
|
||||||
sw_vao.Create();
|
|
||||||
uniform_buffer.Create();
|
|
||||||
|
|
||||||
state.draw.vertex_array = sw_vao.handle;
|
|
||||||
state.draw.uniform_buffer = uniform_buffer.handle;
|
|
||||||
state.Apply();
|
|
||||||
|
|
||||||
// Create render framebuffer
|
// Create render framebuffer
|
||||||
framebuffer.Create();
|
framebuffer.Create();
|
||||||
|
|
||||||
hw_vao.Create();
|
|
||||||
|
|
||||||
state.draw.vertex_buffer = buffer_cache.GetHandle();
|
|
||||||
|
|
||||||
shader_program_manager = std::make_unique<GLShader::ProgramManager>();
|
shader_program_manager = std::make_unique<GLShader::ProgramManager>();
|
||||||
state.draw.shader_program = 0;
|
state.draw.shader_program = 0;
|
||||||
state.draw.vertex_array = hw_vao.handle;
|
|
||||||
state.Apply();
|
state.Apply();
|
||||||
|
|
||||||
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer_cache.GetHandle());
|
|
||||||
|
|
||||||
glEnable(GL_BLEND);
|
glEnable(GL_BLEND);
|
||||||
|
|
||||||
glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment);
|
glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &uniform_buffer_alignment);
|
||||||
|
@ -106,7 +91,54 @@ void RasterizerOpenGL::SetupVertexArrays() {
|
||||||
const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
|
const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
|
||||||
const auto& regs = gpu.regs;
|
const auto& regs = gpu.regs;
|
||||||
|
|
||||||
state.draw.vertex_array = hw_vao.handle;
|
auto [iter, is_cache_miss] = vertex_array_cache.try_emplace(regs.vertex_attrib_format);
|
||||||
|
auto& VAO = iter->second;
|
||||||
|
|
||||||
|
if (is_cache_miss) {
|
||||||
|
VAO.Create();
|
||||||
|
state.draw.vertex_array = VAO.handle;
|
||||||
|
state.Apply();
|
||||||
|
|
||||||
|
// The index buffer binding is stored within the VAO. Stupid OpenGL, but easy to work
|
||||||
|
// around.
|
||||||
|
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer_cache.GetHandle());
|
||||||
|
|
||||||
|
// Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL.
|
||||||
|
// Enables the first 16 vertex attributes always, as we don't know which ones are actually
|
||||||
|
// used until shader time. Note, Tegra technically supports 32, but we're capping this to 16
|
||||||
|
// for now to avoid OpenGL errors.
|
||||||
|
// TODO(Subv): Analyze the shader to identify which attributes are actually used and don't
|
||||||
|
// assume every shader uses them all.
|
||||||
|
for (unsigned index = 0; index < 16; ++index) {
|
||||||
|
const auto& attrib = regs.vertex_attrib_format[index];
|
||||||
|
|
||||||
|
// Ignore invalid attributes.
|
||||||
|
if (!attrib.IsValid())
|
||||||
|
continue;
|
||||||
|
|
||||||
|
const auto& buffer = regs.vertex_array[attrib.buffer];
|
||||||
|
LOG_TRACE(HW_GPU,
|
||||||
|
"vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}",
|
||||||
|
index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(),
|
||||||
|
attrib.offset.Value(), attrib.IsNormalized());
|
||||||
|
|
||||||
|
ASSERT(buffer.IsEnabled());
|
||||||
|
|
||||||
|
glEnableVertexAttribArray(index);
|
||||||
|
if (attrib.type == Tegra::Engines::Maxwell3D::Regs::VertexAttribute::Type::SignedInt ||
|
||||||
|
attrib.type ==
|
||||||
|
Tegra::Engines::Maxwell3D::Regs::VertexAttribute::Type::UnsignedInt) {
|
||||||
|
glVertexAttribIFormat(index, attrib.ComponentCount(),
|
||||||
|
MaxwellToGL::VertexType(attrib), attrib.offset);
|
||||||
|
} else {
|
||||||
|
glVertexAttribFormat(index, attrib.ComponentCount(),
|
||||||
|
MaxwellToGL::VertexType(attrib),
|
||||||
|
attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset);
|
||||||
|
}
|
||||||
|
glVertexAttribBinding(index, attrib.buffer);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
state.draw.vertex_array = VAO.handle;
|
||||||
state.draw.vertex_buffer = buffer_cache.GetHandle();
|
state.draw.vertex_buffer = buffer_cache.GetHandle();
|
||||||
state.Apply();
|
state.Apply();
|
||||||
|
|
||||||
|
@ -142,38 +174,6 @@ void RasterizerOpenGL::SetupVertexArrays() {
|
||||||
glVertexBindingDivisor(index, 0);
|
glVertexBindingDivisor(index, 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL.
|
|
||||||
// Enables the first 16 vertex attributes always, as we don't know which ones are actually used
|
|
||||||
// until shader time. Note, Tegra technically supports 32, but we're capping this to 16 for now
|
|
||||||
// to avoid OpenGL errors.
|
|
||||||
// TODO(Subv): Analyze the shader to identify which attributes are actually used and don't
|
|
||||||
// assume every shader uses them all.
|
|
||||||
for (unsigned index = 0; index < 16; ++index) {
|
|
||||||
auto& attrib = regs.vertex_attrib_format[index];
|
|
||||||
|
|
||||||
// Ignore invalid attributes.
|
|
||||||
if (!attrib.IsValid())
|
|
||||||
continue;
|
|
||||||
|
|
||||||
auto& buffer = regs.vertex_array[attrib.buffer];
|
|
||||||
LOG_TRACE(HW_GPU, "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}",
|
|
||||||
index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(),
|
|
||||||
attrib.offset.Value(), attrib.IsNormalized());
|
|
||||||
|
|
||||||
ASSERT(buffer.IsEnabled());
|
|
||||||
|
|
||||||
glEnableVertexAttribArray(index);
|
|
||||||
if (attrib.type == Tegra::Engines::Maxwell3D::Regs::VertexAttribute::Type::SignedInt ||
|
|
||||||
attrib.type == Tegra::Engines::Maxwell3D::Regs::VertexAttribute::Type::UnsignedInt) {
|
|
||||||
glVertexAttribIFormat(index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib),
|
|
||||||
attrib.offset);
|
|
||||||
} else {
|
|
||||||
glVertexAttribFormat(index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib),
|
|
||||||
attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset);
|
|
||||||
}
|
|
||||||
glVertexAttribBinding(index, attrib.buffer);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::SetupShaders() {
|
void RasterizerOpenGL::SetupShaders() {
|
||||||
|
|
|
@ -6,6 +6,7 @@
|
||||||
|
|
||||||
#include <array>
|
#include <array>
|
||||||
#include <cstddef>
|
#include <cstddef>
|
||||||
|
#include <map>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <tuple>
|
#include <tuple>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
|
@ -168,14 +169,15 @@ private:
|
||||||
ScreenInfo& screen_info;
|
ScreenInfo& screen_info;
|
||||||
|
|
||||||
std::unique_ptr<GLShader::ProgramManager> shader_program_manager;
|
std::unique_ptr<GLShader::ProgramManager> shader_program_manager;
|
||||||
OGLVertexArray sw_vao;
|
std::map<std::array<Tegra::Engines::Maxwell3D::Regs::VertexAttribute,
|
||||||
OGLVertexArray hw_vao;
|
Tegra::Engines::Maxwell3D::Regs::NumVertexAttributes>,
|
||||||
|
OGLVertexArray>
|
||||||
|
vertex_array_cache;
|
||||||
|
|
||||||
std::array<SamplerInfo, GLShader::NumTextureSamplers> texture_samplers;
|
std::array<SamplerInfo, GLShader::NumTextureSamplers> texture_samplers;
|
||||||
|
|
||||||
static constexpr size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
|
static constexpr size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
|
||||||
OGLBufferCache buffer_cache;
|
OGLBufferCache buffer_cache;
|
||||||
OGLBuffer uniform_buffer;
|
|
||||||
OGLFramebuffer framebuffer;
|
OGLFramebuffer framebuffer;
|
||||||
GLint uniform_buffer_alignment;
|
GLint uniform_buffer_alignment;
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue