vk_pipeline_cache: Unify pipeline cache keys into a single operation
This allows us to call Common::CityHash and std::memcmp only once for GraphicsPipelineCacheKey. While we are at it, do the same for compute.
This commit is contained in:
parent
f665c92114
commit
8c37cd1af6
5 changed files with 58 additions and 46 deletions
|
@ -140,6 +140,12 @@ void FixedPipelineState::BlendingAttachment::Fill(const Maxwell& regs, std::size
|
|||
enable.Assign(1);
|
||||
}
|
||||
|
||||
void FixedPipelineState::Fill(const Maxwell& regs) {
|
||||
rasterizer.Fill(regs);
|
||||
depth_stencil.Fill(regs);
|
||||
color_blending.Fill(regs);
|
||||
}
|
||||
|
||||
std::size_t FixedPipelineState::Hash() const noexcept {
|
||||
const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
|
||||
return static_cast<std::size_t>(hash);
|
||||
|
@ -149,15 +155,6 @@ bool FixedPipelineState::operator==(const FixedPipelineState& rhs) const noexcep
|
|||
return std::memcmp(this, &rhs, sizeof *this) == 0;
|
||||
}
|
||||
|
||||
FixedPipelineState GetFixedPipelineState(const Maxwell& regs) {
|
||||
FixedPipelineState fixed_state;
|
||||
fixed_state.rasterizer.Fill(regs);
|
||||
fixed_state.depth_stencil.Fill(regs);
|
||||
fixed_state.color_blending.Fill(regs);
|
||||
fixed_state.padding = {};
|
||||
return fixed_state;
|
||||
}
|
||||
|
||||
u32 FixedPipelineState::PackComparisonOp(Maxwell::ComparisonOp op) noexcept {
|
||||
// OpenGL enums go from 0x200 to 0x207 and the others from 1 to 8
|
||||
// If we substract 0x200 to OpenGL enums and 1 to the others we get a 0-7 range.
|
||||
|
|
|
@ -17,7 +17,7 @@ namespace Vulkan {
|
|||
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
|
||||
struct alignas(32) FixedPipelineState {
|
||||
struct FixedPipelineState {
|
||||
static u32 PackComparisonOp(Maxwell::ComparisonOp op) noexcept;
|
||||
static Maxwell::ComparisonOp UnpackComparisonOp(u32 packed) noexcept;
|
||||
|
||||
|
@ -237,7 +237,8 @@ struct alignas(32) FixedPipelineState {
|
|||
Rasterizer rasterizer;
|
||||
DepthStencil depth_stencil;
|
||||
ColorBlending color_blending;
|
||||
std::array<u8, 20> padding;
|
||||
|
||||
void Fill(const Maxwell& regs);
|
||||
|
||||
std::size_t Hash() const noexcept;
|
||||
|
||||
|
@ -250,9 +251,6 @@ struct alignas(32) FixedPipelineState {
|
|||
static_assert(std::has_unique_object_representations_v<FixedPipelineState>);
|
||||
static_assert(std::is_trivially_copyable_v<FixedPipelineState>);
|
||||
static_assert(std::is_trivially_constructible_v<FixedPipelineState>);
|
||||
static_assert(sizeof(FixedPipelineState) % 32 == 0, "Size is not aligned");
|
||||
|
||||
FixedPipelineState GetFixedPipelineState(const Maxwell& regs);
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
|
|
|
@ -161,6 +161,24 @@ u32 FillDescriptorLayout(const ShaderEntries& entries,
|
|||
|
||||
} // Anonymous namespace
|
||||
|
||||
std::size_t GraphicsPipelineCacheKey::Hash() const noexcept {
|
||||
const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
|
||||
return static_cast<std::size_t>(hash);
|
||||
}
|
||||
|
||||
bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept {
|
||||
return std::memcmp(&rhs, this, sizeof *this) == 0;
|
||||
}
|
||||
|
||||
std::size_t ComputePipelineCacheKey::Hash() const noexcept {
|
||||
const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
|
||||
return static_cast<std::size_t>(hash);
|
||||
}
|
||||
|
||||
bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) const noexcept {
|
||||
return std::memcmp(&rhs, this, sizeof *this) == 0;
|
||||
}
|
||||
|
||||
CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage,
|
||||
GPUVAddr gpu_addr, VAddr cpu_addr, ProgramCode program_code,
|
||||
u32 main_offset)
|
||||
|
|
|
@ -7,7 +7,6 @@
|
|||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <memory>
|
||||
#include <tuple>
|
||||
#include <type_traits>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
|
@ -51,42 +50,38 @@ using ProgramCode = std::vector<u64>;
|
|||
|
||||
struct GraphicsPipelineCacheKey {
|
||||
FixedPipelineState fixed_state;
|
||||
std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
|
||||
RenderPassParams renderpass_params;
|
||||
std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
|
||||
u64 padding; // This is necessary for unique object representations
|
||||
|
||||
std::size_t Hash() const noexcept {
|
||||
std::size_t hash = fixed_state.Hash();
|
||||
for (const auto& shader : shaders) {
|
||||
boost::hash_combine(hash, shader);
|
||||
}
|
||||
boost::hash_combine(hash, renderpass_params.Hash());
|
||||
return hash;
|
||||
}
|
||||
std::size_t Hash() const noexcept;
|
||||
|
||||
bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept {
|
||||
return std::tie(fixed_state, shaders, renderpass_params) ==
|
||||
std::tie(rhs.fixed_state, rhs.shaders, rhs.renderpass_params);
|
||||
bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept;
|
||||
|
||||
bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept {
|
||||
return !operator==(rhs);
|
||||
}
|
||||
};
|
||||
static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
|
||||
static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>);
|
||||
static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);
|
||||
|
||||
struct ComputePipelineCacheKey {
|
||||
GPUVAddr shader{};
|
||||
u32 shared_memory_size{};
|
||||
std::array<u32, 3> workgroup_size{};
|
||||
GPUVAddr shader;
|
||||
u32 shared_memory_size;
|
||||
std::array<u32, 3> workgroup_size;
|
||||
|
||||
std::size_t Hash() const noexcept {
|
||||
return static_cast<std::size_t>(shader) ^
|
||||
((static_cast<std::size_t>(shared_memory_size) >> 7) << 40) ^
|
||||
static_cast<std::size_t>(workgroup_size[0]) ^
|
||||
(static_cast<std::size_t>(workgroup_size[1]) << 16) ^
|
||||
(static_cast<std::size_t>(workgroup_size[2]) << 24);
|
||||
}
|
||||
std::size_t Hash() const noexcept;
|
||||
|
||||
bool operator==(const ComputePipelineCacheKey& rhs) const noexcept {
|
||||
return std::tie(shader, shared_memory_size, workgroup_size) ==
|
||||
std::tie(rhs.shader, rhs.shared_memory_size, rhs.workgroup_size);
|
||||
bool operator==(const ComputePipelineCacheKey& rhs) const noexcept;
|
||||
|
||||
bool operator!=(const ComputePipelineCacheKey& rhs) const noexcept {
|
||||
return !operator==(rhs);
|
||||
}
|
||||
};
|
||||
static_assert(std::has_unique_object_representations_v<ComputePipelineCacheKey>);
|
||||
static_assert(std::is_trivially_copyable_v<ComputePipelineCacheKey>);
|
||||
static_assert(std::is_trivially_constructible_v<ComputePipelineCacheKey>);
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
|
|
|
@ -316,7 +316,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
|
|||
query_cache.UpdateCounters();
|
||||
|
||||
const auto& gpu = system.GPU().Maxwell3D();
|
||||
GraphicsPipelineCacheKey key{GetFixedPipelineState(gpu.regs)};
|
||||
GraphicsPipelineCacheKey key;
|
||||
key.fixed_state.Fill(gpu.regs);
|
||||
|
||||
buffer_cache.Map(CalculateGraphicsStreamBufferSize(is_indexed));
|
||||
|
||||
|
@ -334,10 +335,11 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
|
|||
|
||||
buffer_cache.Unmap();
|
||||
|
||||
const auto texceptions = UpdateAttachments();
|
||||
const Texceptions texceptions = UpdateAttachments();
|
||||
SetupImageTransitions(texceptions, color_attachments, zeta_attachment);
|
||||
|
||||
key.renderpass_params = GetRenderPassParams(texceptions);
|
||||
key.padding = 0;
|
||||
|
||||
auto& pipeline = pipeline_cache.GetGraphicsPipeline(key);
|
||||
scheduler.BindGraphicsPipeline(pipeline.GetHandle());
|
||||
|
@ -453,10 +455,12 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
|
|||
query_cache.UpdateCounters();
|
||||
|
||||
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
|
||||
const ComputePipelineCacheKey key{
|
||||
code_addr,
|
||||
launch_desc.shared_alloc,
|
||||
{launch_desc.block_dim_x, launch_desc.block_dim_y, launch_desc.block_dim_z}};
|
||||
ComputePipelineCacheKey key;
|
||||
key.shader = code_addr;
|
||||
key.shared_memory_size = launch_desc.shared_alloc;
|
||||
key.workgroup_size = {launch_desc.block_dim_x, launch_desc.block_dim_y,
|
||||
launch_desc.block_dim_z};
|
||||
|
||||
auto& pipeline = pipeline_cache.GetComputePipeline(key);
|
||||
|
||||
// Compute dispatches can't be executed inside a renderpass
|
||||
|
|
Loading…
Reference in a new issue