vk_pipeline_cache: Unify pipeline cache keys into a single operation

This allows us to call Common::CityHash and std::memcmp only once for GraphicsPipelineCacheKey. While we are at it, do the same for compute.
2020-04-22 20:52:29 -03:00 · 2020-04-22 20:52:29 -03:00 · 8c37cd1af6
commit 8c37cd1af6
parent f665c92114
5 changed files with 58 additions and 46 deletions
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@ -140,6 +140,12 @@ void FixedPipelineState::BlendingAttachment::Fill(const Maxwell& regs, std::size
    enable.Assign(1);
 }
 void FixedPipelineState::Fill(const Maxwell& regs) {
    rasterizer.Fill(regs);
    depth_stencil.Fill(regs);
    color_blending.Fill(regs);
 }
 std::size_t FixedPipelineState::Hash() const noexcept {
    const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
    return static_cast<std::size_t>(hash);
@ -149,15 +155,6 @@ bool FixedPipelineState::operator==(const FixedPipelineState& rhs) const noexcep
    return std::memcmp(this, &rhs, sizeof *this) == 0;
 }
 FixedPipelineState GetFixedPipelineState(const Maxwell& regs) {
    FixedPipelineState fixed_state;
    fixed_state.rasterizer.Fill(regs);
    fixed_state.depth_stencil.Fill(regs);
    fixed_state.color_blending.Fill(regs);
    fixed_state.padding = {};
    return fixed_state;
 }
 u32 FixedPipelineState::PackComparisonOp(Maxwell::ComparisonOp op) noexcept {
    // OpenGL enums go from 0x200 to 0x207 and the others from 1 to 8
    // If we substract 0x200 to OpenGL enums and 1 to the others we get a 0-7 range.
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
@ -17,7 +17,7 @@ namespace Vulkan {
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-struct alignas(32) FixedPipelineState {
+struct FixedPipelineState {
    static u32 PackComparisonOp(Maxwell::ComparisonOp op) noexcept;
    static Maxwell::ComparisonOp UnpackComparisonOp(u32 packed) noexcept;
@ -237,7 +237,8 @@ struct alignas(32) FixedPipelineState {
    Rasterizer rasterizer;
    DepthStencil depth_stencil;
    ColorBlending color_blending;
-    std::array<u8, 20> padding;
+
    void Fill(const Maxwell& regs);
    std::size_t Hash() const noexcept;
@ -250,9 +251,6 @@ struct alignas(32) FixedPipelineState {
 static_assert(std::has_unique_object_representations_v<FixedPipelineState>);
 static_assert(std::is_trivially_copyable_v<FixedPipelineState>);
 static_assert(std::is_trivially_constructible_v<FixedPipelineState>);
 static_assert(sizeof(FixedPipelineState) % 32 == 0, "Size is not aligned");
 FixedPipelineState GetFixedPipelineState(const Maxwell& regs);
 } // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@ -161,6 +161,24 @@ u32 FillDescriptorLayout(const ShaderEntries& entries,
 } // Anonymous namespace
 std::size_t GraphicsPipelineCacheKey::Hash() const noexcept {
    const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
    return static_cast<std::size_t>(hash);
 }
 bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept {
    return std::memcmp(&rhs, this, sizeof *this) == 0;
 }
 std::size_t ComputePipelineCacheKey::Hash() const noexcept {
    const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
    return static_cast<std::size_t>(hash);
 }
 bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) const noexcept {
    return std::memcmp(&rhs, this, sizeof *this) == 0;
 }
 CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage,
                           GPUVAddr gpu_addr, VAddr cpu_addr, ProgramCode program_code,
                           u32 main_offset)
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@ -7,7 +7,6 @@
 #include <array>
 #include <cstddef>
 #include <memory>
 #include <tuple>
 #include <type_traits>
 #include <unordered_map>
 #include <utility>
@ -51,42 +50,38 @@ using ProgramCode = std::vector<u64>;
 struct GraphicsPipelineCacheKey {
    FixedPipelineState fixed_state;
    std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
    RenderPassParams renderpass_params;
    std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
    u64 padding; // This is necessary for unique object representations
-    std::size_t Hash() const noexcept {
+    std::size_t Hash() const noexcept;
        std::size_t hash = fixed_state.Hash();
        for (const auto& shader : shaders) {
            boost::hash_combine(hash, shader);
        }
        boost::hash_combine(hash, renderpass_params.Hash());
        return hash;
    }
-    bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept {
+    bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept;
-        return std::tie(fixed_state, shaders, renderpass_params) ==
+
-               std::tie(rhs.fixed_state, rhs.shaders, rhs.renderpass_params);
+    bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept {
        return !operator==(rhs);
    }
 };
 static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
 static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>);
 static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);
 struct ComputePipelineCacheKey {
-    GPUVAddr shader{};
+    GPUVAddr shader;
-    u32 shared_memory_size{};
+    u32 shared_memory_size;
-    std::array<u32, 3> workgroup_size{};
+    std::array<u32, 3> workgroup_size;
-    std::size_t Hash() const noexcept {
+    std::size_t Hash() const noexcept;
        return static_cast<std::size_t>(shader) ^
               ((static_cast<std::size_t>(shared_memory_size) >> 7) << 40) ^
               static_cast<std::size_t>(workgroup_size[0]) ^
               (static_cast<std::size_t>(workgroup_size[1]) << 16) ^
               (static_cast<std::size_t>(workgroup_size[2]) << 24);
    }
-    bool operator==(const ComputePipelineCacheKey& rhs) const noexcept {
+    bool operator==(const ComputePipelineCacheKey& rhs) const noexcept;
-        return std::tie(shader, shared_memory_size, workgroup_size) ==
+
-               std::tie(rhs.shader, rhs.shared_memory_size, rhs.workgroup_size);
+    bool operator!=(const ComputePipelineCacheKey& rhs) const noexcept {
        return !operator==(rhs);
    }
 };
 static_assert(std::has_unique_object_representations_v<ComputePipelineCacheKey>);
 static_assert(std::is_trivially_copyable_v<ComputePipelineCacheKey>);
 static_assert(std::is_trivially_constructible_v<ComputePipelineCacheKey>);
 } // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@ -316,7 +316,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
    query_cache.UpdateCounters();
    const auto& gpu = system.GPU().Maxwell3D();
-    GraphicsPipelineCacheKey key{GetFixedPipelineState(gpu.regs)};
+    GraphicsPipelineCacheKey key;
    key.fixed_state.Fill(gpu.regs);
    buffer_cache.Map(CalculateGraphicsStreamBufferSize(is_indexed));
@ -334,10 +335,11 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
    buffer_cache.Unmap();
-    const auto texceptions = UpdateAttachments();
+    const Texceptions texceptions = UpdateAttachments();
    SetupImageTransitions(texceptions, color_attachments, zeta_attachment);
    key.renderpass_params = GetRenderPassParams(texceptions);
    key.padding = 0;
    auto& pipeline = pipeline_cache.GetGraphicsPipeline(key);
    scheduler.BindGraphicsPipeline(pipeline.GetHandle());
@ -453,10 +455,12 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
    query_cache.UpdateCounters();
    const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
-    const ComputePipelineCacheKey key{
+    ComputePipelineCacheKey key;
-        code_addr,
+    key.shader = code_addr;
-        launch_desc.shared_alloc,
+    key.shared_memory_size = launch_desc.shared_alloc;
-        {launch_desc.block_dim_x, launch_desc.block_dim_y, launch_desc.block_dim_z}};
+    key.workgroup_size = {launch_desc.block_dim_x, launch_desc.block_dim_y,
                          launch_desc.block_dim_z};
    auto& pipeline = pipeline_cache.GetComputePipeline(key);
    // Compute dispatches can't be executed inside a renderpass