From 2fc698b040e7e25223ba6ebe31abb04b1fc65f06 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 1 Apr 2021 01:36:22 -0300 Subject: [PATCH] vulkan: Build pipelines in parallel at runtime Wait from the worker thread for a pipeline to build before binding it to the command buffer. This allows queueing pipelines to multiple threads. --- .../renderer_vulkan/vk_compute_pipeline.cpp | 95 +++++++++++-------- .../renderer_vulkan/vk_compute_pipeline.h | 30 +++--- .../renderer_vulkan/vk_graphics_pipeline.cpp | 72 ++++++++------ .../renderer_vulkan/vk_graphics_pipeline.h | 31 +++--- .../renderer_vulkan/vk_pipeline_cache.cpp | 78 +++++++-------- .../renderer_vulkan/vk_pipeline_cache.h | 24 +++-- .../renderer_vulkan/vk_rasterizer.cpp | 15 +-- .../renderer_vulkan/vk_scheduler.cpp | 10 +- src/video_core/renderer_vulkan/vk_scheduler.h | 7 +- 9 files changed, 197 insertions(+), 165 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 8e544d745..1c3249e3c 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -27,8 +27,9 @@ DescriptorLayoutTuple CreateLayout(const Device& device, const Shader::Info& inf ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue_, - const Shader::Info& info_, vk::ShaderModule spv_module_) - : update_descriptor_queue{&update_descriptor_queue_}, info{info_}, + Common::ThreadWorker* thread_worker, const Shader::Info& info_, + vk::ShaderModule spv_module_) + : update_descriptor_queue{update_descriptor_queue_}, info{info_}, spv_module(std::move(spv_module_)) { DescriptorLayoutTuple tuple{CreateLayout(device, info)}; descriptor_set_layout = std::move(tuple.descriptor_set_layout); @@ -36,46 +37,55 @@ ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descrip descriptor_update_template = 
std::move(tuple.descriptor_update_template); descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout); - const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, - .pNext = nullptr, - .requiredSubgroupSize = GuestWarpSize, - }; - pipeline = device.GetLogical().CreateComputePipeline({ - .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - .stage{ - .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, - .pNext = device.IsExtSubgroupSizeControlSupported() ? &subgroup_size_ci : nullptr, + auto func{[this, &device] { + const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT, + .pNext = nullptr, + .requiredSubgroupSize = GuestWarpSize, + }; + pipeline = device.GetLogical().CreateComputePipeline({ + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .pNext = nullptr, .flags = 0, - .stage = VK_SHADER_STAGE_COMPUTE_BIT, - .module = *spv_module, - .pName = "main", - .pSpecializationInfo = nullptr, - }, - .layout = *pipeline_layout, - .basePipelineHandle = 0, - .basePipelineIndex = 0, - }); + .stage{ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .pNext = device.IsExtSubgroupSizeControlSupported() ? 
&subgroup_size_ci : nullptr, + .flags = 0, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .module = *spv_module, + .pName = "main", + .pSpecializationInfo = nullptr, + }, + .layout = *pipeline_layout, + .basePipelineHandle = 0, + .basePipelineIndex = 0, + }); + building_flag.test_and_set(); + building_flag.notify_all(); + }}; + if (thread_worker) { + thread_worker->QueueWork(std::move(func)); + } else { + func(); + } } -void ComputePipeline::ConfigureBufferCache(BufferCache& buffer_cache) { +void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, + Tegra::MemoryManager& gpu_memory, VKScheduler& scheduler, + BufferCache& buffer_cache, TextureCache& texture_cache) { + update_descriptor_queue.Acquire(); + buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask); buffer_cache.UnbindComputeStorageBuffers(); - size_t index{}; + size_t ssbo_index{}; for (const auto& desc : info.storage_buffers_descriptors) { ASSERT(desc.count == 1); - buffer_cache.BindComputeStorageBuffer(index, desc.cbuf_index, desc.cbuf_offset, true); - ++index; + buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset, true); + ++ssbo_index; } buffer_cache.UpdateComputeBuffers(); buffer_cache.BindHostComputeBuffers(); -} -void ComputePipeline::ConfigureTextureCache(Tegra::Engines::KeplerCompute& kepler_compute, - Tegra::MemoryManager& gpu_memory, - TextureCache& texture_cache) { texture_cache.SynchronizeComputeDescriptors(); static constexpr size_t max_elements = 64; @@ -103,15 +113,26 @@ void ComputePipeline::ConfigureTextureCache(Tegra::Engines::KeplerCompute& keple const std::span indices_span(image_view_indices.data(), image_view_indices.size()); texture_cache.FillComputeImageViews(indices_span, image_view_ids); - size_t index{}; + size_t image_index{}; PushImageDescriptors(info, samplers.data(), image_view_ids.data(), texture_cache, - *update_descriptor_queue, index); -} + update_descriptor_queue, image_index); -VkDescriptorSet 
ComputePipeline::UpdateDescriptorSet() { + if (!building_flag.test()) { + // Wait for the pipeline to be built + scheduler.Record([this](vk::CommandBuffer) { building_flag.wait(false); }); + } + scheduler.Record([this](vk::CommandBuffer cmdbuf) { + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); + }); + if (!descriptor_set_layout) { + return; + } const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; - update_descriptor_queue->Send(*descriptor_update_template, descriptor_set); - return descriptor_set; + update_descriptor_queue.Send(*descriptor_update_template, descriptor_set); + scheduler.Record([this, descriptor_set](vk::CommandBuffer cmdbuf) { + cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline_layout, 0, + descriptor_set, nullptr); + }); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.h b/src/video_core/renderer_vulkan/vk_compute_pipeline.h index e82e5816b..02da504f7 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h @@ -4,7 +4,10 @@ #pragma once +#include <atomic> + #include "common/common_types.h" +#include "common/thread_worker.h" #include "shader_recompiler/shader_info.h" #include "video_core/memory_manager.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" @@ -16,36 +19,26 @@ namespace Vulkan { class Device; +class VKScheduler; class ComputePipeline { public: - explicit ComputePipeline() = default; explicit ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, - const Shader::Info& info, vk::ShaderModule spv_module); + Common::ThreadWorker* thread_worker, const Shader::Info& info, + vk::ShaderModule spv_module); - ComputePipeline& operator=(ComputePipeline&&) noexcept = default; - ComputePipeline(ComputePipeline&&) noexcept = default; + ComputePipeline& operator=(ComputePipeline&&) noexcept = delete; +
ComputePipeline(ComputePipeline&&) noexcept = delete; ComputePipeline& operator=(const ComputePipeline&) = delete; ComputePipeline(const ComputePipeline&) = delete; - void ConfigureBufferCache(BufferCache& buffer_cache); - void ConfigureTextureCache(Tegra::Engines::KeplerCompute& kepler_compute, - Tegra::MemoryManager& gpu_memory, TextureCache& texture_cache); - - [[nodiscard]] VkDescriptorSet UpdateDescriptorSet(); - - [[nodiscard]] VkPipeline Handle() const noexcept { - return *pipeline; - } - - [[nodiscard]] VkPipelineLayout PipelineLayout() const noexcept { - return *pipeline_layout; - } + void Configure(Tegra::Engines::KeplerCompute& kepler_compute, Tegra::MemoryManager& gpu_memory, + VKScheduler& scheduler, BufferCache& buffer_cache, TextureCache& texture_cache); private: - VKUpdateDescriptorQueue* update_descriptor_queue; + VKUpdateDescriptorQueue& update_descriptor_queue; Shader::Info info; vk::ShaderModule spv_module; @@ -54,6 +47,7 @@ private: vk::PipelineLayout pipeline_layout; vk::DescriptorUpdateTemplateKHR descriptor_update_template; vk::Pipeline pipeline; + std::atomic_flag building_flag{}; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 278509bf0..ddc08b8c4 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -112,13 +112,15 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, BufferCache& buffer_cache_, TextureCache& texture_cache_, const Device& device, VKDescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue_, + Common::ThreadWorker* worker_thread, RenderPassCache& render_pass_cache, - const FixedPipelineState& state, + const FixedPipelineState& state_, std::array stages, const std::array& infos) - : maxwell3d{&maxwell3d_}, gpu_memory{&gpu_memory_}, texture_cache{&texture_cache_}, - 
buffer_cache{&buffer_cache_}, scheduler{&scheduler_}, - update_descriptor_queue{&update_descriptor_queue_}, spv_modules{std::move(stages)} { + : maxwell3d{maxwell3d_}, gpu_memory{gpu_memory_}, texture_cache{texture_cache_}, + buffer_cache{buffer_cache_}, scheduler{scheduler_}, + update_descriptor_queue{update_descriptor_queue_}, state{state_}, spv_modules{ + std::move(stages)} { std::ranges::transform(infos, stage_infos.begin(), [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); @@ -128,8 +130,17 @@ GraphicsPipeline::GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d_, descriptor_update_template = std::move(tuple.descriptor_update_template); descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout); - const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(state))}; - MakePipeline(device, state, render_pass); + auto func{[this, &device, &render_pass_cache] { + const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(state))}; + MakePipeline(device, render_pass); + building_flag.test_and_set(); + building_flag.notify_all(); + }}; + if (worker_thread) { + worker_thread->QueueWork(std::move(func)); + } else { + func(); + } } void GraphicsPipeline::Configure(bool is_indexed) { @@ -138,67 +149,72 @@ void GraphicsPipeline::Configure(bool is_indexed) { static_vector image_view_indices; static_vector samplers; - texture_cache->SynchronizeGraphicsDescriptors(); + texture_cache.SynchronizeGraphicsDescriptors(); - const auto& regs{maxwell3d->regs}; + const auto& regs{maxwell3d.regs}; const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { const Shader::Info& info{stage_infos[stage]}; - buffer_cache->SetEnabledUniformBuffers(stage, info.constant_buffer_mask); - buffer_cache->UnbindGraphicsStorageBuffers(stage); + buffer_cache.SetEnabledUniformBuffers(stage, info.constant_buffer_mask); + 
buffer_cache.UnbindGraphicsStorageBuffers(stage); size_t index{}; for (const auto& desc : info.storage_buffers_descriptors) { ASSERT(desc.count == 1); - buffer_cache->BindGraphicsStorageBuffer(stage, index, desc.cbuf_index, desc.cbuf_offset, - true); + buffer_cache.BindGraphicsStorageBuffer(stage, index, desc.cbuf_index, desc.cbuf_offset, + true); ++index; } - const auto& cbufs{maxwell3d->state.shader_stages[stage].const_buffers}; + const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers}; for (const auto& desc : info.texture_descriptors) { const u32 cbuf_index{desc.cbuf_index}; const u32 cbuf_offset{desc.cbuf_offset}; ASSERT(cbufs[cbuf_index].enabled); const GPUVAddr addr{cbufs[cbuf_index].address + cbuf_offset}; - const u32 raw_handle{gpu_memory->Read(addr)}; + const u32 raw_handle{gpu_memory.Read(addr)}; const TextureHandle handle(raw_handle, via_header_index); image_view_indices.push_back(handle.image); - Sampler* const sampler{texture_cache->GetGraphicsSampler(handle.sampler)}; + Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; samplers.push_back(sampler->Handle()); } } const std::span indices_span(image_view_indices.data(), image_view_indices.size()); - buffer_cache->UpdateGraphicsBuffers(is_indexed); - texture_cache->FillGraphicsImageViews(indices_span, image_view_ids); + buffer_cache.UpdateGraphicsBuffers(is_indexed); + texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); - buffer_cache->BindHostGeometryBuffers(is_indexed); + buffer_cache.BindHostGeometryBuffers(is_indexed); size_t index{}; for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { - buffer_cache->BindHostStageBuffers(stage); + buffer_cache.BindHostStageBuffers(stage); PushImageDescriptors(stage_infos[stage], samplers.data(), image_view_ids.data(), - *texture_cache, *update_descriptor_queue, index); + texture_cache, update_descriptor_queue, index); } - texture_cache->UpdateRenderTargets(false); - 
scheduler->RequestRenderpass(texture_cache->GetFramebuffer()); - - scheduler->BindGraphicsPipeline(*pipeline); + texture_cache.UpdateRenderTargets(false); + scheduler.RequestRenderpass(texture_cache.GetFramebuffer()); + if (!building_flag.test()) { + scheduler.Record([this](vk::CommandBuffer) { building_flag.wait(false); }); + } + if (scheduler.UpdateGraphicsPipeline(this)) { + scheduler.Record([this](vk::CommandBuffer cmdbuf) { + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); + }); + } if (!descriptor_set_layout) { return; } const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()}; - update_descriptor_queue->Send(*descriptor_update_template, descriptor_set); + update_descriptor_queue.Send(*descriptor_update_template, descriptor_set); - scheduler->Record([descriptor_set, layout = *pipeline_layout](vk::CommandBuffer cmdbuf) { + scheduler.Record([descriptor_set, layout = *pipeline_layout](vk::CommandBuffer cmdbuf) { cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set, nullptr); }); } -void GraphicsPipeline::MakePipeline(const Device& device, const FixedPipelineState& state, - VkRenderPass render_pass) { +void GraphicsPipeline::MakePipeline(const Device& device, VkRenderPass render_pass) { FixedPipelineState::DynamicState dynamic{}; if (!device.IsExtExtendedDynamicStateSupported()) { dynamic = state.dynamic_state; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index ba1d34a83..4e0583157 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -5,13 +5,15 @@ #pragma once #include <array> +#include <atomic> +#include "common/thread_worker.h" #include "shader_recompiler/shader_info.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" +#include "video_core/renderer_vulkan/vk_buffer_cache.h" #include
"video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_texture_cache.h" -#include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { @@ -25,34 +27,34 @@ class GraphicsPipeline { static constexpr size_t NUM_STAGES = Tegra::Engines::Maxwell3D::Regs::MaxShaderStage; public: - explicit GraphicsPipeline() = default; explicit GraphicsPipeline(Tegra::Engines::Maxwell3D& maxwell3d, Tegra::MemoryManager& gpu_memory, VKScheduler& scheduler, - BufferCache& buffer_cache, - TextureCache& texture_cache, const Device& device, VKDescriptorPool& descriptor_pool, + BufferCache& buffer_cache, TextureCache& texture_cache, + const Device& device, VKDescriptorPool& descriptor_pool, VKUpdateDescriptorQueue& update_descriptor_queue, + Common::ThreadWorker* worker_thread, RenderPassCache& render_pass_cache, const FixedPipelineState& state, std::array stages, const std::array& infos); void Configure(bool is_indexed); - GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = default; - GraphicsPipeline(GraphicsPipeline&&) noexcept = default; + GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = delete; + GraphicsPipeline(GraphicsPipeline&&) noexcept = delete; GraphicsPipeline& operator=(const GraphicsPipeline&) = delete; GraphicsPipeline(const GraphicsPipeline&) = delete; private: - void MakePipeline(const Device& device, const FixedPipelineState& state, - VkRenderPass render_pass); + void MakePipeline(const Device& device, VkRenderPass render_pass); - Tegra::Engines::Maxwell3D* maxwell3d{}; - Tegra::MemoryManager* gpu_memory{}; - TextureCache* texture_cache{}; - BufferCache* buffer_cache{}; - VKScheduler* scheduler{}; - VKUpdateDescriptorQueue* update_descriptor_queue{}; + Tegra::Engines::Maxwell3D& maxwell3d; + Tegra::MemoryManager& gpu_memory; + TextureCache& texture_cache; + BufferCache& buffer_cache; + VKScheduler& scheduler; + VKUpdateDescriptorQueue& 
update_descriptor_queue; + const FixedPipelineState state; std::array spv_modules; std::array stage_infos; @@ -61,6 +63,7 @@ private: vk::PipelineLayout pipeline_layout; vk::DescriptorUpdateTemplateKHR descriptor_update_template; vk::Pipeline pipeline; + std::atomic_flag building_flag{}; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 30a707599..e3d9debf4 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -518,9 +518,8 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading } pipeline_cache_filename = fmt::format("{}/{:016x}.bin", transferable_dir, title_id); - Common::ThreadWorker worker(11, "PipelineBuilder"); - std::mutex cache_mutex; struct { + std::mutex mutex; size_t total{0}; size_t built{0}; bool has_loaded{false}; @@ -542,51 +541,53 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading } u32 num_envs{}; file.read(reinterpret_cast(&num_envs), sizeof(num_envs)); - auto envs{std::make_shared>(num_envs)}; - for (FileEnvironment& env : *envs) { + std::vector envs(num_envs); + for (FileEnvironment& env : envs) { env.Deserialize(file); } - if (envs->front().ShaderStage() == Shader::Stage::Compute) { + if (envs.front().ShaderStage() == Shader::Stage::Compute) { ComputePipelineCacheKey key; file.read(reinterpret_cast(&key), sizeof(key)); - worker.QueueWork([this, key, envs, &cache_mutex, &state, &callback] { + workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable { ShaderPools pools; - ComputePipeline pipeline{CreateComputePipeline(pools, key, envs->front())}; + auto pipeline{CreateComputePipeline(pools, key, envs.front(), false)}; - std::lock_guard lock{cache_mutex}; + std::lock_guard lock{state.mutex}; compute_cache.emplace(key, std::move(pipeline)); + ++state.built; if (state.has_loaded) { - 
callback(VideoCore::LoadCallbackStage::Build, ++state.built, state.total); + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); } }); } else { GraphicsPipelineCacheKey key; file.read(reinterpret_cast(&key), sizeof(key)); - worker.QueueWork([this, key, envs, &cache_mutex, &state, &callback] { + workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable { ShaderPools pools; boost::container::static_vector env_ptrs; - for (auto& env : *envs) { + for (auto& env : envs) { env_ptrs.push_back(&env); } - GraphicsPipeline pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs))}; + auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs), false)}; - std::lock_guard lock{cache_mutex}; + std::lock_guard lock{state.mutex}; graphics_cache.emplace(key, std::move(pipeline)); + ++state.built; if (state.has_loaded) { - callback(VideoCore::LoadCallbackStage::Build, ++state.built, state.total); + callback(VideoCore::LoadCallbackStage::Build, state.built, state.total); } }); } ++state.total; } { - std::lock_guard lock{cache_mutex}; + std::lock_guard lock{state.mutex}; callback(VideoCore::LoadCallbackStage::Build, 0, state.total); state.has_loaded = true; } - worker.WaitForRequests(); + workers.WaitForRequests(); } size_t ComputePipelineCacheKey::Hash() const noexcept { @@ -619,7 +620,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_, kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_}, - buffer_cache{buffer_cache_}, texture_cache{texture_cache_} { + buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, workers(11, "PipelineBuilder") { const auto& float_control{device.FloatControlProperties()}; const VkDriverIdKHR driver_id{device.GetDriverID()}; base_profile = Shader::Profile{ @@ -662,10 +663,10 @@ GraphicsPipeline* 
PipelineCache::CurrentGraphicsPipeline() { const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; auto& pipeline{pair->second}; if (!is_new) { - return &pipeline; + return pipeline.get(); } pipeline = CreateGraphicsPipeline(); - return &pipeline; + return pipeline.get(); } ComputePipeline* PipelineCache::CurrentComputePipeline() { @@ -691,10 +692,10 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() { const auto [pair, is_new]{compute_cache.try_emplace(key)}; auto& pipeline{pair->second}; if (!is_new) { - return &pipeline; + return pipeline.get(); } pipeline = CreateComputePipeline(key, shader); - return &pipeline; + return pipeline.get(); } bool PipelineCache::RefreshStages() { @@ -743,9 +744,9 @@ const ShaderInfo* PipelineCache::MakeShaderInfo(GenericEnvironment& env, VAddr c return result; } -GraphicsPipeline PipelineCache::CreateGraphicsPipeline(ShaderPools& pools, - const GraphicsPipelineCacheKey& key, - std::span envs) { +std::unique_ptr PipelineCache::CreateGraphicsPipeline( + ShaderPools& pools, const GraphicsPipelineCacheKey& key, + std::span envs, bool build_in_parallel) { LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); size_t env_index{0}; std::array programs; @@ -783,12 +784,14 @@ GraphicsPipeline PipelineCache::CreateGraphicsPipeline(ShaderPools& pools, modules[stage_index].SetObjectNameEXT(name.c_str()); } } - return GraphicsPipeline(maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device, - descriptor_pool, update_descriptor_queue, render_pass_cache, key.state, - std::move(modules), infos); + Common::ThreadWorker* const thread_worker{build_in_parallel ? 
&workers : nullptr}; + return std::make_unique( + maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, device, descriptor_pool, + update_descriptor_queue, thread_worker, render_pass_cache, key.state, std::move(modules), + infos); } -GraphicsPipeline PipelineCache::CreateGraphicsPipeline() { +std::unique_ptr PipelineCache::CreateGraphicsPipeline() { main_pools.ReleaseContents(); std::array graphics_envs; @@ -809,22 +812,22 @@ GraphicsPipeline PipelineCache::CreateGraphicsPipeline() { generic_envs.push_back(&env); envs.push_back(&env); } - GraphicsPipeline pipeline{CreateGraphicsPipeline(main_pools, graphics_key, MakeSpan(envs))}; + auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, MakeSpan(envs), true)}; if (!pipeline_cache_filename.empty()) { SerializePipeline(graphics_key, generic_envs, pipeline_cache_filename); } return pipeline; } -ComputePipeline PipelineCache::CreateComputePipeline(const ComputePipelineCacheKey& key, - const ShaderInfo* shader) { +std::unique_ptr PipelineCache::CreateComputePipeline( + const ComputePipelineCacheKey& key, const ShaderInfo* shader) { const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; const auto& qmd{kepler_compute.launch_description}; ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; env.SetCachedSize(shader->size_bytes); main_pools.ReleaseContents(); - ComputePipeline pipeline{CreateComputePipeline(main_pools, key, env)}; + auto pipeline{CreateComputePipeline(main_pools, key, env, true)}; if (!pipeline_cache_filename.empty()) { SerializePipeline(key, std::array{&env}, pipeline_cache_filename); @@ -832,9 +835,9 @@ ComputePipeline PipelineCache::CreateComputePipeline(const ComputePipelineCacheK return pipeline; } -ComputePipeline PipelineCache::CreateComputePipeline(ShaderPools& pools, - const ComputePipelineCacheKey& key, - Shader::Environment& env) const { +std::unique_ptr PipelineCache::CreateComputePipeline( + ShaderPools& pools, const 
ComputePipelineCacheKey& key, Shader::Environment& env, + bool build_in_parallel) { LOG_INFO(Render_Vulkan, "0x{:016x}", key.Hash()); Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; @@ -846,8 +849,9 @@ ComputePipeline PipelineCache::CreateComputePipeline(ShaderPools& pools, const auto name{fmt::format("{:016x}{:016x}", key.unique_hash[0], key.unique_hash[1])}; spv_module.SetObjectNameEXT(name.c_str()); } - return ComputePipeline{device, descriptor_pool, update_descriptor_queue, program.info, - std::move(spv_module)}; + Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr}; + return std::make_unique(device, descriptor_pool, update_descriptor_queue, + thread_worker, program.info, std::move(spv_module)); } static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexAttribute& attr) { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index b55e14189..609f00898 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -14,6 +14,7 @@ #include #include "common/common_types.h" +#include "common/thread_worker.h" #include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/maxwell/control_flow.h" @@ -145,16 +146,19 @@ private: const ShaderInfo* MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr); - GraphicsPipeline CreateGraphicsPipeline(); + std::unique_ptr CreateGraphicsPipeline(); - GraphicsPipeline CreateGraphicsPipeline(ShaderPools& pools, const GraphicsPipelineCacheKey& key, - std::span envs); + std::unique_ptr CreateGraphicsPipeline( + ShaderPools& pools, const GraphicsPipelineCacheKey& key, + std::span envs, bool build_in_parallel); - ComputePipeline CreateComputePipeline(const ComputePipelineCacheKey& key, - const ShaderInfo* shader); + std::unique_ptr 
CreateComputePipeline(const ComputePipelineCacheKey& key, + const ShaderInfo* shader); - ComputePipeline CreateComputePipeline(ShaderPools& pools, const ComputePipelineCacheKey& key, - Shader::Environment& env) const; + std::unique_ptr CreateComputePipeline(ShaderPools& pools, + const ComputePipelineCacheKey& key, + Shader::Environment& env, + bool build_in_parallel); Shader::Profile MakeProfile(const GraphicsPipelineCacheKey& key, Shader::Stage stage); @@ -174,13 +178,15 @@ private: GraphicsPipelineCacheKey graphics_key{}; std::array shader_infos{}; - std::unordered_map compute_cache; - std::unordered_map graphics_cache; + std::unordered_map> compute_cache; + std::unordered_map> graphics_cache; ShaderPools main_pools; Shader::Profile base_profile; std::string pipeline_cache_filename; + + Common::ThreadWorker workers; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index d7d9927dd..f0bd4b8af 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -276,22 +276,11 @@ void RasterizerVulkan::DispatchCompute() { return; } std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex}; - update_descriptor_queue.Acquire(); - pipeline->ConfigureBufferCache(buffer_cache); - pipeline->ConfigureTextureCache(kepler_compute, gpu_memory, texture_cache); - const VkDescriptorSet descriptor_set{pipeline->UpdateDescriptorSet()}; + pipeline->Configure(kepler_compute, gpu_memory, scheduler, buffer_cache, texture_cache); const auto& qmd{kepler_compute.launch_description}; const std::array dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z}; - const VkPipeline pipeline_handle{pipeline->Handle()}; - const VkPipelineLayout pipeline_layout{pipeline->PipelineLayout()}; - scheduler.Record( - [pipeline_handle, pipeline_layout, dim, descriptor_set](vk::CommandBuffer cmdbuf) { - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, 
pipeline_handle); - cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, 0, - descriptor_set, nullptr); - cmdbuf.Dispatch(dim[0], dim[1], dim[2]); - }); + scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); }); } void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) { diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index f35c120b0..25a4933e5 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -124,18 +124,16 @@ void VKScheduler::RequestOutsideRenderPassOperationContext() { EndRenderPass(); } -void VKScheduler::BindGraphicsPipeline(VkPipeline pipeline) { +bool VKScheduler::UpdateGraphicsPipeline(GraphicsPipeline* pipeline) { if (state.graphics_pipeline == pipeline) { - return; + return false; } state.graphics_pipeline = pipeline; - Record([pipeline](vk::CommandBuffer cmdbuf) { - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); - }); + return true; } void VKScheduler::WorkerThread() { - Common::SetCurrentThreadPriority(Common::ThreadPriority::High); + Common::SetCurrentThreadName("yuzu:VulkanWorker"); std::unique_lock lock{mutex}; do { cv.wait(lock, [this] { return !chunk_queue.Empty() || quit; }); diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 3ce48e9d2..a40bb8bcd 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -22,6 +22,7 @@ namespace Vulkan { class CommandPool; class Device; class Framebuffer; +class GraphicsPipeline; class StateTracker; class VKQueryCache; @@ -52,8 +53,8 @@ public: /// of a renderpass. void RequestOutsideRenderPassOperationContext(); - /// Binds a pipeline to the current execution context. - void BindGraphicsPipeline(VkPipeline pipeline); + /// Update the pipeline to the current execution context. 
+ bool UpdateGraphicsPipeline(GraphicsPipeline* pipeline); /// Invalidates current command buffer state except for render passes void InvalidateState(); @@ -170,7 +171,7 @@ private: VkRenderPass renderpass = nullptr; VkFramebuffer framebuffer = nullptr; VkExtent2D render_area = {0, 0}; - VkPipeline graphics_pipeline = nullptr; + GraphicsPipeline* graphics_pipeline = nullptr; }; void WorkerThread();