vk_graphics_pipeline: Use VK_KHR_push_descriptor when available
~51% faster on Nvidia compared to previous method.
This commit is contained in:
parent
ccbd24fe00
commit
ca67077ca8
8 changed files with 88 additions and 36 deletions
|
@ -16,38 +16,50 @@
|
|||
#include "video_core/texture_cache/texture_cache.h"
|
||||
#include "video_core/texture_cache/types.h"
|
||||
#include "video_core/textures/texture.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class DescriptorLayoutBuilder {
|
||||
public:
|
||||
DescriptorLayoutBuilder(const vk::Device& device_) : device{&device_} {}
|
||||
DescriptorLayoutBuilder(const Device& device_) : device{&device_} {}
|
||||
|
||||
vk::DescriptorSetLayout CreateDescriptorSetLayout() const {
|
||||
bool CanUsePushDescriptor() const noexcept {
|
||||
return device->IsKhrPushDescriptorSupported() &&
|
||||
num_descriptors <= device->MaxPushDescriptors();
|
||||
}
|
||||
|
||||
vk::DescriptorSetLayout CreateDescriptorSetLayout(bool use_push_descriptor) const {
|
||||
if (bindings.empty()) {
|
||||
return nullptr;
|
||||
}
|
||||
return device->CreateDescriptorSetLayout({
|
||||
const VkDescriptorSetLayoutCreateFlags flags =
|
||||
use_push_descriptor ? VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR : 0;
|
||||
return device->GetLogical().CreateDescriptorSetLayout({
|
||||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.flags = flags,
|
||||
.bindingCount = static_cast<u32>(bindings.size()),
|
||||
.pBindings = bindings.data(),
|
||||
});
|
||||
}
|
||||
|
||||
vk::DescriptorUpdateTemplateKHR CreateTemplate(VkDescriptorSetLayout descriptor_set_layout,
|
||||
VkPipelineLayout pipeline_layout) const {
|
||||
VkPipelineLayout pipeline_layout,
|
||||
bool use_push_descriptor) const {
|
||||
if (entries.empty()) {
|
||||
return nullptr;
|
||||
}
|
||||
return device->CreateDescriptorUpdateTemplateKHR({
|
||||
const VkDescriptorUpdateTemplateType type =
|
||||
use_push_descriptor ? VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_PUSH_DESCRIPTORS_KHR
|
||||
: VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR;
|
||||
return device->GetLogical().CreateDescriptorUpdateTemplateKHR({
|
||||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.descriptorUpdateEntryCount = static_cast<u32>(entries.size()),
|
||||
.pDescriptorUpdateEntries = entries.data(),
|
||||
.templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR,
|
||||
.templateType = type,
|
||||
.descriptorSetLayout = descriptor_set_layout,
|
||||
.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
|
||||
.pipelineLayout = pipeline_layout,
|
||||
|
@ -56,7 +68,7 @@ public:
|
|||
}
|
||||
|
||||
vk::PipelineLayout CreatePipelineLayout(VkDescriptorSetLayout descriptor_set_layout) const {
|
||||
return device->CreatePipelineLayout({
|
||||
return device->GetLogical().CreatePipelineLayout({
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
|
@ -97,14 +109,16 @@ private:
|
|||
.stride = sizeof(DescriptorUpdateEntry),
|
||||
});
|
||||
++binding;
|
||||
num_descriptors += descriptors[i].count;
|
||||
offset += sizeof(DescriptorUpdateEntry);
|
||||
}
|
||||
}
|
||||
|
||||
const vk::Device* device{};
|
||||
const Device* device{};
|
||||
boost::container::small_vector<VkDescriptorSetLayoutBinding, 32> bindings;
|
||||
boost::container::small_vector<VkDescriptorUpdateTemplateEntryKHR, 32> entries;
|
||||
u32 binding{};
|
||||
u32 num_descriptors{};
|
||||
size_t offset{};
|
||||
};
|
||||
|
||||
|
|
|
@ -37,15 +37,14 @@ ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descript
|
|||
uniform_buffer_sizes.begin());
|
||||
|
||||
auto func{[this, &descriptor_pool, shader_notify] {
|
||||
DescriptorLayoutBuilder builder{device.GetLogical()};
|
||||
DescriptorLayoutBuilder builder{device};
|
||||
builder.Add(info, VK_SHADER_STAGE_COMPUTE_BIT);
|
||||
|
||||
descriptor_set_layout = builder.CreateDescriptorSetLayout();
|
||||
descriptor_set_layout = builder.CreateDescriptorSetLayout(false);
|
||||
pipeline_layout = builder.CreatePipelineLayout(*descriptor_set_layout);
|
||||
descriptor_update_template =
|
||||
builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout);
|
||||
builder.CreateTemplate(*descriptor_set_layout, *pipeline_layout, false);
|
||||
descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, info);
|
||||
|
||||
const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
|
||||
.pNext = nullptr,
|
||||
|
@ -186,7 +185,6 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
|
|||
const void* const descriptor_data{update_descriptor_queue.UpdateData()};
|
||||
scheduler.Record([this, descriptor_data](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
|
||||
|
||||
if (!descriptor_set_layout) {
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -40,7 +40,7 @@ constexpr size_t NUM_STAGES = Maxwell::MaxShaderStage;
|
|||
constexpr size_t MAX_IMAGE_ELEMENTS = 64;
|
||||
|
||||
DescriptorLayoutBuilder MakeBuilder(const Device& device, std::span<const Shader::Info> infos) {
|
||||
DescriptorLayoutBuilder builder{device.GetLogical()};
|
||||
DescriptorLayoutBuilder builder{device};
|
||||
for (size_t index = 0; index < infos.size(); ++index) {
|
||||
static constexpr std::array stages{
|
||||
VK_SHADER_STAGE_VERTEX_BIT,
|
||||
|
@ -229,12 +229,15 @@ GraphicsPipeline::GraphicsPipeline(
|
|||
}
|
||||
auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool] {
|
||||
DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)};
|
||||
descriptor_set_layout = builder.CreateDescriptorSetLayout();
|
||||
uses_push_descriptor = builder.CanUsePushDescriptor();
|
||||
descriptor_set_layout = builder.CreateDescriptorSetLayout(uses_push_descriptor);
|
||||
if (!uses_push_descriptor) {
|
||||
descriptor_allocator = descriptor_pool.Allocator(*descriptor_set_layout, stage_infos);
|
||||
|
||||
}
|
||||
const VkDescriptorSetLayout set_layout{*descriptor_set_layout};
|
||||
pipeline_layout = builder.CreatePipelineLayout(set_layout);
|
||||
descriptor_update_template = builder.CreateTemplate(set_layout, *pipeline_layout);
|
||||
descriptor_update_template =
|
||||
builder.CreateTemplate(set_layout, *pipeline_layout, uses_push_descriptor);
|
||||
|
||||
const VkRenderPass render_pass{render_pass_cache.Get(MakeRenderPassKey(key.state))};
|
||||
Validate();
|
||||
|
@ -462,11 +465,16 @@ void GraphicsPipeline::ConfigureDraw() {
|
|||
if (!descriptor_set_layout) {
|
||||
return;
|
||||
}
|
||||
if (uses_push_descriptor) {
|
||||
cmdbuf.PushDescriptorSetWithTemplateKHR(*descriptor_update_template, *pipeline_layout,
|
||||
0, descriptor_data);
|
||||
} else {
|
||||
const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()};
|
||||
const vk::Device& dev{device.GetLogical()};
|
||||
dev.UpdateDescriptorSet(descriptor_set, *descriptor_update_template, descriptor_data);
|
||||
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0,
|
||||
descriptor_set, nullptr);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
|
|
|
@ -148,6 +148,7 @@ private:
|
|||
std::condition_variable build_condvar;
|
||||
std::mutex build_mutex;
|
||||
std::atomic_bool is_built{false};
|
||||
bool uses_push_descriptor{false};
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
|
|
@ -767,6 +767,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
|
|||
test(khr_uniform_buffer_standard_layout,
|
||||
VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true);
|
||||
test(khr_spirv_1_4, VK_KHR_SPIRV_1_4_EXTENSION_NAME, true);
|
||||
test(khr_push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME, true);
|
||||
test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false);
|
||||
test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true);
|
||||
test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true);
|
||||
|
@ -932,6 +933,16 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
|
|||
khr_workgroup_memory_explicit_layout = true;
|
||||
}
|
||||
}
|
||||
if (khr_push_descriptor) {
|
||||
VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor;
|
||||
push_descriptor.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR;
|
||||
push_descriptor.pNext = nullptr;
|
||||
|
||||
physical_properties.pNext = &push_descriptor;
|
||||
physical.GetProperties2KHR(physical_properties);
|
||||
|
||||
max_push_descriptors = push_descriptor.maxPushDescriptors;
|
||||
}
|
||||
return extensions;
|
||||
}
|
||||
|
||||
|
|
|
@ -154,6 +154,11 @@ public:
|
|||
return guest_warp_stages & stage;
|
||||
}
|
||||
|
||||
/// Returns the maximum number of push descriptors.
|
||||
u32 MaxPushDescriptors() const {
|
||||
return max_push_descriptors;
|
||||
}
|
||||
|
||||
/// Returns true if formatless image load is supported.
|
||||
bool IsFormatlessImageLoadSupported() const {
|
||||
return is_formatless_image_load_supported;
|
||||
|
@ -194,6 +199,11 @@ public:
|
|||
return khr_spirv_1_4;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_KHR_push_descriptor.
|
||||
bool IsKhrPushDescriptorSupported() const {
|
||||
return khr_push_descriptor;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_KHR_workgroup_memory_explicit_layout.
|
||||
bool IsKhrWorkgroupMemoryExplicitLayoutSupported() const {
|
||||
return khr_workgroup_memory_explicit_layout;
|
||||
|
@ -330,6 +340,7 @@ private:
|
|||
VkDriverIdKHR driver_id{}; ///< Driver ID.
|
||||
VkShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced.
|
||||
u64 device_access_memory{}; ///< Total size of device local memory in bytes.
|
||||
u32 max_push_descriptors{}; ///< Maximum number of push descriptors
|
||||
bool is_optimal_astc_supported{}; ///< Support for native ASTC.
|
||||
bool is_float16_supported{}; ///< Support for float16 arithmetics.
|
||||
bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest.
|
||||
|
@ -345,6 +356,7 @@ private:
|
|||
bool khr_uniform_buffer_standard_layout{}; ///< Support for scalar uniform buffer layouts.
|
||||
bool khr_spirv_1_4{}; ///< Support for VK_KHR_spirv_1_4.
|
||||
bool khr_workgroup_memory_explicit_layout{}; ///< Support for explicit workgroup layouts.
|
||||
bool khr_push_descriptor{}; ///< Support for VK_KHR_push_descritor.
|
||||
bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8.
|
||||
bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax.
|
||||
bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.
|
||||
|
|
|
@ -103,6 +103,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
|
|||
X(vkCmdFillBuffer);
|
||||
X(vkCmdPipelineBarrier);
|
||||
X(vkCmdPushConstants);
|
||||
X(vkCmdPushDescriptorSetWithTemplateKHR);
|
||||
X(vkCmdSetBlendConstants);
|
||||
X(vkCmdSetDepthBias);
|
||||
X(vkCmdSetDepthBounds);
|
||||
|
|
|
@ -193,15 +193,16 @@ struct DeviceDispatch : InstanceDispatch {
|
|||
PFN_vkBeginCommandBuffer vkBeginCommandBuffer{};
|
||||
PFN_vkBindBufferMemory vkBindBufferMemory{};
|
||||
PFN_vkBindImageMemory vkBindImageMemory{};
|
||||
PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabelEXT{};
|
||||
PFN_vkCmdBeginQuery vkCmdBeginQuery{};
|
||||
PFN_vkCmdBeginRenderPass vkCmdBeginRenderPass{};
|
||||
PFN_vkCmdBeginTransformFeedbackEXT vkCmdBeginTransformFeedbackEXT{};
|
||||
PFN_vkCmdBeginDebugUtilsLabelEXT vkCmdBeginDebugUtilsLabelEXT{};
|
||||
PFN_vkCmdBindDescriptorSets vkCmdBindDescriptorSets{};
|
||||
PFN_vkCmdBindIndexBuffer vkCmdBindIndexBuffer{};
|
||||
PFN_vkCmdBindPipeline vkCmdBindPipeline{};
|
||||
PFN_vkCmdBindTransformFeedbackBuffersEXT vkCmdBindTransformFeedbackBuffersEXT{};
|
||||
PFN_vkCmdBindVertexBuffers vkCmdBindVertexBuffers{};
|
||||
PFN_vkCmdBindVertexBuffers2EXT vkCmdBindVertexBuffers2EXT{};
|
||||
PFN_vkCmdBlitImage vkCmdBlitImage{};
|
||||
PFN_vkCmdClearAttachments vkCmdClearAttachments{};
|
||||
PFN_vkCmdCopyBuffer vkCmdCopyBuffer{};
|
||||
|
@ -211,35 +212,35 @@ struct DeviceDispatch : InstanceDispatch {
|
|||
PFN_vkCmdDispatch vkCmdDispatch{};
|
||||
PFN_vkCmdDraw vkCmdDraw{};
|
||||
PFN_vkCmdDrawIndexed vkCmdDrawIndexed{};
|
||||
PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{};
|
||||
PFN_vkCmdEndQuery vkCmdEndQuery{};
|
||||
PFN_vkCmdEndRenderPass vkCmdEndRenderPass{};
|
||||
PFN_vkCmdEndTransformFeedbackEXT vkCmdEndTransformFeedbackEXT{};
|
||||
PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{};
|
||||
PFN_vkCmdFillBuffer vkCmdFillBuffer{};
|
||||
PFN_vkCmdPipelineBarrier vkCmdPipelineBarrier{};
|
||||
PFN_vkCmdPushConstants vkCmdPushConstants{};
|
||||
PFN_vkCmdPushDescriptorSetWithTemplateKHR vkCmdPushDescriptorSetWithTemplateKHR{};
|
||||
PFN_vkCmdResolveImage vkCmdResolveImage{};
|
||||
PFN_vkCmdSetBlendConstants vkCmdSetBlendConstants{};
|
||||
PFN_vkCmdSetCullModeEXT vkCmdSetCullModeEXT{};
|
||||
PFN_vkCmdSetDepthBias vkCmdSetDepthBias{};
|
||||
PFN_vkCmdSetDepthBounds vkCmdSetDepthBounds{};
|
||||
PFN_vkCmdSetEvent vkCmdSetEvent{};
|
||||
PFN_vkCmdSetScissor vkCmdSetScissor{};
|
||||
PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask{};
|
||||
PFN_vkCmdSetStencilReference vkCmdSetStencilReference{};
|
||||
PFN_vkCmdSetStencilWriteMask vkCmdSetStencilWriteMask{};
|
||||
PFN_vkCmdSetViewport vkCmdSetViewport{};
|
||||
PFN_vkCmdWaitEvents vkCmdWaitEvents{};
|
||||
PFN_vkCmdBindVertexBuffers2EXT vkCmdBindVertexBuffers2EXT{};
|
||||
PFN_vkCmdSetCullModeEXT vkCmdSetCullModeEXT{};
|
||||
PFN_vkCmdSetDepthBoundsTestEnableEXT vkCmdSetDepthBoundsTestEnableEXT{};
|
||||
PFN_vkCmdSetDepthCompareOpEXT vkCmdSetDepthCompareOpEXT{};
|
||||
PFN_vkCmdSetDepthTestEnableEXT vkCmdSetDepthTestEnableEXT{};
|
||||
PFN_vkCmdSetDepthWriteEnableEXT vkCmdSetDepthWriteEnableEXT{};
|
||||
PFN_vkCmdSetEvent vkCmdSetEvent{};
|
||||
PFN_vkCmdSetFrontFaceEXT vkCmdSetFrontFaceEXT{};
|
||||
PFN_vkCmdSetPrimitiveTopologyEXT vkCmdSetPrimitiveTopologyEXT{};
|
||||
PFN_vkCmdSetScissor vkCmdSetScissor{};
|
||||
PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask{};
|
||||
PFN_vkCmdSetStencilOpEXT vkCmdSetStencilOpEXT{};
|
||||
PFN_vkCmdSetStencilReference vkCmdSetStencilReference{};
|
||||
PFN_vkCmdSetStencilTestEnableEXT vkCmdSetStencilTestEnableEXT{};
|
||||
PFN_vkCmdSetStencilWriteMask vkCmdSetStencilWriteMask{};
|
||||
PFN_vkCmdSetVertexInputEXT vkCmdSetVertexInputEXT{};
|
||||
PFN_vkCmdResolveImage vkCmdResolveImage{};
|
||||
PFN_vkCmdSetViewport vkCmdSetViewport{};
|
||||
PFN_vkCmdWaitEvents vkCmdWaitEvents{};
|
||||
PFN_vkCreateBuffer vkCreateBuffer{};
|
||||
PFN_vkCreateBufferView vkCreateBufferView{};
|
||||
PFN_vkCreateCommandPool vkCreateCommandPool{};
|
||||
|
@ -990,6 +991,12 @@ public:
|
|||
dynamic_offsets.size(), dynamic_offsets.data());
|
||||
}
|
||||
|
||||
void PushDescriptorSetWithTemplateKHR(VkDescriptorUpdateTemplateKHR update_template,
|
||||
VkPipelineLayout layout, u32 set,
|
||||
const void* data) const noexcept {
|
||||
dld->vkCmdPushDescriptorSetWithTemplateKHR(handle, update_template, layout, set, data);
|
||||
}
|
||||
|
||||
void BindPipeline(VkPipelineBindPoint bind_point, VkPipeline pipeline) const noexcept {
|
||||
dld->vkCmdBindPipeline(handle, bind_point, pipeline);
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue