Merge pull request #3451 from ReinUsesLisp/indexed-textures

vk_shader_decompiler: Implement indexed textures
This commit is contained in:
bunnei 2020-03-05 11:42:46 -05:00 committed by GitHub
commit 0361aa1915
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 108 additions and 75 deletions

View File

@ -36,6 +36,7 @@ namespace OpenGL {
using Maxwell = Tegra::Engines::Maxwell3D::Regs; using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using Tegra::Engines::ShaderType;
using VideoCore::Surface::PixelFormat; using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::SurfaceTarget; using VideoCore::Surface::SurfaceTarget;
using VideoCore::Surface::SurfaceType; using VideoCore::Surface::SurfaceType;
@ -56,8 +57,7 @@ namespace {
template <typename Engine, typename Entry> template <typename Engine, typename Entry>
Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
Tegra::Engines::ShaderType shader_type, ShaderType shader_type, std::size_t index = 0) {
std::size_t index = 0) {
if (entry.IsBindless()) { if (entry.IsBindless()) {
const Tegra::Texture::TextureHandle tex_handle = const Tegra::Texture::TextureHandle tex_handle =
engine.AccessConstBuffer32(shader_type, entry.GetBuffer(), entry.GetOffset()); engine.AccessConstBuffer32(shader_type, entry.GetBuffer(), entry.GetOffset());
@ -910,37 +910,25 @@ void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader&
const auto& maxwell3d = system.GPU().Maxwell3D(); const auto& maxwell3d = system.GPU().Maxwell3D();
u32 binding = device.GetBaseBindings(stage_index).sampler; u32 binding = device.GetBaseBindings(stage_index).sampler;
for (const auto& entry : shader->GetShaderEntries().samplers) { for (const auto& entry : shader->GetShaderEntries().samplers) {
const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index); const auto shader_type = static_cast<ShaderType>(stage_index);
if (!entry.IsIndexed()) {
const auto texture = GetTextureInfo(maxwell3d, entry, shader_type);
SetupTexture(binding++, texture, entry);
} else {
for (std::size_t i = 0; i < entry.Size(); ++i) { for (std::size_t i = 0; i < entry.Size(); ++i) {
const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i); const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i);
SetupTexture(binding++, texture, entry); SetupTexture(binding++, texture, entry);
} }
} }
} }
}
void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) { void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) {
MICROPROFILE_SCOPE(OpenGL_Texture); MICROPROFILE_SCOPE(OpenGL_Texture);
const auto& compute = system.GPU().KeplerCompute(); const auto& compute = system.GPU().KeplerCompute();
u32 binding = 0; u32 binding = 0;
for (const auto& entry : kernel->GetShaderEntries().samplers) { for (const auto& entry : kernel->GetShaderEntries().samplers) {
if (!entry.IsIndexed()) {
const auto texture =
GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute);
SetupTexture(binding++, texture, entry);
} else {
for (std::size_t i = 0; i < entry.Size(); ++i) { for (std::size_t i = 0; i < entry.Size(); ++i) {
const auto texture = const auto texture = GetTextureInfo(compute, entry, ShaderType::Compute, i);
GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute, i);
SetupTexture(binding++, texture, entry); SetupTexture(binding++, texture, entry);
} }
} }
} }
}
void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
const GLShader::SamplerEntry& entry) { const GLShader::SamplerEntry& entry) {

View File

@ -73,7 +73,7 @@ UniqueDescriptorUpdateTemplate VKComputePipeline::CreateDescriptorUpdateTemplate
std::vector<vk::DescriptorUpdateTemplateEntry> template_entries; std::vector<vk::DescriptorUpdateTemplateEntry> template_entries;
u32 binding = 0; u32 binding = 0;
u32 offset = 0; u32 offset = 0;
FillDescriptorUpdateTemplateEntries(device, entries, binding, offset, template_entries); FillDescriptorUpdateTemplateEntries(entries, binding, offset, template_entries);
if (template_entries.empty()) { if (template_entries.empty()) {
// If the shader doesn't use descriptor sets, skip template creation. // If the shader doesn't use descriptor sets, skip template creation.
return UniqueDescriptorUpdateTemplate{}; return UniqueDescriptorUpdateTemplate{};

View File

@ -97,8 +97,7 @@ UniqueDescriptorUpdateTemplate VKGraphicsPipeline::CreateDescriptorUpdateTemplat
u32 offset = 0; u32 offset = 0;
for (const auto& stage : program) { for (const auto& stage : program) {
if (stage) { if (stage) {
FillDescriptorUpdateTemplateEntries(device, stage->entries, binding, offset, FillDescriptorUpdateTemplateEntries(stage->entries, binding, offset, template_entries);
template_entries);
} }
} }
if (template_entries.empty()) { if (template_entries.empty()) {

View File

@ -36,6 +36,13 @@ using Tegra::Engines::ShaderType;
namespace { namespace {
// C++20's using enum
constexpr auto eUniformBuffer = vk::DescriptorType::eUniformBuffer;
constexpr auto eStorageBuffer = vk::DescriptorType::eStorageBuffer;
constexpr auto eUniformTexelBuffer = vk::DescriptorType::eUniformTexelBuffer;
constexpr auto eCombinedImageSampler = vk::DescriptorType::eCombinedImageSampler;
constexpr auto eStorageImage = vk::DescriptorType::eStorageImage;
constexpr VideoCommon::Shader::CompilerSettings compiler_settings{ constexpr VideoCommon::Shader::CompilerSettings compiler_settings{
VideoCommon::Shader::CompileDepth::FullDecompile}; VideoCommon::Shader::CompileDepth::FullDecompile};
@ -119,23 +126,32 @@ ShaderType GetShaderType(Maxwell::ShaderProgram program) {
} }
} }
/// Appends one descriptor set layout binding for every element of `container`.
///
/// @tparam descriptor_type Descriptor type given to every appended binding.
/// @tparam Container       Type accepted by std::size; when descriptor_type is
///                         eCombinedImageSampler its elements are indexed and
///                         queried through Size().
/// @param bindings    List that receives the new layout bindings.
/// @param binding     Running binding index, incremented once per appended binding.
/// @param stage_flags Shader stage flags forwarded to each binding.
/// @param container   Shader entries to create bindings for.
template <vk::DescriptorType descriptor_type, class Container>
void AddBindings(std::vector<vk::DescriptorSetLayoutBinding>& bindings, u32& binding,
                 vk::ShaderStageFlags stage_flags, const Container& container) {
    const u32 total = static_cast<u32>(std::size(container));
    std::size_t slot = 0;
    while (slot < total) {
        // One descriptor per binding, except combined image samplers, which can be arrayed.
        u32 descriptor_count = 1;
        if constexpr (descriptor_type == eCombinedImageSampler) {
            descriptor_count = container[slot].Size();
        }
        bindings.emplace_back(binding++, descriptor_type, descriptor_count, stage_flags, nullptr);
        ++slot;
    }
}
u32 FillDescriptorLayout(const ShaderEntries& entries, u32 FillDescriptorLayout(const ShaderEntries& entries,
std::vector<vk::DescriptorSetLayoutBinding>& bindings, std::vector<vk::DescriptorSetLayoutBinding>& bindings,
Maxwell::ShaderProgram program_type, u32 base_binding) { Maxwell::ShaderProgram program_type, u32 base_binding) {
const ShaderType stage = GetStageFromProgram(program_type); const ShaderType stage = GetStageFromProgram(program_type);
const vk::ShaderStageFlags stage_flags = MaxwellToVK::ShaderStage(stage); const vk::ShaderStageFlags flags = MaxwellToVK::ShaderStage(stage);
u32 binding = base_binding; u32 binding = base_binding;
const auto AddBindings = [&](vk::DescriptorType descriptor_type, std::size_t num_entries) { AddBindings<eUniformBuffer>(bindings, binding, flags, entries.const_buffers);
for (std::size_t i = 0; i < num_entries; ++i) { AddBindings<eStorageBuffer>(bindings, binding, flags, entries.global_buffers);
bindings.emplace_back(binding++, descriptor_type, 1, stage_flags, nullptr); AddBindings<eUniformTexelBuffer>(bindings, binding, flags, entries.texel_buffers);
} AddBindings<eCombinedImageSampler>(bindings, binding, flags, entries.samplers);
}; AddBindings<eStorageImage>(bindings, binding, flags, entries.images);
AddBindings(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size());
AddBindings(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size());
AddBindings(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size());
AddBindings(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size());
AddBindings(vk::DescriptorType::eStorageImage, entries.images.size());
return binding; return binding;
} }
@ -361,32 +377,45 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
return {std::move(program), std::move(bindings)}; return {std::move(program), std::move(bindings)};
} }
void FillDescriptorUpdateTemplateEntries( template <vk::DescriptorType descriptor_type, class Container>
const VKDevice& device, const ShaderEntries& entries, u32& binding, u32& offset, void AddEntry(std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries, u32& binding,
std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries) { u32& offset, const Container& container) {
static constexpr auto entry_size = static_cast<u32>(sizeof(DescriptorUpdateEntry)); static constexpr u32 entry_size = static_cast<u32>(sizeof(DescriptorUpdateEntry));
const auto AddEntry = [&](vk::DescriptorType descriptor_type, std::size_t count_) { const u32 count = static_cast<u32>(std::size(container));
const u32 count = static_cast<u32>(count_);
if (descriptor_type == vk::DescriptorType::eUniformTexelBuffer && if constexpr (descriptor_type == eCombinedImageSampler) {
device.GetDriverID() == vk::DriverIdKHR::eNvidiaProprietary) { for (u32 i = 0; i < count; ++i) {
const u32 num_samplers = container[i].Size();
template_entries.emplace_back(binding, 0, num_samplers, descriptor_type, offset,
entry_size);
++binding;
offset += num_samplers * entry_size;
}
return;
}
if constexpr (descriptor_type == eUniformTexelBuffer) {
// Nvidia has a bug where updating multiple uniform texels at once causes the driver to // Nvidia has a bug where updating multiple uniform texels at once causes the driver to
// crash. // crash.
for (u32 i = 0; i < count; ++i) { for (u32 i = 0; i < count; ++i) {
template_entries.emplace_back(binding + i, 0, 1, descriptor_type, template_entries.emplace_back(binding + i, 0, 1, descriptor_type,
offset + i * entry_size, entry_size); offset + i * entry_size, entry_size);
} }
} else if (count != 0) { } else if (count > 0) {
template_entries.emplace_back(binding, 0, count, descriptor_type, offset, entry_size); template_entries.emplace_back(binding, 0, count, descriptor_type, offset, entry_size);
} }
offset += count * entry_size; offset += count * entry_size;
binding += count; binding += count;
}; }
AddEntry(vk::DescriptorType::eUniformBuffer, entries.const_buffers.size()); void FillDescriptorUpdateTemplateEntries(
AddEntry(vk::DescriptorType::eStorageBuffer, entries.global_buffers.size()); const ShaderEntries& entries, u32& binding, u32& offset,
AddEntry(vk::DescriptorType::eUniformTexelBuffer, entries.texel_buffers.size()); std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries) {
AddEntry(vk::DescriptorType::eCombinedImageSampler, entries.samplers.size()); AddEntry<eUniformBuffer>(template_entries, offset, binding, entries.const_buffers);
AddEntry(vk::DescriptorType::eStorageImage, entries.images.size()); AddEntry<eStorageBuffer>(template_entries, offset, binding, entries.global_buffers);
AddEntry<eUniformTexelBuffer>(template_entries, offset, binding, entries.texel_buffers);
AddEntry<eCombinedImageSampler>(template_entries, offset, binding, entries.samplers);
AddEntry<eStorageImage>(template_entries, offset, binding, entries.images);
} }
} // namespace Vulkan } // namespace Vulkan

View File

@ -194,7 +194,7 @@ private:
}; };
void FillDescriptorUpdateTemplateEntries( void FillDescriptorUpdateTemplateEntries(
const VKDevice& device, const ShaderEntries& entries, u32& binding, u32& offset, const ShaderEntries& entries, u32& binding, u32& offset,
std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries); std::vector<vk::DescriptorUpdateTemplateEntry>& template_entries);
} // namespace Vulkan } // namespace Vulkan

View File

@ -105,17 +105,20 @@ void TransitionImages(const std::vector<ImageView>& views, vk::PipelineStageFlag
template <typename Engine, typename Entry> template <typename Engine, typename Entry>
Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
std::size_t stage) { std::size_t stage, std::size_t index = 0) {
const auto stage_type = static_cast<Tegra::Engines::ShaderType>(stage); const auto stage_type = static_cast<Tegra::Engines::ShaderType>(stage);
if (entry.IsBindless()) { if (entry.IsBindless()) {
const Tegra::Texture::TextureHandle tex_handle = const Tegra::Texture::TextureHandle tex_handle =
engine.AccessConstBuffer32(stage_type, entry.GetBuffer(), entry.GetOffset()); engine.AccessConstBuffer32(stage_type, entry.GetBuffer(), entry.GetOffset());
return engine.GetTextureInfo(tex_handle); return engine.GetTextureInfo(tex_handle);
} }
const auto& gpu_profile = engine.AccessGuestDriverProfile();
const u32 entry_offset = static_cast<u32>(index * gpu_profile.GetTextureHandlerSize());
const u32 offset = entry.GetOffset() + entry_offset;
if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) { if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) {
return engine.GetStageTexture(stage_type, entry.GetOffset()); return engine.GetStageTexture(stage_type, offset);
} else { } else {
return engine.GetTexture(entry.GetOffset()); return engine.GetTexture(offset);
} }
} }
@ -836,10 +839,12 @@ void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std::
MICROPROFILE_SCOPE(Vulkan_Textures); MICROPROFILE_SCOPE(Vulkan_Textures);
const auto& gpu = system.GPU().Maxwell3D(); const auto& gpu = system.GPU().Maxwell3D();
for (const auto& entry : entries.samplers) { for (const auto& entry : entries.samplers) {
const auto texture = GetTextureInfo(gpu, entry, stage); for (std::size_t i = 0; i < entry.Size(); ++i) {
const auto texture = GetTextureInfo(gpu, entry, stage, i);
SetupTexture(texture, entry); SetupTexture(texture, entry);
} }
} }
}
void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) { void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) {
MICROPROFILE_SCOPE(Vulkan_Images); MICROPROFILE_SCOPE(Vulkan_Images);
@ -886,10 +891,12 @@ void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {
MICROPROFILE_SCOPE(Vulkan_Textures); MICROPROFILE_SCOPE(Vulkan_Textures);
const auto& gpu = system.GPU().KeplerCompute(); const auto& gpu = system.GPU().KeplerCompute();
for (const auto& entry : entries.samplers) { for (const auto& entry : entries.samplers) {
const auto texture = GetTextureInfo(gpu, entry, ComputeShaderIndex); for (std::size_t i = 0; i < entry.Size(); ++i) {
const auto texture = GetTextureInfo(gpu, entry, ComputeShaderIndex, i);
SetupTexture(texture, entry); SetupTexture(texture, entry);
} }
} }
}
void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) {
MICROPROFILE_SCOPE(Vulkan_Images); MICROPROFILE_SCOPE(Vulkan_Images);

View File

@ -69,8 +69,9 @@ struct TexelBuffer {
struct SampledImage { struct SampledImage {
Id image_type{}; Id image_type{};
Id sampled_image_type{}; Id sampler_type{};
Id sampler{}; Id sampler_pointer_type{};
Id variable{};
}; };
struct StorageImage { struct StorageImage {
@ -833,16 +834,20 @@ private:
constexpr int sampled = 1; constexpr int sampled = 1;
constexpr auto format = spv::ImageFormat::Unknown; constexpr auto format = spv::ImageFormat::Unknown;
const Id image_type = TypeImage(t_float, dim, depth, arrayed, ms, sampled, format); const Id image_type = TypeImage(t_float, dim, depth, arrayed, ms, sampled, format);
const Id sampled_image_type = TypeSampledImage(image_type); const Id sampler_type = TypeSampledImage(image_type);
const Id pointer_type = const Id sampler_pointer_type =
TypePointer(spv::StorageClass::UniformConstant, sampled_image_type); TypePointer(spv::StorageClass::UniformConstant, sampler_type);
const Id type = sampler.IsIndexed()
? TypeArray(sampler_type, Constant(t_uint, sampler.Size()))
: sampler_type;
const Id pointer_type = TypePointer(spv::StorageClass::UniformConstant, type);
const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant); const Id id = OpVariable(pointer_type, spv::StorageClass::UniformConstant);
AddGlobalVariable(Name(id, fmt::format("sampler_{}", sampler.GetIndex()))); AddGlobalVariable(Name(id, fmt::format("sampler_{}", sampler.GetIndex())));
Decorate(id, spv::Decoration::Binding, binding++); Decorate(id, spv::Decoration::Binding, binding++);
Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET); Decorate(id, spv::Decoration::DescriptorSet, DESCRIPTOR_SET);
sampled_images.emplace(sampler.GetIndex(), sampled_images.emplace(sampler.GetIndex(), SampledImage{image_type, sampler_type,
SampledImage{image_type, sampled_image_type, id}); sampler_pointer_type, id});
} }
return binding; return binding;
} }
@ -1525,7 +1530,12 @@ private:
ASSERT(!meta.sampler.IsBuffer()); ASSERT(!meta.sampler.IsBuffer());
const auto& entry = sampled_images.at(meta.sampler.GetIndex()); const auto& entry = sampled_images.at(meta.sampler.GetIndex());
return OpLoad(entry.sampled_image_type, entry.sampler); Id sampler = entry.variable;
if (meta.sampler.IsIndexed()) {
const Id index = AsInt(Visit(meta.index));
sampler = OpAccessChain(entry.sampler_pointer_type, sampler, index);
}
return OpLoad(entry.sampler_type, sampler);
} }
Id GetTextureImage(Operation operation) { Id GetTextureImage(Operation operation) {

View File

@ -299,7 +299,7 @@ private:
u32 index{}; ///< Emulated index given for this sampler. u32 index{}; ///< Emulated index given for this sampler.
u32 offset{}; ///< Offset in the const buffer from where the sampler is being read. u32 offset{}; ///< Offset in the const buffer from where the sampler is being read.
u32 buffer{}; ///< Buffer where the bindless sampler is being read (unused on bound samplers). u32 buffer{}; ///< Buffer where the bindless sampler is being read (unused on bound samplers).
u32 size{}; ///< Size of the sampler if indexed. u32 size{1}; ///< Size of the sampler.
Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
bool is_array{}; ///< Whether the texture is being sampled as an array texture or not. bool is_array{}; ///< Whether the texture is being sampled as an array texture or not.