From d0f8ea1640011003e627a6c8f9df45005682db78 Mon Sep 17 00:00:00 2001 From: pineappleEA Date: Mon, 15 Jan 2024 21:59:08 +0100 Subject: [PATCH] early-access version 4066 --- README.md | 2 +- .../app/src/main/res/values/arrays.xml | 4 +- src/core/arm/nce/patcher.cpp | 83 ++++++++++++------- src/core/arm/nce/patcher.h | 27 +++--- src/core/hle/kernel/k_process.cpp | 4 +- .../loader/deconstructed_rom_directory.cpp | 75 +++++++++++++---- src/core/loader/nso.cpp | 41 ++++----- src/core/loader/nso.h | 3 +- .../backend/spirv/emit_spirv_memory.cpp | 40 +++++++-- .../backend/spirv/spirv_emit_context.cpp | 51 ++++++++++++ .../backend/spirv/spirv_emit_context.h | 3 + 11 files changed, 243 insertions(+), 90 deletions(-) diff --git a/README.md b/README.md index 0b3911817..920a068df 100755 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ yuzu emulator early access ============= -This is the source code for early-access 4065. +This is the source code for early-access 4066. ## Legal Notice diff --git a/src/android/app/src/main/res/values/arrays.xml b/src/android/app/src/main/res/values/arrays.xml index 0363ff3b6..78e855bde 100755 --- a/src/android/app/src/main/res/values/arrays.xml +++ b/src/android/app/src/main/res/values/arrays.xml @@ -228,10 +228,10 @@ R ZL ZR - @string/gamepad_left_stick - @string/gamepad_right_stick L3 R3 + @string/gamepad_left_stick + @string/gamepad_right_stick @string/gamepad_d_pad diff --git a/src/core/arm/nce/patcher.cpp b/src/core/arm/nce/patcher.cpp index 47a7a8880..c7285e3a0 100755 --- a/src/core/arm/nce/patcher.cpp +++ b/src/core/arm/nce/patcher.cpp @@ -22,14 +22,10 @@ using NativeExecutionParameters = Kernel::KThread::NativeExecutionParameters; constexpr size_t MaxRelativeBranch = 128_MiB; constexpr u32 ModuleCodeIndex = 0x24 / sizeof(u32); -Patcher::Patcher() : c(m_patch_instructions) {} - -Patcher::~Patcher() = default; - -void Patcher::PatchText(const Kernel::PhysicalMemory& program_image, - const Kernel::CodeSet::Segment& code) { - // Branch to the first instruction of the module. - this->BranchToModule(0); +Patcher::Patcher() : c(m_patch_instructions) { + // The first word of the patch section is always a branch to the first instruction of the + // module. + c.dw(0); // Write save context helper function. c.l(m_save_context); @@ -38,6 +34,25 @@ void Patcher::PatchText(const Kernel::PhysicalMemory& program_image, // Write load context helper function. c.l(m_load_context); WriteLoadContext(); +} + +Patcher::~Patcher() = default; + +bool Patcher::PatchText(const Kernel::PhysicalMemory& program_image, + const Kernel::CodeSet::Segment& code) { + // If we have patched modules but cannot reach the new module, then it needs its own patcher. + const size_t image_size = program_image.size(); + if (total_program_size + image_size > MaxRelativeBranch && total_program_size > 0) { + return false; + } + + // Add a new module patch to our list + modules.emplace_back(); + curr_patch = &modules.back(); + + // The first word of the patch section is always a branch to the first instruction of the + // module. + curr_patch->m_branch_to_module_relocations.push_back({0, 0}); // Retrieve text segment data. const auto text = std::span{program_image}.subspan(code.offset, code.size); @@ -94,16 +109,17 @@ void Patcher::PatchText(const Kernel::PhysicalMemory& program_image, } if (auto exclusive = Exclusive{inst}; exclusive.Verify()) { - m_exclusives.push_back(i); + curr_patch->m_exclusives.push_back(i); } } // Determine patching mode for the final relocation step - const size_t image_size = program_image.size(); + total_program_size += image_size; this->mode = image_size > MaxRelativeBranch ? PatchMode::PreText : PatchMode::PostData; + return true; } -void Patcher::RelocateAndCopy(Common::ProcessAddress load_base, +bool Patcher::RelocateAndCopy(Common::ProcessAddress load_base, const Kernel::CodeSet::Segment& code, Kernel::PhysicalMemory& program_image, EntryTrampolines* out_trampolines) { @@ -120,7 +136,7 @@ void Patcher::RelocateAndCopy(Common::ProcessAddress load_base, if (mode == PatchMode::PreText) { rc.B(rel.patch_offset - patch_size - rel.module_offset); } else { - rc.B(image_size - rel.module_offset + rel.patch_offset); + rc.B(total_program_size - rel.module_offset + rel.patch_offset); } }; @@ -129,7 +145,7 @@ void Patcher::RelocateAndCopy(Common::ProcessAddress load_base, if (mode == PatchMode::PreText) { rc.B(patch_size - rel.patch_offset + rel.module_offset); } else { - rc.B(rel.module_offset - image_size - rel.patch_offset); + rc.B(rel.module_offset - total_program_size - rel.patch_offset); } }; @@ -137,7 +153,7 @@ void Patcher::RelocateAndCopy(Common::ProcessAddress load_base, if (mode == PatchMode::PreText) { return GetInteger(load_base) + patch_offset; } else { - return GetInteger(load_base) + image_size + patch_offset; + return GetInteger(load_base) + total_program_size + patch_offset; } }; @@ -150,39 +166,50 @@ void Patcher::RelocateAndCopy(Common::ProcessAddress load_base, }; // We are now ready to relocate! - for (const Relocation& rel : m_branch_to_patch_relocations) { + auto& patch = modules[m_relocate_module_index++]; + for (const Relocation& rel : patch.m_branch_to_patch_relocations) { ApplyBranchToPatchRelocation(text_words.data() + rel.module_offset / sizeof(u32), rel); } - for (const Relocation& rel : m_branch_to_module_relocations) { + for (const Relocation& rel : patch.m_branch_to_module_relocations) { ApplyBranchToModuleRelocation(m_patch_instructions.data() + rel.patch_offset / sizeof(u32), rel); } // Rewrite PC constants and record post trampolines - for (const Relocation& rel : m_write_module_pc_relocations) { + for (const Relocation& rel : patch.m_write_module_pc_relocations) { oaknut::CodeGenerator rc{m_patch_instructions.data() + rel.patch_offset / sizeof(u32)}; rc.dx(RebasePc(rel.module_offset)); } - for (const Trampoline& rel : m_trampolines) { + for (const Trampoline& rel : patch.m_trampolines) { out_trampolines->insert({RebasePc(rel.module_offset), RebasePatch(rel.patch_offset)}); } // Cortex-A57 seems to treat all exclusives as ordered, but newer processors do not. // Convert to ordered to preserve this assumption. - for (const ModuleTextAddress i : m_exclusives) { + for (const ModuleTextAddress i : patch.m_exclusives) { auto exclusive = Exclusive{text_words[i]}; text_words[i] = exclusive.AsOrdered(); } - // Copy to program image - if (this->mode == PatchMode::PreText) { - std::memcpy(program_image.data(), m_patch_instructions.data(), - m_patch_instructions.size() * sizeof(u32)); - } else { - program_image.resize(image_size + patch_size); - std::memcpy(program_image.data() + image_size, m_patch_instructions.data(), - m_patch_instructions.size() * sizeof(u32)); + // Remove the patched module size from the total. This is done so total_program_size + // always represents the distance from the currently patched module to the patch section. + total_program_size -= image_size; + + // Only copy to the program image of the last module + if (m_relocate_module_index == modules.size()) { + if (this->mode == PatchMode::PreText) { + ASSERT(image_size == total_program_size); + std::memcpy(program_image.data(), m_patch_instructions.data(), + m_patch_instructions.size() * sizeof(u32)); + } else { + program_image.resize(image_size + patch_size); + std::memcpy(program_image.data() + image_size, m_patch_instructions.data(), + m_patch_instructions.size() * sizeof(u32)); + } + return true; } + + return false; } size_t Patcher::GetSectionSize() const noexcept { @@ -322,7 +349,7 @@ void Patcher::WriteSvcTrampoline(ModuleDestLabel module_dest, u32 svc_id) { // Write the post-SVC trampoline address, which will jump back to the guest after restoring its // state. - m_trampolines.push_back({c.offset(), module_dest}); + curr_patch->m_trampolines.push_back({c.offset(), module_dest}); // Host called this location. Save the return address so we can // unwind the stack properly when jumping back. diff --git a/src/core/arm/nce/patcher.h b/src/core/arm/nce/patcher.h index c6d1608c1..a44f385e2 100755 --- a/src/core/arm/nce/patcher.h +++ b/src/core/arm/nce/patcher.h @@ -31,9 +31,9 @@ public: explicit Patcher(); ~Patcher(); - void PatchText(const Kernel::PhysicalMemory& program_image, + bool PatchText(const Kernel::PhysicalMemory& program_image, const Kernel::CodeSet::Segment& code); - void RelocateAndCopy(Common::ProcessAddress load_base, const Kernel::CodeSet::Segment& code, + bool RelocateAndCopy(Common::ProcessAddress load_base, const Kernel::CodeSet::Segment& code, Kernel::PhysicalMemory& program_image, EntryTrampolines* out_trampolines); size_t GetSectionSize() const noexcept; @@ -61,16 +61,16 @@ private: private: void BranchToPatch(uintptr_t module_dest) { - m_branch_to_patch_relocations.push_back({c.offset(), module_dest}); + curr_patch->m_branch_to_patch_relocations.push_back({c.offset(), module_dest}); } void BranchToModule(uintptr_t module_dest) { - m_branch_to_module_relocations.push_back({c.offset(), module_dest}); + curr_patch->m_branch_to_module_relocations.push_back({c.offset(), module_dest}); c.dw(0); } void WriteModulePc(uintptr_t module_dest) { - m_write_module_pc_relocations.push_back({c.offset(), module_dest}); + curr_patch->m_write_module_pc_relocations.push_back({c.offset(), module_dest}); c.dx(0); } @@ -84,15 +84,22 @@ private: uintptr_t module_offset; ///< Offset in bytes from the start of the text section. }; + struct ModulePatch { + std::vector m_trampolines; + std::vector m_branch_to_patch_relocations{}; + std::vector m_branch_to_module_relocations{}; + std::vector m_write_module_pc_relocations{}; + std::vector m_exclusives{}; + }; + oaknut::VectorCodeGenerator c; - std::vector m_trampolines; - std::vector m_branch_to_patch_relocations{}; - std::vector m_branch_to_module_relocations{}; - std::vector m_write_module_pc_relocations{}; - std::vector m_exclusives{}; oaknut::Label m_save_context{}; oaknut::Label m_load_context{}; PatchMode mode{PatchMode::None}; + size_t total_program_size{}; + size_t m_relocate_module_index{}; + std::vector modules; + ModulePatch* curr_patch; }; } // namespace Core::NCE diff --git a/src/core/hle/kernel/k_process.cpp b/src/core/hle/kernel/k_process.cpp index 850ecc4c0..6cad999aa 100755 --- a/src/core/hle/kernel/k_process.cpp +++ b/src/core/hle/kernel/k_process.cpp @@ -1239,10 +1239,10 @@ void KProcess::LoadModule(CodeSet code_set, KProcessAddress base_addr) { ReprotectSegment(code_set.DataSegment(), Svc::MemoryPermission::ReadWrite); #ifdef HAS_NCE - if (this->IsApplication() && Settings::IsNceEnabled()) { + const auto& patch = code_set.PatchSegment(); + if (this->IsApplication() && Settings::IsNceEnabled() && patch.size != 0) { auto& buffer = m_kernel.System().DeviceMemory().buffer; const auto& code = code_set.CodeSegment(); - const auto& patch = code_set.PatchSegment(); buffer.Protect(GetInteger(base_addr + code.addr), code.size, Common::MemoryPermission::Read | Common::MemoryPermission::Execute); buffer.Protect(GetInteger(base_addr + patch.addr), patch.size, diff --git a/src/core/loader/deconstructed_rom_directory.cpp b/src/core/loader/deconstructed_rom_directory.cpp index 16795902a..f1e2e70f2 100755 --- a/src/core/loader/deconstructed_rom_directory.cpp +++ b/src/core/loader/deconstructed_rom_directory.cpp @@ -19,8 +19,54 @@ #include "core/arm/nce/patcher.h" #endif +#ifndef HAS_NCE +namespace Core::NCE { +class Patcher {}; +} // namespace Core::NCE +#endif + namespace Loader { +struct PatchCollection { + explicit PatchCollection(bool is_application_) : is_application{is_application_} { + module_patcher_indices.fill(-1); + patchers.emplace_back(); + } + + std::vector* GetPatchers() { + if (is_application && Settings::IsNceEnabled()) { + return &patchers; + } + return nullptr; + } + + size_t GetTotalPatchSize() const { + size_t total_size{}; +#ifdef HAS_NCE + for (auto& patcher : patchers) { + total_size += patcher.GetSectionSize(); + } +#endif + return total_size; + } + + void SaveIndex(size_t module) { + module_patcher_indices[module] = static_cast(patchers.size() - 1); + } + + s32 GetIndex(size_t module) const { + return module_patcher_indices[module]; + } + + s32 GetLastIndex() const { + return static_cast(patchers.size()) - 1; + } + + bool is_application; + std::vector patchers; + std::array module_patcher_indices{}; +}; + AppLoader_DeconstructedRomDirectory::AppLoader_DeconstructedRomDirectory(FileSys::VirtualFile file_, bool override_update_) : AppLoader(std::move(file_)), override_update(override_update_), is_hbl(false) { @@ -142,18 +188,7 @@ AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirect std::size_t code_size{}; // Define an nce patch context for each potential module. -#ifdef HAS_NCE - std::array module_patchers; -#endif - - const auto GetPatcher = [&](size_t i) -> Core::NCE::Patcher* { -#ifdef HAS_NCE - if (is_application && Settings::IsNceEnabled()) { - return &module_patchers[i]; - } -#endif - return nullptr; - }; + PatchCollection patch_ctx{is_application}; // Use the NSO module loader to figure out the code layout for (size_t i = 0; i < static_modules.size(); i++) { @@ -164,13 +199,14 @@ AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirect } const bool should_pass_arguments = std::strcmp(module, "rtld") == 0; - const auto tentative_next_load_addr = - AppLoader_NSO::LoadModule(process, system, *module_file, code_size, - should_pass_arguments, false, {}, GetPatcher(i)); + const auto tentative_next_load_addr = AppLoader_NSO::LoadModule( + process, system, *module_file, code_size, should_pass_arguments, false, {}, + patch_ctx.GetPatchers(), patch_ctx.GetLastIndex()); if (!tentative_next_load_addr) { return {ResultStatus::ErrorLoadingNSO, {}}; } + patch_ctx.SaveIndex(i); code_size = *tentative_next_load_addr; } @@ -184,6 +220,9 @@ AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirect return 0; }(); + // Add patch size to the total module size + code_size += patch_ctx.GetTotalPatchSize(); + // Setup the process code layout if (process.LoadFromMetadata(metadata, code_size, fastmem_base, is_hbl).IsError()) { return {ResultStatus::ErrorUnableToParseKernelMetadata, {}}; @@ -204,9 +243,9 @@ AppLoader_DeconstructedRomDirectory::LoadResult AppLoader_DeconstructedRomDirect const VAddr load_addr{next_load_addr}; const bool should_pass_arguments = std::strcmp(module, "rtld") == 0; - const auto tentative_next_load_addr = - AppLoader_NSO::LoadModule(process, system, *module_file, load_addr, - should_pass_arguments, true, pm, GetPatcher(i)); + const auto tentative_next_load_addr = AppLoader_NSO::LoadModule( + process, system, *module_file, load_addr, should_pass_arguments, true, pm, + patch_ctx.GetPatchers(), patch_ctx.GetIndex(i)); if (!tentative_next_load_addr) { return {ResultStatus::ErrorLoadingNSO, {}}; } diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp index 94434a5d0..5ba6dd4df 100755 --- a/src/core/loader/nso.cpp +++ b/src/core/loader/nso.cpp @@ -77,7 +77,8 @@ std::optional AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core:: const FileSys::VfsFile& nso_file, VAddr load_base, bool should_pass_arguments, bool load_into_process, std::optional pm, - Core::NCE::Patcher* patch) { + std::vector* patches, + s32 patch_index) { if (nso_file.GetSize() < sizeof(NSOHeader)) { return std::nullopt; } @@ -94,8 +95,11 @@ std::optional AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core:: // Allocate some space at the beginning if we are patching in PreText mode. const size_t module_start = [&]() -> size_t { #ifdef HAS_NCE - if (patch && patch->GetPatchMode() == Core::NCE::PatchMode::PreText) { - return patch->GetSectionSize(); + if (patches && load_into_process) { + auto* patch = &patches->operator[](patch_index); + if (patch->GetPatchMode() == Core::NCE::PatchMode::PreText) { + return patch->GetSectionSize(); + } } #endif return 0; @@ -160,27 +164,24 @@ std::optional AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core:: #ifdef HAS_NCE // If we are computing the process code layout and using nce backend, patch. const auto& code = codeset.CodeSegment(); - if (patch && patch->GetPatchMode() == Core::NCE::PatchMode::None) { + auto* patch = patches ? &patches->operator[](patch_index) : nullptr; + if (patch && !load_into_process) { // Patch SVCs and MRS calls in the guest code - patch->PatchText(program_image, code); - - // Add patch section size to the module size. - image_size += static_cast(patch->GetSectionSize()); + while (!patch->PatchText(program_image, code)) { + patch = &patches->emplace_back(); + } } else if (patch) { // Relocate code patch and copy to the program_image. - patch->RelocateAndCopy(load_base, code, program_image, &process.GetPostHandlers()); - - // Update patch section. - auto& patch_segment = codeset.PatchSegment(); - patch_segment.addr = - patch->GetPatchMode() == Core::NCE::PatchMode::PreText ? 0 : image_size; - patch_segment.size = static_cast(patch->GetSectionSize()); - - // Add patch section size to the module size. In PreText mode image_size - // already contains the patch segment as part of module_start. - if (patch->GetPatchMode() == Core::NCE::PatchMode::PostData) { - image_size += patch_segment.size; + if (patch->RelocateAndCopy(load_base, code, program_image, &process.GetPostHandlers())) { + // Update patch section. + auto& patch_segment = codeset.PatchSegment(); + patch_segment.addr = + patch->GetPatchMode() == Core::NCE::PatchMode::PreText ? 0 : image_size; + patch_segment.size = static_cast(patch->GetSectionSize()); } + + // Refresh image_size to take account the patch section if it was added by RelocateAndCopy + image_size = static_cast(program_image.size()); } #endif diff --git a/src/core/loader/nso.h b/src/core/loader/nso.h index b51801e81..e9ca067fe 100755 --- a/src/core/loader/nso.h +++ b/src/core/loader/nso.h @@ -93,7 +93,8 @@ public: const FileSys::VfsFile& nso_file, VAddr load_base, bool should_pass_arguments, bool load_into_process, std::optional pm = {}, - Core::NCE::Patcher* patch = nullptr); + std::vector* patches = nullptr, + s32 patch_index = -1); LoadResult Load(Kernel::KProcess& process, Core::System& system) override; diff --git a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp index 33314dd13..8635f2690 100755 --- a/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv_memory.cpp @@ -65,6 +65,14 @@ void WriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& WriteStorage(ctx, binding, offset, value, ctx.storage_types.U32, sizeof(u32), &StorageDefinitions::U32, index_offset); } + +void WriteStorageByCasLoop(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, + Id value, Id bit_offset, Id bit_count) { + const Id pointer{StoragePointer(ctx, binding, offset, ctx.storage_types.U32, sizeof(u32), + &StorageDefinitions::U32)}; + ctx.OpFunctionCall(ctx.TypeVoid(), ctx.write_storage_cas_loop_func, pointer, value, bit_offset, + bit_count); +} } // Anonymous namespace void EmitLoadGlobalU8(EmitContext&) { @@ -219,26 +227,42 @@ Id EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Valu void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.U8, value), ctx.storage_types.U8, - sizeof(u8), &StorageDefinitions::U8); + if (ctx.profile.support_int8) { + WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.U8, value), ctx.storage_types.U8, + sizeof(u8), &StorageDefinitions::U8); + } else { + WriteStorageByCasLoop(ctx, binding, offset, value, ctx.BitOffset8(offset), ctx.Const(8u)); + } } void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.S8, value), ctx.storage_types.S8, - sizeof(s8), &StorageDefinitions::S8); + if (ctx.profile.support_int8) { + WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.S8, value), ctx.storage_types.S8, + sizeof(s8), &StorageDefinitions::S8); + } else { + WriteStorageByCasLoop(ctx, binding, offset, value, ctx.BitOffset8(offset), ctx.Const(8u)); + } } void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.U16, value), ctx.storage_types.U16, - sizeof(u16), &StorageDefinitions::U16); + if (ctx.profile.support_int16) { + WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.U16, value), ctx.storage_types.U16, + sizeof(u16), &StorageDefinitions::U16); + } else { + WriteStorageByCasLoop(ctx, binding, offset, value, ctx.BitOffset16(offset), ctx.Const(16u)); + } } void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, Id value) { - WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.S16, value), ctx.storage_types.S16, - sizeof(s16), &StorageDefinitions::S16); + if (ctx.profile.support_int16) { + WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.S16, value), ctx.storage_types.S16, + sizeof(s16), &StorageDefinitions::S16); + } else { + WriteStorageByCasLoop(ctx, binding, offset, value, ctx.BitOffset16(offset), ctx.Const(16u)); + } } void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp index 164095b5d..46842c2f5 100755 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.cpp @@ -480,6 +480,7 @@ EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_inf DefineTextures(program.info, texture_binding, bindings.texture_scaling_index); DefineImages(program.info, image_binding, bindings.image_scaling_index); DefineAttributeMemAccess(program.info); + DefineWriteStorageCasLoopFunction(program.info); DefineGlobalMemoryFunctions(program.info); DefineRescalingInput(program.info); DefineRenderArea(program.info); @@ -877,6 +878,56 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { } } +void EmitContext::DefineWriteStorageCasLoopFunction(const Info& info) { + if (profile.support_int8 && profile.support_int16) { + return; + } + if (!info.uses_int8 && !info.uses_int16) { + return; + } + + AddCapability(spv::Capability::VariablePointersStorageBuffer); + + const Id ptr_type{TypePointer(spv::StorageClass::StorageBuffer, U32[1])}; + const Id func_type{TypeFunction(void_id, ptr_type, U32[1], U32[1], U32[1])}; + const Id func{OpFunction(void_id, spv::FunctionControlMask::MaskNone, func_type)}; + const Id pointer{OpFunctionParameter(ptr_type)}; + const Id value{OpFunctionParameter(U32[1])}; + const Id bit_offset{OpFunctionParameter(U32[1])}; + const Id bit_count{OpFunctionParameter(U32[1])}; + + AddLabel(); + const Id scope_device{Const(1u)}; + const Id ordering_relaxed{u32_zero_value}; + const Id body_label{OpLabel()}; + const Id continue_label{OpLabel()}; + const Id endloop_label{OpLabel()}; + const Id beginloop_label{OpLabel()}; + OpBranch(beginloop_label); + + AddLabel(beginloop_label); + OpLoopMerge(endloop_label, continue_label, spv::LoopControlMask::MaskNone); + OpBranch(body_label); + + AddLabel(body_label); + const Id expected_value{OpLoad(U32[1], pointer)}; + const Id desired_value{OpBitFieldInsert(U32[1], expected_value, value, bit_offset, bit_count)}; + const Id actual_value{OpAtomicCompareExchange(U32[1], pointer, scope_device, ordering_relaxed, + ordering_relaxed, desired_value, expected_value)}; + const Id store_successful{OpIEqual(U1, expected_value, actual_value)}; + OpBranchConditional(store_successful, endloop_label, continue_label); + + AddLabel(endloop_label); + OpReturn(); + + AddLabel(continue_label); + OpBranch(beginloop_label); + + OpFunctionEnd(); + + write_storage_cas_loop_func = func; +} + void EmitContext::DefineGlobalMemoryFunctions(const Info& info) { if (!info.uses_global_memory || !profile.support_int64) { return; diff --git a/src/shader_recompiler/backend/spirv/spirv_emit_context.h b/src/shader_recompiler/backend/spirv/spirv_emit_context.h index 609e555c5..1769ed8c2 100755 --- a/src/shader_recompiler/backend/spirv/spirv_emit_context.h +++ b/src/shader_recompiler/backend/spirv/spirv_emit_context.h @@ -325,6 +325,8 @@ public: Id f32x2_min_cas{}; Id f32x2_max_cas{}; + Id write_storage_cas_loop_func{}; + Id load_global_func_u32{}; Id load_global_func_u32x2{}; Id load_global_func_u32x4{}; @@ -372,6 +374,7 @@ private: void DefineTextures(const Info& info, u32& binding, u32& scaling_index); void DefineImages(const Info& info, u32& binding, u32& scaling_index); void DefineAttributeMemAccess(const Info& info); + void DefineWriteStorageCasLoopFunction(const Info& info); void DefineGlobalMemoryFunctions(const Info& info); void DefineRescalingInput(const Info& info); void DefineRescalingInputPushConstant();