From d6da91ca90b715cb61a82004e31a71fa0aeb3f15 Mon Sep 17 00:00:00 2001 From: pineappleEA Date: Sun, 27 Aug 2023 06:23:20 +0200 Subject: [PATCH] early-access version 3838 --- README.md | 2 +- .../translate/impl/move_special_register.cpp | 3 +- src/video_core/buffer_cache/buffer_cache.h | 10 ++++++- src/video_core/dma_pusher.cpp | 28 +++++++++++++++---- src/video_core/dma_pusher.h | 5 +++- src/video_core/engines/engine_interface.h | 8 ++++++ src/video_core/engines/engine_upload.h | 8 ++++++ src/video_core/engines/kepler_compute.cpp | 20 ++++++++++++- src/video_core/engines/kepler_compute.h | 17 +++++++++++ src/video_core/engines/puller.cpp | 15 ++++++---- .../renderer_opengl/gl_rasterizer.cpp | 11 ++++++++ .../renderer_vulkan/vk_pipeline_cache.cpp | 13 +++++++++ .../renderer_vulkan/vk_rasterizer.cpp | 14 ++++++++++ .../renderer_vulkan/vk_texture_cache.cpp | 6 ---- .../vulkan_common/vulkan_wrapper.cpp | 1 + src/video_core/vulkan_common/vulkan_wrapper.h | 5 ++++ 16 files changed, 144 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index e51047940..052230f8c 100755 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ yuzu emulator early access ============= -This is the source code for early-access 3837. +This is the source code for early-access 3838. ## Legal Notice diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp index dc0be9653..48178bc00 100755 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/move_special_register.cpp @@ -161,7 +161,8 @@ enum class SpecialRegister : u64 { LOG_WARNING(Shader, "(STUBBED) SR_AFFINITY"); return ir.Imm32(0); // This is the default value hardware returns. default: - throw NotImplementedException("S2R special register {}", special_register); + LOG_CRITICAL(Shader, "(STUBBED) Special register {}", special_register); + return ir.Imm32(0); // This is the default value hardware returns. } } } // Anonymous namespace diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index d2ebc279b..9f7d72762 100755 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -295,8 +295,11 @@ std::pair BufferCache

::ObtainCPUBuffer( MarkWrittenBuffer(buffer_id, cpu_addr, size); break; case ObtainBufferOperation::DiscardWrite: { - IntervalType interval{cpu_addr, size}; + VAddr cpu_addr_start = Common::AlignDown(cpu_addr, 64); + VAddr cpu_addr_end = Common::AlignUp(cpu_addr + size, 64); + IntervalType interval{cpu_addr_start, cpu_addr_end}; ClearDownload(interval); + common_ranges.subtract(interval); break; } default: @@ -1165,6 +1168,11 @@ void BufferCache

::UpdateDrawIndirect() { .size = static_cast(size), .buffer_id = FindBuffer(*cpu_addr, static_cast(size)), }; + VAddr cpu_addr_start = Common::AlignDown(*cpu_addr, 64); + VAddr cpu_addr_end = Common::AlignUp(*cpu_addr + size, 64); + IntervalType interval{cpu_addr_start, cpu_addr_end}; + ClearDownload(interval); + common_ranges.subtract(interval); }; if (current_draw_indirect->include_count) { update(current_draw_indirect->count_start_address, sizeof(u32), diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 3ff55f60e..d3b701102 100755 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp @@ -14,6 +14,7 @@ namespace Tegra { constexpr u32 MacroRegistersStart = 0xE00; +constexpr u32 ComputeInline = 0x6D; DmaPusher::DmaPusher(Core::System& system_, GPU& gpu_, MemoryManager& memory_manager_, Control::ChannelState& channel_state_) @@ -83,20 +84,35 @@ bool DmaPusher::Step() { dma_state.dma_get, command_list_header.size * sizeof(u32)); } } - if (Settings::IsGPULevelHigh() && dma_state.method < MacroRegistersStart) { + const auto safe_process = [&] { Core::Memory::GpuGuestMemory headers(memory_manager, dma_state.dma_get, command_list_header.size, &command_headers); ProcessCommands(headers); + }; + const auto unsafe_process = [&] { + Core::Memory::GpuGuestMemory + headers(memory_manager, dma_state.dma_get, command_list_header.size, + &command_headers); + ProcessCommands(headers); + }; + if (Settings::IsGPULevelHigh()) { + if (dma_state.method >= MacroRegistersStart) { + unsafe_process(); + return true; + } + if (subchannel_type[dma_state.subchannel] == Engines::EngineTypes::KeplerCompute && + dma_state.method == ComputeInline) { + unsafe_process(); + return true; + } + safe_process(); return true; } - Core::Memory::GpuGuestMemory - headers(memory_manager, dma_state.dma_get, command_list_header.size, &command_headers); - ProcessCommands(headers); + unsafe_process(); } - return true; } diff --git a/src/video_core/dma_pusher.h b/src/video_core/dma_pusher.h index 44a0df467..981fcffbe 100755 --- a/src/video_core/dma_pusher.h +++ b/src/video_core/dma_pusher.h @@ -130,8 +130,10 @@ public: void DispatchCalls(); - void BindSubchannel(Engines::EngineInterface* engine, u32 subchannel_id) { + void BindSubchannel(Engines::EngineInterface* engine, u32 subchannel_id, + Engines::EngineTypes engine_type) { subchannels[subchannel_id] = engine; + subchannel_type[subchannel_id] = engine_type; } void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); @@ -170,6 +172,7 @@ private: const bool ib_enable{true}; ///< IB mode enabled std::array subchannels{}; + std::array subchannel_type; GPU& gpu; Core::System& system; diff --git a/src/video_core/engines/engine_interface.h b/src/video_core/engines/engine_interface.h index fbe666ac2..9c9998ed4 100755 --- a/src/video_core/engines/engine_interface.h +++ b/src/video_core/engines/engine_interface.h @@ -11,6 +11,14 @@ namespace Tegra::Engines { +enum class EngineTypes : u32 { + KeplerCompute, + Maxwell3D, + Fermi2D, + MaxwellDMA, + KeplerMemory, +}; + class EngineInterface { public: virtual ~EngineInterface() = default; diff --git a/src/video_core/engines/engine_upload.h b/src/video_core/engines/engine_upload.h index 9cf7fd6fd..a487b5b65 100755 --- a/src/video_core/engines/engine_upload.h +++ b/src/video_core/engines/engine_upload.h @@ -69,6 +69,14 @@ public: /// Binds a rasterizer to this engine. void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); + GPUVAddr ExecTargetAddress() const { + return regs.dest.Address(); + } + + u32 GetUploadSize() const { + return copy_size; + } + private: void ProcessData(std::span read_buffer); diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index e1de1042c..670dc87be 100755 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -43,16 +43,33 @@ void KeplerCompute::CallMethod(u32 method, u32 method_argument, bool is_last_cal switch (method) { case KEPLER_COMPUTE_REG_INDEX(exec_upload): { + UploadInfo info{.upload_address = upload_address, + .exec_address = upload_state.ExecTargetAddress(), + .copy_size = upload_state.GetUploadSize()}; + uploads.push_back(info); upload_state.ProcessExec(regs.exec_upload.linear != 0); break; } case KEPLER_COMPUTE_REG_INDEX(data_upload): { + upload_address = current_dma_segment; upload_state.ProcessData(method_argument, is_last_call); break; } - case KEPLER_COMPUTE_REG_INDEX(launch): + case KEPLER_COMPUTE_REG_INDEX(launch): { + const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); + + for (auto& data : uploads) { + const GPUVAddr offset = data.exec_address - launch_desc_loc; + if (offset / sizeof(u32) == LAUNCH_REG_INDEX(grid_dim_x) && + memory_manager.IsMemoryDirty(data.upload_address, data.copy_size)) { + indirect_compute = {data.upload_address}; + } + } + uploads.clear(); ProcessLaunch(); + indirect_compute = std::nullopt; break; + } default: break; } @@ -62,6 +79,7 @@ void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amoun u32 methods_pending) { switch (method) { case KEPLER_COMPUTE_REG_INDEX(data_upload): + upload_address = current_dma_segment; upload_state.ProcessData(base_start, amount); return; default: diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h index 5d819dd49..e5dd216cb 100755 --- a/src/video_core/engines/kepler_compute.h +++ b/src/video_core/engines/kepler_compute.h @@ -5,6 +5,7 @@ #include #include +#include #include #include "common/bit_field.h" #include "common/common_funcs.h" @@ -36,6 +37,9 @@ namespace Tegra::Engines { #define KEPLER_COMPUTE_REG_INDEX(field_name) \ (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32)) +#define LAUNCH_REG_INDEX(field_name) \ + (offsetof(Tegra::Engines::KeplerCompute::LaunchParams, field_name) / sizeof(u32)) + class KeplerCompute final : public EngineInterface { public: explicit KeplerCompute(Core::System& system, MemoryManager& memory_manager); @@ -201,6 +205,10 @@ public: void CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) override; + std::optional GetIndirectComputeAddress() const { + return indirect_compute; + } + private: void ProcessLaunch(); @@ -216,6 +224,15 @@ private: MemoryManager& memory_manager; VideoCore::RasterizerInterface* rasterizer = nullptr; Upload::State upload_state; + GPUVAddr upload_address; + + struct UploadInfo { + GPUVAddr upload_address; + GPUVAddr exec_address; + u32 copy_size; + }; + std::vector uploads; + std::optional indirect_compute{}; }; #define ASSERT_REG_POSITION(field_name, position) \ diff --git a/src/video_core/engines/puller.cpp b/src/video_core/engines/puller.cpp index 3fe06cc88..79d84d662 100755 --- a/src/video_core/engines/puller.cpp +++ b/src/video_core/engines/puller.cpp @@ -34,19 +34,24 @@ void Puller::ProcessBindMethod(const MethodCall& method_call) { bound_engines[method_call.subchannel] = engine_id; switch (engine_id) { case EngineID::FERMI_TWOD_A: - dma_pusher.BindSubchannel(channel_state.fermi_2d.get(), method_call.subchannel); + dma_pusher.BindSubchannel(channel_state.fermi_2d.get(), method_call.subchannel, + EngineTypes::Fermi2D); break; case EngineID::MAXWELL_B: - dma_pusher.BindSubchannel(channel_state.maxwell_3d.get(), method_call.subchannel); + dma_pusher.BindSubchannel(channel_state.maxwell_3d.get(), method_call.subchannel, + EngineTypes::Maxwell3D); break; case EngineID::KEPLER_COMPUTE_B: - dma_pusher.BindSubchannel(channel_state.kepler_compute.get(), method_call.subchannel); + dma_pusher.BindSubchannel(channel_state.kepler_compute.get(), method_call.subchannel, + EngineTypes::KeplerCompute); break; case EngineID::MAXWELL_DMA_COPY_A: - dma_pusher.BindSubchannel(channel_state.maxwell_dma.get(), method_call.subchannel); + dma_pusher.BindSubchannel(channel_state.maxwell_dma.get(), method_call.subchannel, + EngineTypes::MaxwellDMA); break; case EngineID::KEPLER_INLINE_TO_MEMORY_B: - dma_pusher.BindSubchannel(channel_state.kepler_memory.get(), method_call.subchannel); + dma_pusher.BindSubchannel(channel_state.kepler_memory.get(), method_call.subchannel, + EngineTypes::KeplerMemory); break; default: UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 2cc52003f..a85c6c34b 100755 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -380,6 +380,17 @@ void RasterizerOpenGL::DispatchCompute() { pipeline->SetEngine(kepler_compute, gpu_memory); pipeline->Configure(); const auto& qmd{kepler_compute->launch_description}; + auto indirect_address = kepler_compute->GetIndirectComputeAddress(); + if (indirect_address) { + // DispatchIndirect + static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize; + const auto post_op = VideoCommon::ObtainBufferOperation::DiscardWrite; + const auto [buffer, offset] = + buffer_cache.ObtainBuffer(*indirect_address, 12, sync_info, post_op); + glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, buffer->Handle()); + glDispatchComputeIndirect(static_cast(offset)); + return; + } glDispatchCompute(qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z); ++num_queued_commands; has_written_global_memory |= pipeline->WritesGlobalMemory(); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 7659b355a..ba38d5434 100755 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -665,6 +665,19 @@ std::unique_ptr PipelineCache::CreateGraphicsPipeline( std::move(modules), infos); } catch (const Shader::Exception& exception) { + auto hash = key.Hash(); + size_t env_index{0}; + for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { + if (key.unique_hashes[index] == 0) { + continue; + } + Shader::Environment& env{*envs[env_index]}; + ++env_index; + + const u32 cfg_offset{static_cast(env.StartAddress() + sizeof(Shader::ProgramHeader))}; + Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset, index == 0); + env.Dump(hash, key.unique_hashes[index]); + } LOG_ERROR(Render_Vulkan, "{}", exception.what()); return nullptr; } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 64cd81864..b25511237 100755 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -456,6 +456,20 @@ void RasterizerVulkan::DispatchCompute() { pipeline->Configure(*kepler_compute, *gpu_memory, scheduler, buffer_cache, texture_cache); const auto& qmd{kepler_compute->launch_description}; + auto indirect_address = kepler_compute->GetIndirectComputeAddress(); + if (indirect_address) { + // DispatchIndirect + static constexpr auto sync_info = VideoCommon::ObtainBufferSynchronize::FullSynchronize; + const auto post_op = VideoCommon::ObtainBufferOperation::DiscardWrite; + const auto [buffer, offset] = + buffer_cache.ObtainBuffer(*indirect_address, 12, sync_info, post_op); + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([indirect_buffer = buffer->Handle(), + indirect_offset = offset](vk::CommandBuffer cmdbuf) { + cmdbuf.DispatchIndirect(indirect_buffer, indirect_offset); + }); + return; + } const std::array dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z}; scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); }); diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 8f86597ef..7ac73d41b 100755 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -1193,12 +1193,6 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src, const VkImageAspectFlags aspect_mask = dst.AspectMask(); ASSERT(aspect_mask == src.AspectMask()); - if (VideoCore::Surface::BytesPerBlock(src.info.format) != - VideoCore::Surface::BytesPerBlock(dst.info.format)) { - ReinterpretImage(dst, src, copies); - return; - } - std::ranges::transform(copies, vk_copies.begin(), [aspect_mask](const auto& copy) { return MakeImageCopy(copy, aspect_mask); }); diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp index bde632acb..83b96ab7f 100755 --- a/src/video_core/vulkan_common/vulkan_wrapper.cpp +++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp @@ -94,6 +94,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept { X(vkCmdCopyImageToBuffer); X(vkCmdCopyQueryPoolResults); X(vkCmdDispatch); + X(vkCmdDispatchIndirect); X(vkCmdDraw); X(vkCmdDrawIndexed); X(vkCmdDrawIndirect); diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index e848f8f7f..606ffb359 100755 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h @@ -205,6 +205,7 @@ struct DeviceDispatch : InstanceDispatch { PFN_vkCmdCopyImageToBuffer vkCmdCopyImageToBuffer{}; PFN_vkCmdCopyQueryPoolResults vkCmdCopyQueryPoolResults{}; PFN_vkCmdDispatch vkCmdDispatch{}; + PFN_vkCmdDispatchIndirect vkCmdDispatchIndirect{}; PFN_vkCmdDraw vkCmdDraw{}; PFN_vkCmdDrawIndexed vkCmdDrawIndexed{}; PFN_vkCmdDrawIndirect vkCmdDrawIndirect{}; @@ -1220,6 +1221,10 @@ public: dld->vkCmdDispatch(handle, x, y, z); } + void DispatchIndirect(VkBuffer indirect_buffer, VkDeviceSize offset) const noexcept { + dld->vkCmdDispatchIndirect(handle, indirect_buffer, offset); + } + void PipelineBarrier(VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask, VkDependencyFlags dependency_flags, Span memory_barriers, Span buffer_barriers,