From 81e086b5ac490f41498030ee3127c086116b4af2 Mon Sep 17 00:00:00 2001 From: bunnei Date: Mon, 7 Jan 2019 21:42:32 -0500 Subject: [PATCH 01/10] core: Set is_powered_on before GPU is initialized. --- src/core/core.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/core/core.cpp b/src/core/core.cpp index 6dda20faa..11094a87a 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -129,10 +129,12 @@ struct System::Impl { return ResultStatus::ErrorVideoCore; } + is_powered_on = true; + gpu_core = std::make_unique(system, renderer->Rasterizer()); cpu_core_manager.Initialize(system); - is_powered_on = true; + LOG_DEBUG(Core, "Initialized OK"); // Reset counters and set time origin to current frame From d6015ee211b04ac948a30abbaf2f623e1d903945 Mon Sep 17 00:00:00 2001 From: bunnei Date: Mon, 7 Jan 2019 21:46:33 -0500 Subject: [PATCH 02/10] settings: Add new graphics setting for use_asynchronous_gpu_emulation. --- src/core/settings.h | 1 + src/core/telemetry_session.cpp | 2 ++ src/yuzu/configuration/config.cpp | 4 ++++ src/yuzu/configuration/configure_graphics.cpp | 4 ++++ src/yuzu/configuration/configure_graphics.ui | 7 +++++++ src/yuzu_cmd/config.cpp | 2 ++ src/yuzu_cmd/default_ini.h | 4 ++++ 7 files changed, 24 insertions(+) diff --git a/src/core/settings.h b/src/core/settings.h index 7e76e0466..cdfb2f742 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -393,6 +393,7 @@ struct Values { u16 frame_limit; bool use_disk_shader_cache; bool use_accurate_gpu_emulation; + bool use_asynchronous_gpu_emulation; float bg_red; float bg_green; diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp index 58dfcc4df..e1db06811 100644 --- a/src/core/telemetry_session.cpp +++ b/src/core/telemetry_session.cpp @@ -162,6 +162,8 @@ TelemetrySession::TelemetrySession() { Settings::values.use_disk_shader_cache); AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAccurateGpuEmulation", Settings::values.use_accurate_gpu_emulation); + AddField(Telemetry::FieldType::UserConfig, "Renderer_UseAsynchronousGpuEmulation", + Settings::values.use_asynchronous_gpu_emulation); AddField(Telemetry::FieldType::UserConfig, "System_UseDockedMode", Settings::values.use_docked_mode); } diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index e9546dadf..74dc6bb28 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -374,6 +374,8 @@ void Config::ReadValues() { qt_config->value("use_disk_shader_cache", false).toBool(); Settings::values.use_accurate_gpu_emulation = qt_config->value("use_accurate_gpu_emulation", false).toBool(); + Settings::values.use_asynchronous_gpu_emulation = + qt_config->value("use_asynchronous_gpu_emulation", false).toBool(); Settings::values.bg_red = qt_config->value("bg_red", 0.0).toFloat(); Settings::values.bg_green = qt_config->value("bg_green", 0.0).toFloat(); @@ -633,6 +635,8 @@ void Config::SaveValues() { qt_config->setValue("frame_limit", Settings::values.frame_limit); qt_config->setValue("use_disk_shader_cache", Settings::values.use_disk_shader_cache); qt_config->setValue("use_accurate_gpu_emulation", Settings::values.use_accurate_gpu_emulation); + qt_config->setValue("use_asynchronous_gpu_emulation", + Settings::values.use_asynchronous_gpu_emulation); // Cast to double because Qt's written float values are not human-readable qt_config->setValue("bg_red", (double)Settings::values.bg_red); diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp index 0f5dd534b..dd1d67488 100644 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp @@ -75,6 +75,8 @@ void ConfigureGraphics::setConfiguration() { ui->frame_limit->setValue(Settings::values.frame_limit); ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache); ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation); + ui->use_asynchronous_gpu_emulation->setEnabled(!Core::System::GetInstance().IsPoweredOn()); + ui->use_asynchronous_gpu_emulation->setChecked(Settings::values.use_asynchronous_gpu_emulation); UpdateBackgroundColorButton(QColor::fromRgbF(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue)); } @@ -86,6 +88,8 @@ void ConfigureGraphics::applyConfiguration() { Settings::values.frame_limit = ui->frame_limit->value(); Settings::values.use_disk_shader_cache = ui->use_disk_shader_cache->isChecked(); Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked(); + Settings::values.use_asynchronous_gpu_emulation = + ui->use_asynchronous_gpu_emulation->isChecked(); Settings::values.bg_red = static_cast(bg_color.redF()); Settings::values.bg_green = static_cast(bg_color.greenF()); Settings::values.bg_blue = static_cast(bg_color.blueF()); diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index 824f5810a..c6767e0ca 100644 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui @@ -63,6 +63,13 @@ + + + + Use asynchronous GPU emulation + + + diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index ff05b3179..ca880dc65 100644 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp @@ -354,6 +354,8 @@ void Config::ReadValues() { sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false); Settings::values.use_accurate_gpu_emulation = sdl2_config->GetBoolean("Renderer", "use_accurate_gpu_emulation", false); + Settings::values.use_asynchronous_gpu_emulation = + sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false); Settings::values.bg_red = (float)sdl2_config->GetReal("Renderer", "bg_red", 0.0); Settings::values.bg_green = (float)sdl2_config->GetReal("Renderer", "bg_green", 0.0); diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index a81986f8e..6538af098 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h @@ -118,6 +118,10 @@ use_disk_shader_cache = # 0 (default): Off (fast), 1 : On (slow) use_accurate_gpu_emulation = +# Whether to use asynchronous GPU emulation +# 0 : Off (slow), 1 (default): On (fast) +use_asynchronous_gpu_emulation = + # The clear color for the renderer. What shows up on the sides of the bottom screen. # Must be in range of 0.0-1.0. Defaults to 1.0 for all. bg_red = From 4483089d704cd4913a748d2198359cc0cf7b32c5 Mon Sep 17 00:00:00 2001 From: bunnei Date: Mon, 7 Jan 2019 23:32:02 -0500 Subject: [PATCH 03/10] gpu: Refactor to take RendererBase instead of RasterizerInterface. --- src/core/core.cpp | 2 +- src/video_core/gpu.cpp | 5 +++-- src/video_core/gpu.h | 34 +++++++++++++++++++--------------- 3 files changed, 23 insertions(+), 18 deletions(-) diff --git a/src/core/core.cpp b/src/core/core.cpp index 11094a87a..9e5d167c3 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -131,7 +131,7 @@ struct System::Impl { is_powered_on = true; - gpu_core = std::make_unique(system, renderer->Rasterizer()); + gpu_core = std::make_unique(system, *renderer); cpu_core_manager.Initialize(system); diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index ac30d1a89..08abf8ac9 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -12,7 +12,7 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/maxwell_dma.h" #include "video_core/gpu.h" -#include "video_core/rasterizer_interface.h" +#include "video_core/renderer_base.h" namespace Tegra { @@ -28,7 +28,8 @@ u32 FramebufferConfig::BytesPerPixel(PixelFormat format) { UNREACHABLE(); } -GPU::GPU(Core::System& system, VideoCore::RasterizerInterface& rasterizer) { +GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{renderer} { + auto& rasterizer{renderer.Rasterizer()}; memory_manager = std::make_unique(); dma_pusher = std::make_unique(*this); maxwell_3d = std::make_unique(system, rasterizer, *memory_manager); diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 6313702f2..ac7aec6a4 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -16,8 +16,8 @@ class System; } namespace VideoCore { -class RasterizerInterface; -} +class RendererBase; +} // namespace VideoCore namespace Tegra { @@ -121,7 +121,8 @@ enum class EngineID { class GPU final { public: - explicit GPU(Core::System& system, VideoCore::RasterizerInterface& rasterizer); + explicit GPU(Core::System& system, VideoCore::RendererBase& renderer); + ~GPU(); struct MethodCall { @@ -200,9 +201,24 @@ public: }; } regs{}; +private: + void ProcessBindMethod(const MethodCall& method_call); + void ProcessSemaphoreTriggerMethod(); + void ProcessSemaphoreRelease(); + void ProcessSemaphoreAcquire(); + + // Calls a GPU puller method. + void CallPullerMethod(const MethodCall& method_call); + // Calls a GPU engine method. + void CallEngineMethod(const MethodCall& method_call); + // Determines where the method should be executed. + bool ExecuteMethodOnEngine(const MethodCall& method_call); + private: std::unique_ptr dma_pusher; std::unique_ptr memory_manager; + + VideoCore::RendererBase& renderer; /// Mapping of command subchannels to their bound engine ids. std::array bound_engines = {}; @@ -217,18 +233,6 @@ private: std::unique_ptr maxwell_dma; /// Inline memory engine std::unique_ptr kepler_memory; - - void ProcessBindMethod(const MethodCall& method_call); - void ProcessSemaphoreTriggerMethod(); - void ProcessSemaphoreRelease(); - void ProcessSemaphoreAcquire(); - - // Calls a GPU puller method. - void CallPullerMethod(const MethodCall& method_call); - // Calls a GPU engine method. - void CallEngineMethod(const MethodCall& method_call); - // Determines where the method should be executed. - bool ExecuteMethodOnEngine(const MethodCall& method_call); }; #define ASSERT_REG_POSITION(field_name, position) \ From ac51d048a91593a3da124aeea32dc5b0898f1dd6 Mon Sep 17 00:00:00 2001 From: bunnei Date: Mon, 21 Jan 2019 15:18:09 -0500 Subject: [PATCH 04/10] gpu: Refactor command and swap buffers interface for asynch. --- .../hle/service/nvdrv/devices/nvdisp_disp0.cpp | 2 +- src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp | 14 ++------------ src/core/hle/service/nvflinger/nvflinger.cpp | 2 +- src/video_core/gpu.cpp | 10 ++++++++++ src/video_core/gpu.h | 15 ++++++++++++--- 5 files changed, 26 insertions(+), 17 deletions(-) diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp index dbe7ee6e8..20c7c39aa 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp @@ -36,7 +36,7 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3 auto& instance = Core::System::GetInstance(); instance.GetPerfStats().EndGameFrame(); - instance.Renderer().SwapBuffers(framebuffer); + instance.GPU().SwapBuffers(framebuffer); } } // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp index 0a650f36c..8ce7bc7a5 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp @@ -136,16 +136,6 @@ u32 nvhost_gpu::AllocateObjectContext(const std::vector& input, std::vector< return 0; } -static void PushGPUEntries(Tegra::CommandList&& entries) { - if (entries.empty()) { - return; - } - - auto& dma_pusher{Core::System::GetInstance().GPU().DmaPusher()}; - dma_pusher.Push(std::move(entries)); - dma_pusher.DispatchCalls(); -} - u32 nvhost_gpu::SubmitGPFIFO(const std::vector& input, std::vector& output) { if (input.size() < sizeof(IoctlSubmitGpfifo)) { UNIMPLEMENTED(); @@ -163,7 +153,7 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector& input, std::vector& outp std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)], params.num_entries * sizeof(Tegra::CommandListHeader)); - PushGPUEntries(std::move(entries)); + Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries)); params.fence_out.id = 0; params.fence_out.value = 0; @@ -184,7 +174,7 @@ u32 nvhost_gpu::KickoffPB(const std::vector& input, std::vector& output) Memory::ReadBlock(params.address, entries.data(), params.num_entries * sizeof(Tegra::CommandListHeader)); - PushGPUEntries(std::move(entries)); + Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries)); params.fence_out.id = 0; params.fence_out.value = 0; diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index 56f31e2ac..fc496b654 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp @@ -186,7 +186,7 @@ void NVFlinger::Compose() { // There was no queued buffer to draw, render previous frame system_instance.GetPerfStats().EndGameFrame(); - system_instance.Renderer().SwapBuffers({}); + system_instance.GPU().SwapBuffers({}); continue; } diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 08abf8ac9..b0f3310e5 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -65,6 +65,16 @@ const DmaPusher& GPU::DmaPusher() const { return *dma_pusher; } +void GPU::PushGPUEntries(Tegra::CommandList&& entries) { + dma_pusher->Push(std::move(entries)); + dma_pusher->DispatchCalls(); +} + +void GPU::SwapBuffers( + std::optional> framebuffer) { + renderer.SwapBuffers(std::move(framebuffer)); +} + u32 RenderTargetBytesPerPixel(RenderTargetFormat format) { ASSERT(format != RenderTargetFormat::NONE); diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index ac7aec6a4..62649bd6e 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -200,6 +200,13 @@ public: std::array reg_array; }; } regs{}; + + /// Push GPU command entries to be processed + void PushGPUEntries(Tegra::CommandList&& entries); + + /// Swap buffers (render frame) + void SwapBuffers( + std::optional> framebuffer); private: void ProcessBindMethod(const MethodCall& method_call); @@ -207,11 +214,13 @@ private: void ProcessSemaphoreRelease(); void ProcessSemaphoreAcquire(); - // Calls a GPU puller method. + /// Calls a GPU puller method. void CallPullerMethod(const MethodCall& method_call); - // Calls a GPU engine method. + + /// Calls a GPU engine method. void CallEngineMethod(const MethodCall& method_call); - // Determines where the method should be executed. + + /// Determines where the method should be executed. bool ExecuteMethodOnEngine(const MethodCall& method_call); private: From 65651078e5ae60ff14bf6d6e8e367ec7262c4120 Mon Sep 17 00:00:00 2001 From: bunnei Date: Wed, 23 Jan 2019 22:09:22 -0500 Subject: [PATCH 05/10] bootmanager: Ensure that we have a context for shader loading. --- src/yuzu/bootmanager.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp index 73b04b749..3b070bfbb 100644 --- a/src/yuzu/bootmanager.cpp +++ b/src/yuzu/bootmanager.cpp @@ -20,10 +20,7 @@ EmuThread::EmuThread(GRenderWindow* render_window) : render_window(render_window) {} void EmuThread::run() { - if (!Settings::values.use_multi_core) { - // Single core mode must acquire OpenGL context for entire emulation session - render_window->MakeCurrent(); - } + render_window->MakeCurrent(); MicroProfileOnThreadCreate("EmuThread"); @@ -38,6 +35,11 @@ void EmuThread::run() { emit LoadProgress(VideoCore::LoadCallbackStage::Complete, 0, 0); + if (Settings::values.use_asynchronous_gpu_emulation) { + // Release OpenGL context for the GPU thread + render_window->DoneCurrent(); + } + // holds whether the cpu was running during the last iteration, // so that the DebugModeLeft signal can be emitted before the // next execution step From 7b574f406b25c02a0e0efd8b7ec13d68ecb55497 Mon Sep 17 00:00:00 2001 From: bunnei Date: Wed, 23 Jan 2019 22:17:55 -0500 Subject: [PATCH 06/10] gpu: Move command processing to another thread. --- .../service/nvdrv/devices/nvhost_as_gpu.cpp | 2 +- src/core/memory.cpp | 8 +- src/video_core/CMakeLists.txt | 2 + src/video_core/engines/kepler_memory.cpp | 2 +- src/video_core/engines/maxwell_dma.cpp | 4 +- src/video_core/gpu.cpp | 44 ++++- src/video_core/gpu.h | 22 ++- src/video_core/gpu_thread.cpp | 154 ++++++++++++++++++ src/video_core/gpu_thread.h | 135 +++++++++++++++ 9 files changed, 358 insertions(+), 15 deletions(-) create mode 100644 src/video_core/gpu_thread.cpp create mode 100644 src/video_core/gpu_thread.h diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp index 466db7ccd..a34b9e753 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp @@ -178,7 +178,7 @@ u32 nvhost_as_gpu::UnmapBuffer(const std::vector& input, std::vector& ou auto& gpu = system_instance.GPU(); auto cpu_addr = gpu.MemoryManager().GpuToCpuAddress(params.offset); ASSERT(cpu_addr); - system_instance.Renderer().Rasterizer().FlushAndInvalidateRegion(*cpu_addr, itr->second.size); + gpu.FlushAndInvalidateRegion(*cpu_addr, itr->second.size); params.offset = gpu.MemoryManager().UnmapBuffer(params.offset, itr->second.size); diff --git a/src/core/memory.cpp b/src/core/memory.cpp index ec279cef8..6591c45d2 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -356,16 +356,16 @@ void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode) { const VAddr overlap_end = std::min(end, region_end); const VAddr overlap_size = overlap_end - overlap_start; - auto& rasterizer = system_instance.Renderer().Rasterizer(); + auto& gpu = system_instance.GPU(); switch (mode) { case FlushMode::Flush: - rasterizer.FlushRegion(overlap_start, overlap_size); + gpu.FlushRegion(overlap_start, overlap_size); break; case FlushMode::Invalidate: - rasterizer.InvalidateRegion(overlap_start, overlap_size); + gpu.InvalidateRegion(overlap_start, overlap_size); break; case FlushMode::FlushAndInvalidate: - rasterizer.FlushAndInvalidateRegion(overlap_start, overlap_size); + gpu.FlushAndInvalidateRegion(overlap_start, overlap_size); break; } }; diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 3e9d2b3be..3bb5d0ed7 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -17,6 +17,8 @@ add_library(video_core STATIC engines/shader_header.h gpu.cpp gpu.h + gpu_thread.cpp + gpu_thread.h macro_interpreter.cpp macro_interpreter.h memory_manager.cpp diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index 4f6126116..aae2a4019 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp @@ -48,7 +48,7 @@ void KeplerMemory::ProcessData(u32 data) { // We have to invalidate the destination region to evict any outdated surfaces from the cache. // We do this before actually writing the new data because the destination address might contain // a dirty surface that will have to be written back to memory. - rasterizer.InvalidateRegion(*dest_address, sizeof(u32)); + Core::System::GetInstance().GPU().InvalidateRegion(*dest_address, sizeof(u32)); Memory::Write32(*dest_address, data); system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 0474c7ba3..9dfea5999 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -92,12 +92,12 @@ void MaxwellDMA::HandleCopy() { const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) { // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated // copying. - rasterizer.FlushRegion(*source_cpu, src_size); + Core::System::GetInstance().GPU().FlushRegion(*source_cpu, src_size); // We have to invalidate the destination region to evict any outdated surfaces from the // cache. We do this before actually writing the new data because the destination address // might contain a dirty surface that will have to be written back to memory. - rasterizer.InvalidateRegion(*dest_cpu, dst_size); + Core::System::GetInstance().GPU().InvalidateRegion(*dest_cpu, dst_size); }; if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index b0f3310e5..0d7a052dd 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -6,12 +6,14 @@ #include "core/core.h" #include "core/core_timing.h" #include "core/memory.h" +#include "core/settings.h" #include "video_core/engines/fermi_2d.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/kepler_memory.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/maxwell_dma.h" #include "video_core/gpu.h" +#include "video_core/gpu_thread.h" #include "video_core/renderer_base.h" namespace Tegra { @@ -37,6 +39,10 @@ GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{ren kepler_compute = std::make_unique(*memory_manager); maxwell_dma = std::make_unique(system, rasterizer, *memory_manager); kepler_memory = std::make_unique(system, rasterizer, *memory_manager); + + if (Settings::values.use_asynchronous_gpu_emulation) { + gpu_thread = std::make_unique(renderer, *dma_pusher); + } } GPU::~GPU() = default; @@ -66,13 +72,45 @@ const DmaPusher& GPU::DmaPusher() const { } void GPU::PushGPUEntries(Tegra::CommandList&& entries) { - dma_pusher->Push(std::move(entries)); - dma_pusher->DispatchCalls(); + if (Settings::values.use_asynchronous_gpu_emulation) { + gpu_thread->SubmitList(std::move(entries)); + } else { + dma_pusher->Push(std::move(entries)); + dma_pusher->DispatchCalls(); + } } void GPU::SwapBuffers( std::optional> framebuffer) { - renderer.SwapBuffers(std::move(framebuffer)); + if (Settings::values.use_asynchronous_gpu_emulation) { + gpu_thread->SwapBuffers(std::move(framebuffer)); + } else { + renderer.SwapBuffers(std::move(framebuffer)); + } +} + +void GPU::FlushRegion(VAddr addr, u64 size) { + if (Settings::values.use_asynchronous_gpu_emulation) { + gpu_thread->FlushRegion(addr, size); + } else { + renderer.Rasterizer().FlushRegion(addr, size); + } +} + +void GPU::InvalidateRegion(VAddr addr, u64 size) { + if (Settings::values.use_asynchronous_gpu_emulation) { + gpu_thread->InvalidateRegion(addr, size); + } else { + renderer.Rasterizer().InvalidateRegion(addr, size); + } +} + +void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) { + if (Settings::values.use_asynchronous_gpu_emulation) { + gpu_thread->FlushAndInvalidateRegion(addr, size); + } else { + renderer.Rasterizer().FlushAndInvalidateRegion(addr, size); + } } u32 RenderTargetBytesPerPixel(RenderTargetFormat format) { diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 62649bd6e..3f3098bf1 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -19,6 +19,10 @@ namespace VideoCore { class RendererBase; } // namespace VideoCore +namespace VideoCommon::GPUThread { +class ThreadManager; +} // namespace VideoCommon::GPUThread + namespace Tegra { enum class RenderTargetFormat : u32 { @@ -200,7 +204,7 @@ public: std::array reg_array; }; } regs{}; - + /// Push GPU command entries to be processed void PushGPUEntries(Tegra::CommandList&& entries); @@ -208,6 +212,15 @@ public: void SwapBuffers( std::optional> framebuffer); + /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory + void FlushRegion(VAddr addr, u64 size); + + /// Notify rasterizer that any caches of the specified region should be invalidated + void InvalidateRegion(VAddr addr, u64 size); + + /// Notify rasterizer that any caches of the specified region should be flushed and invalidated + void FlushAndInvalidateRegion(VAddr addr, u64 size); + private: void ProcessBindMethod(const MethodCall& method_call); void ProcessSemaphoreTriggerMethod(); @@ -216,17 +229,18 @@ private: /// Calls a GPU puller method. void CallPullerMethod(const MethodCall& method_call); - + /// Calls a GPU engine method. void CallEngineMethod(const MethodCall& method_call); - + /// Determines where the method should be executed. bool ExecuteMethodOnEngine(const MethodCall& method_call); private: std::unique_ptr dma_pusher; std::unique_ptr memory_manager; - + std::unique_ptr gpu_thread; + VideoCore::RendererBase& renderer; /// Mapping of command subchannels to their bound engine ids. diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp new file mode 100644 index 000000000..22c4cca4d --- /dev/null +++ b/src/video_core/gpu_thread.cpp @@ -0,0 +1,154 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "common/microprofile.h" +#include "core/frontend/scope_acquire_window_context.h" +#include "core/settings.h" +#include "video_core/dma_pusher.h" +#include "video_core/gpu.h" +#include "video_core/gpu_thread.h" +#include "video_core/renderer_base.h" + +namespace VideoCommon::GPUThread { + +/// Executes a single GPU thread command +static void ExecuteCommand(CommandData* command, VideoCore::RendererBase& renderer, + Tegra::DmaPusher& dma_pusher) { + if (const auto submit_list = std::get_if(command)) { + dma_pusher.Push(std::move(submit_list->entries)); + dma_pusher.DispatchCalls(); + } else if (const auto data = std::get_if(command)) { + renderer.SwapBuffers(data->framebuffer); + } else if (const auto data = std::get_if(command)) { + renderer.Rasterizer().FlushRegion(data->addr, data->size); + } else if (const auto data = std::get_if(command)) { + renderer.Rasterizer().InvalidateRegion(data->addr, data->size); + } else if (const auto data = std::get_if(command)) { + renderer.Rasterizer().FlushAndInvalidateRegion(data->addr, data->size); + } else { + UNREACHABLE(); + } +} + +/// Runs the GPU thread +static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher, + SynchState& state) { + + MicroProfileOnThreadCreate("GpuThread"); + + auto WaitForWakeup = [&]() { + std::unique_lock lock{state.signal_mutex}; + state.signal_condition.wait(lock, [&] { return !state.IsIdle() || !state.is_running; }); + }; + + // Wait for first GPU command before acquiring the window context + WaitForWakeup(); + + // If emulation was stopped during disk shader loading, abort before trying to acquire context + if (!state.is_running) { + return; + } + + Core::Frontend::ScopeAcquireWindowContext acquire_context{renderer.GetRenderWindow()}; + + while (state.is_running) { + if (!state.is_running) { + return; + } + + { + // Thread has been woken up, so make the previous write queue the next read queue + std::lock_guard lock{state.signal_mutex}; + std::swap(state.push_queue, state.pop_queue); + } + + // Execute all of the GPU commands + while (!state.pop_queue->empty()) { + ExecuteCommand(&state.pop_queue->front(), renderer, dma_pusher); + state.pop_queue->pop(); + } + + // Signal that the GPU thread has finished processing commands + if (state.IsIdle()) { + state.idle_condition.notify_one(); + } + + // Wait for CPU thread to send more GPU commands + WaitForWakeup(); + } +} + +ThreadManager::ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) + : renderer{renderer}, dma_pusher{dma_pusher}, thread{RunThread, std::ref(renderer), + std::ref(dma_pusher), std::ref(state)}, + thread_id{thread.get_id()} {} + +ThreadManager::~ThreadManager() { + { + // Notify GPU thread that a shutdown is pending + std::lock_guard lock{state.signal_mutex}; + state.is_running = false; + } + + state.signal_condition.notify_one(); + thread.join(); +} + +void ThreadManager::SubmitList(Tegra::CommandList&& entries) { + if (entries.empty()) { + return; + } + + PushCommand(SubmitListCommand(std::move(entries)), false, false); +} + +void ThreadManager::SwapBuffers( + std::optional> framebuffer) { + PushCommand(SwapBuffersCommand(std::move(framebuffer)), true, false); +} + +void ThreadManager::FlushRegion(VAddr addr, u64 size) { + if (Settings::values.use_accurate_gpu_emulation) { + PushCommand(FlushRegionCommand(addr, size), true, false); + } +} + +void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { + PushCommand(InvalidateRegionCommand(addr, size), true, true); +} + +void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { + if (Settings::values.use_accurate_gpu_emulation) { + PushCommand(FlushAndInvalidateRegionCommand(addr, size), true, false); + } else { + InvalidateRegion(addr, size); + } +} + +void ThreadManager::PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu) { + { + std::lock_guard lock{state.signal_mutex}; + + if ((allow_on_cpu && state.IsIdle()) || IsGpuThread()) { + // Execute the command synchronously on the current thread + ExecuteCommand(&command_data, renderer, dma_pusher); + return; + } + + // Push the command to the GPU thread + state.push_queue->emplace(command_data); + } + + // Signal the GPU thread that commands are pending + state.signal_condition.notify_one(); + + if (wait_for_idle) { + // Wait for the GPU to be idle (all commands to be executed) + std::unique_lock lock{state.idle_mutex}; + state.idle_condition.wait(lock, [this] { return state.IsIdle(); }); + } +} + +} // namespace VideoCommon::GPUThread diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h new file mode 100644 index 000000000..ad9f9462b --- /dev/null +++ b/src/video_core/gpu_thread.h @@ -0,0 +1,135 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Tegra { +struct FramebufferConfig; +class DmaPusher; +} // namespace Tegra + +namespace VideoCore { +class RendererBase; +} // namespace VideoCore + +namespace VideoCommon::GPUThread { + +/// Command to signal to the GPU thread that a command list is ready for processing +struct SubmitListCommand final { + explicit SubmitListCommand(Tegra::CommandList&& entries) : entries{std::move(entries)} {} + + Tegra::CommandList entries; +}; + +/// Command to signal to the GPU thread that a swap buffers is pending +struct SwapBuffersCommand final { + explicit SwapBuffersCommand(std::optional framebuffer) + : framebuffer{std::move(framebuffer)} {} + + std::optional framebuffer; +}; + +/// Command to signal to the GPU thread to flush a region +struct FlushRegionCommand final { + explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {} + + const VAddr addr; + const u64 size; +}; + +/// Command to signal to the GPU thread to invalidate a region +struct InvalidateRegionCommand final { + explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {} + + const VAddr addr; + const u64 size; +}; + +/// Command to signal to the GPU thread to flush and invalidate a region +struct FlushAndInvalidateRegionCommand final { + explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size) + : addr{addr}, size{size} {} + + const VAddr addr; + const u64 size; +}; + +using CommandData = std::variant; + +/// Struct used to synchronize the GPU thread +struct SynchState final { + std::atomic is_running{true}; + std::condition_variable signal_condition; + std::mutex signal_mutex; + std::condition_variable idle_condition; + std::mutex idle_mutex; + + // We use two queues for sending commands to the GPU thread, one for writing (push_queue) to and + // one for reading from (pop_queue). These are swapped whenever the current pop_queue becomes + // empty. This allows for efficient thread-safe access, as it does not require any copies. + + using CommandQueue = std::queue; + std::array command_queues; + CommandQueue* push_queue{&command_queues[0]}; + CommandQueue* pop_queue{&command_queues[1]}; + + /// Returns true if the GPU thread should be idle, meaning there are no commands to process + bool IsIdle() const { + return command_queues[0].empty() && command_queues[1].empty(); + } +}; + +/// Class used to manage the GPU thread +class ThreadManager final { +public: + explicit ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher); + ~ThreadManager(); + + /// Push GPU command entries to be processed + void SubmitList(Tegra::CommandList&& entries); + + /// Swap buffers (render frame) + void SwapBuffers( + std::optional> framebuffer); + + /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory + void FlushRegion(VAddr addr, u64 size); + + /// Notify rasterizer that any caches of the specified region should be invalidated + void InvalidateRegion(VAddr addr, u64 size); + + /// Notify rasterizer that any caches of the specified region should be flushed and invalidated + void FlushAndInvalidateRegion(VAddr addr, u64 size); + + /// Waits the caller until the GPU thread is idle, used for synchronization + void WaitForIdle(); + +private: + /// Pushes a command to be executed by the GPU thread + void PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu); + + /// Returns true if this is called by the GPU thread + bool IsGpuThread() const { + return std::this_thread::get_id() == thread_id; + } + +private: + SynchState state; + std::thread thread; + std::thread::id thread_id; + VideoCore::RendererBase& renderer; + Tegra::DmaPusher& dma_pusher; +}; + +} // namespace VideoCommon::GPUThread From aaa373585cd55bd03fcc589d2ad9f749e2cb99d4 Mon Sep 17 00:00:00 2001 From: bunnei Date: Fri, 8 Feb 2019 23:21:53 -0500 Subject: [PATCH 07/10] gpu: Refactor a/synchronous implementations into their own classes. --- src/core/core.cpp | 9 +++++-- src/video_core/CMakeLists.txt | 4 +++ src/video_core/gpu.cpp | 48 ----------------------------------- src/video_core/gpu.h | 26 ++++++++----------- src/video_core/gpu_asynch.cpp | 37 +++++++++++++++++++++++++++ src/video_core/gpu_asynch.h | 37 +++++++++++++++++++++++++++ src/video_core/gpu_synch.cpp | 37 +++++++++++++++++++++++++++ src/video_core/gpu_synch.h | 29 +++++++++++++++++++++ 8 files changed, 162 insertions(+), 65 deletions(-) create mode 100644 src/video_core/gpu_asynch.cpp create mode 100644 src/video_core/gpu_asynch.h create mode 100644 src/video_core/gpu_synch.cpp create mode 100644 src/video_core/gpu_synch.h diff --git a/src/core/core.cpp b/src/core/core.cpp index 9e5d167c3..1d83e9e11 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -36,7 +36,8 @@ #include "frontend/applets/software_keyboard.h" #include "frontend/applets/web_browser.h" #include "video_core/debug_utils/debug_utils.h" -#include "video_core/gpu.h" +#include "video_core/gpu_asynch.h" +#include "video_core/gpu_synch.h" #include "video_core/renderer_base.h" #include "video_core/video_core.h" @@ -131,7 +132,11 @@ struct System::Impl { is_powered_on = true; - gpu_core = std::make_unique(system, *renderer); + if (Settings::values.use_asynchronous_gpu_emulation) { + gpu_core = std::make_unique(system, *renderer); + } else { + gpu_core = std::make_unique(system, *renderer); + } cpu_core_manager.Initialize(system); diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 3bb5d0ed7..a4cb33c17 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -17,6 +17,10 @@ add_library(video_core STATIC engines/shader_header.h gpu.cpp gpu.h + gpu_asynch.cpp + gpu_asynch.h + gpu_synch.cpp + gpu_synch.h gpu_thread.cpp gpu_thread.h macro_interpreter.cpp diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 0d7a052dd..08abf8ac9 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -6,14 +6,12 @@ #include "core/core.h" #include "core/core_timing.h" #include "core/memory.h" -#include "core/settings.h" #include "video_core/engines/fermi_2d.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/kepler_memory.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/maxwell_dma.h" #include "video_core/gpu.h" -#include "video_core/gpu_thread.h" #include "video_core/renderer_base.h" namespace Tegra { @@ -39,10 +37,6 @@ GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{ren kepler_compute = std::make_unique(*memory_manager); maxwell_dma = std::make_unique(system, rasterizer, *memory_manager); kepler_memory = std::make_unique(system, rasterizer, *memory_manager); - - if (Settings::values.use_asynchronous_gpu_emulation) { - gpu_thread = std::make_unique(renderer, *dma_pusher); - } } GPU::~GPU() = default; @@ -71,48 +65,6 @@ const DmaPusher& GPU::DmaPusher() const { return *dma_pusher; } -void GPU::PushGPUEntries(Tegra::CommandList&& entries) { - if (Settings::values.use_asynchronous_gpu_emulation) { - gpu_thread->SubmitList(std::move(entries)); - } else { - dma_pusher->Push(std::move(entries)); - dma_pusher->DispatchCalls(); - } -} - -void GPU::SwapBuffers( - std::optional> framebuffer) { - if (Settings::values.use_asynchronous_gpu_emulation) { - gpu_thread->SwapBuffers(std::move(framebuffer)); - } else { - renderer.SwapBuffers(std::move(framebuffer)); - } -} - -void GPU::FlushRegion(VAddr addr, u64 size) { - if (Settings::values.use_asynchronous_gpu_emulation) { - gpu_thread->FlushRegion(addr, size); - } else { - renderer.Rasterizer().FlushRegion(addr, size); - } -} - -void GPU::InvalidateRegion(VAddr addr, u64 size) { - if (Settings::values.use_asynchronous_gpu_emulation) { - gpu_thread->InvalidateRegion(addr, size); - } else { - renderer.Rasterizer().InvalidateRegion(addr, size); - } -} - -void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) { - if (Settings::values.use_asynchronous_gpu_emulation) { - gpu_thread->FlushAndInvalidateRegion(addr, size); - } else { - renderer.Rasterizer().FlushAndInvalidateRegion(addr, size); - } -} - u32 RenderTargetBytesPerPixel(RenderTargetFormat format) { ASSERT(format != RenderTargetFormat::NONE); diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 3f3098bf1..14a421cc1 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -19,10 +19,6 @@ namespace VideoCore { class RendererBase; } // namespace VideoCore -namespace VideoCommon::GPUThread { -class ThreadManager; -} // namespace VideoCommon::GPUThread - namespace Tegra { enum class RenderTargetFormat : u32 { @@ -123,7 +119,7 @@ enum class EngineID { MAXWELL_DMA_COPY_A = 0xB0B5, }; -class GPU final { +class GPU { public: explicit GPU(Core::System& system, VideoCore::RendererBase& renderer); @@ -206,20 +202,20 @@ public: } regs{}; /// Push GPU command entries to be processed - void PushGPUEntries(Tegra::CommandList&& entries); + virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0; /// Swap buffers (render frame) - void SwapBuffers( - std::optional> framebuffer); + virtual void SwapBuffers( + std::optional> framebuffer) = 0; /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory - void FlushRegion(VAddr addr, u64 size); + virtual void FlushRegion(VAddr addr, u64 size) = 0; /// Notify rasterizer that any caches of the specified region should be invalidated - void InvalidateRegion(VAddr addr, u64 size); + virtual void InvalidateRegion(VAddr addr, u64 size) = 0; /// Notify rasterizer that any caches of the specified region should be flushed and invalidated - void FlushAndInvalidateRegion(VAddr addr, u64 size); + virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; private: void ProcessBindMethod(const MethodCall& method_call); @@ -236,13 +232,13 @@ private: /// Determines where the method should be executed. bool ExecuteMethodOnEngine(const MethodCall& method_call); -private: +protected: std::unique_ptr dma_pusher; - std::unique_ptr memory_manager; - std::unique_ptr gpu_thread; - VideoCore::RendererBase& renderer; +private: + std::unique_ptr memory_manager; + /// Mapping of command subchannels to their bound engine ids. std::array bound_engines = {}; diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp new file mode 100644 index 000000000..ad0a747e3 --- /dev/null +++ b/src/video_core/gpu_asynch.cpp @@ -0,0 +1,37 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "video_core/gpu_asynch.h" +#include "video_core/gpu_thread.h" +#include "video_core/renderer_base.h" + +namespace VideoCommon { + +GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer) + : Tegra::GPU(system, renderer), gpu_thread{renderer, *dma_pusher} {} + +GPUAsynch::~GPUAsynch() = default; + +void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) { + gpu_thread.SubmitList(std::move(entries)); +} + +void GPUAsynch::SwapBuffers( + std::optional> framebuffer) { + gpu_thread.SwapBuffers(std::move(framebuffer)); +} + +void GPUAsynch::FlushRegion(VAddr addr, u64 size) { + gpu_thread.FlushRegion(addr, size); +} + +void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) { + gpu_thread.InvalidateRegion(addr, size); +} + +void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) { + gpu_thread.FlushAndInvalidateRegion(addr, size); +} + +} // namespace VideoCommon diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h new file mode 100644 index 000000000..58046f3e9 --- /dev/null +++ b/src/video_core/gpu_asynch.h @@ -0,0 +1,37 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "video_core/gpu.h" +#include "video_core/gpu_thread.h" + +namespace VideoCore { +class RendererBase; +} // namespace VideoCore + +namespace VideoCommon { + +namespace GPUThread { +class ThreadManager; +} // namespace GPUThread + +/// Implementation of GPU interface that runs the GPU asynchronously +class GPUAsynch : public Tegra::GPU { +public: + explicit GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer); + ~GPUAsynch(); + + void PushGPUEntries(Tegra::CommandList&& entries) override; + void SwapBuffers( + std::optional> framebuffer) override; + void FlushRegion(VAddr addr, u64 size) override; + void InvalidateRegion(VAddr addr, u64 size) override; + void FlushAndInvalidateRegion(VAddr addr, u64 size) override; + +private: + GPUThread::ThreadManager gpu_thread; +}; + +} // namespace VideoCommon diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp new file mode 100644 index 000000000..4c00b96c7 --- /dev/null +++ b/src/video_core/gpu_synch.cpp @@ -0,0 +1,37 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "video_core/gpu_synch.h" +#include "video_core/renderer_base.h" + +namespace VideoCommon { + +GPUSynch::GPUSynch(Core::System& system, VideoCore::RendererBase& renderer) + : Tegra::GPU(system, renderer) {} + +GPUSynch::~GPUSynch() = default; + +void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) { + dma_pusher->Push(std::move(entries)); + dma_pusher->DispatchCalls(); +} + +void GPUSynch::SwapBuffers( + std::optional> framebuffer) { + renderer.SwapBuffers(std::move(framebuffer)); +} + +void GPUSynch::FlushRegion(VAddr addr, u64 size) { + renderer.Rasterizer().FlushRegion(addr, size); +} + +void GPUSynch::InvalidateRegion(VAddr addr, u64 size) { + renderer.Rasterizer().InvalidateRegion(addr, size); +} + +void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) { + renderer.Rasterizer().FlushAndInvalidateRegion(addr, size); +} + +} // namespace VideoCommon diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h new file mode 100644 index 000000000..658f683e2 --- /dev/null +++ b/src/video_core/gpu_synch.h @@ -0,0 +1,29 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "video_core/gpu.h" + +namespace VideoCore { +class RendererBase; +} // namespace VideoCore + +namespace VideoCommon { + +/// Implementation of GPU interface that runs the GPU synchronously +class GPUSynch : public Tegra::GPU { +public: + explicit GPUSynch(Core::System& system, VideoCore::RendererBase& renderer); + ~GPUSynch(); + + void PushGPUEntries(Tegra::CommandList&& entries) override; + void SwapBuffers( + std::optional> framebuffer) override; + void FlushRegion(VAddr addr, u64 size) override; + void InvalidateRegion(VAddr addr, u64 size) override; + void FlushAndInvalidateRegion(VAddr addr, u64 size) override; +}; + +} // namespace VideoCommon From 3f1b4fb23ad7e689941b5a01afa15780bc50b77b Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 9 Feb 2019 00:06:35 -0500 Subject: [PATCH 08/10] gpu: Always flush. --- src/video_core/gpu_thread.cpp | 13 +++++-------- src/video_core/renderer_opengl/gl_rasterizer.cpp | 6 +----- 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 22c4cca4d..7640da6c0 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -110,9 +110,8 @@ void ThreadManager::SwapBuffers( } void ThreadManager::FlushRegion(VAddr addr, u64 size) { - if (Settings::values.use_accurate_gpu_emulation) { - PushCommand(FlushRegionCommand(addr, size), true, false); - } + // Block the CPU when using accurate emulation + PushCommand(FlushRegionCommand(addr, size), Settings::values.use_accurate_gpu_emulation, false); } void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { @@ -120,11 +119,9 @@ void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { } void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { - if (Settings::values.use_accurate_gpu_emulation) { - PushCommand(FlushAndInvalidateRegionCommand(addr, size), true, false); - } else { - InvalidateRegion(addr, size); - } + // Block the CPU when using accurate emulation + PushCommand(FlushAndInvalidateRegionCommand(addr, size), + Settings::values.use_accurate_gpu_emulation, false); } void ThreadManager::PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu) { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 321d9dd3d..168288088 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -749,11 +749,7 @@ void RasterizerOpenGL::FlushAll() {} void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { MICROPROFILE_SCOPE(OpenGL_CacheManagement); - - if (Settings::values.use_accurate_gpu_emulation) { - // Only flush if use_accurate_gpu_emulation is enabled, as it incurs a performance hit - res_cache.FlushRegion(addr, size); - } + res_cache.FlushRegion(addr, size); } void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { From 63aa08acbe1da5b78d9f0b37088081a24591849f Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 9 Feb 2019 02:55:45 -0500 Subject: [PATCH 09/10] gpu_thread: (HACK) Ignore flush on FlushAndInvalidateRegion. --- src/video_core/gpu_thread.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 7640da6c0..4c25380eb 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -119,9 +119,7 @@ void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { } void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { - // Block the CPU when using accurate emulation - PushCommand(FlushAndInvalidateRegionCommand(addr, size), - Settings::values.use_accurate_gpu_emulation, false); + InvalidateRegion(addr, size); } void ThreadManager::PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu) { From 84ad81ee6798ece6c66016c4581b5fe57ce7b20e Mon Sep 17 00:00:00 2001 From: bunnei Date: Sat, 9 Feb 2019 11:37:11 -0500 Subject: [PATCH 10/10] gpu_thread: Fix deadlock with threading idle state check. --- src/video_core/gpu_thread.cpp | 11 +++++++---- src/video_core/gpu_thread.h | 7 ++++--- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 4c25380eb..c5bdd2a17 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -40,7 +40,7 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p auto WaitForWakeup = [&]() { std::unique_lock lock{state.signal_mutex}; - state.signal_condition.wait(lock, [&] { return !state.IsIdle() || !state.is_running; }); + state.signal_condition.wait(lock, [&] { return !state.is_idle || !state.is_running; }); }; // Wait for first GPU command before acquiring the window context @@ -70,8 +70,10 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p state.pop_queue->pop(); } + state.UpdateIdleState(); + // Signal that the GPU thread has finished processing commands - if (state.IsIdle()) { + if (state.is_idle) { state.idle_condition.notify_one(); } @@ -126,13 +128,14 @@ void ThreadManager::PushCommand(CommandData&& command_data, bool wait_for_idle, { std::lock_guard lock{state.signal_mutex}; - if ((allow_on_cpu && state.IsIdle()) || IsGpuThread()) { + if ((allow_on_cpu && state.is_idle) || IsGpuThread()) { // Execute the command synchronously on the current thread ExecuteCommand(&command_data, renderer, dma_pusher); return; } // Push the command to the GPU thread + state.UpdateIdleState(); state.push_queue->emplace(command_data); } @@ -142,7 +145,7 @@ void ThreadManager::PushCommand(CommandData&& command_data, bool wait_for_idle, if (wait_for_idle) { // Wait for the GPU to be idle (all commands to be executed) std::unique_lock lock{state.idle_mutex}; - state.idle_condition.wait(lock, [this] { return state.IsIdle(); }); + state.idle_condition.wait(lock, [this] { return static_cast(state.is_idle); }); } } diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index ad9f9462b..2ad8214cc 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -70,6 +70,7 @@ using CommandData = std::variant is_running{true}; + std::atomic is_idle{true}; std::condition_variable signal_condition; std::mutex signal_mutex; std::condition_variable idle_condition; @@ -84,9 +85,9 @@ struct SynchState final { CommandQueue* push_queue{&command_queues[0]}; CommandQueue* pop_queue{&command_queues[1]}; - /// Returns true if the GPU thread should be idle, meaning there are no commands to process - bool IsIdle() const { - return command_queues[0].empty() && command_queues[1].empty(); + void UpdateIdleState() { + std::lock_guard lock{idle_mutex}; + is_idle = command_queues[0].empty() && command_queues[1].empty(); } };