From e20abbf9cc93167aa073a81a65c7e8fd60f259b1 Mon Sep 17 00:00:00 2001 From: riperiperi Date: Thu, 29 Dec 2022 14:39:04 +0000 Subject: [PATCH] Vulkan: Don't flush commands when creating most sync (#4087) * Vulkan: Don't flush commands when creating most sync When the WaitForIdle method is called, we create sync as some internal GPU method may read back written buffer data. Some games randomly intersperse compute dispatch into their render passes, which results in this happening an unbounded number of times depending on how many times they run compute. Creating sync in Vulkan is expensive, as we need to flush the current command buffer so that it can be waited on. We have a limited number of active command buffers due to how we track resource usage, so submitting too many command buffers will force us to wait for them to return to the pool. This PR allows less "important" sync (things which are less likely to be waited on) to wait on a command buffer's result without submitting it, instead relying on AutoFlush or another, more important sync to flush it later on. Because of the possibility of us waiting for a command buffer that hasn't been submitted yet, any thread needs to be able to force the active command buffer to submit. The ability to do this has been added to the backend multithreading via an "Interrupt", though it is not supported without multithreading. OpenGL drivers should already be doing something similar so they don't blow up when creating lots of sync, which is why this hasn't been a problem for these games over there. Improves Vulkan performance on Xenoblade DE, Pokemon Scarlet/Violet, and Zelda BOTW (still another large issue here) * Add strict argument This is technically a separate concern from whether the sync is a host syncpoint. * Remove _interrupted variable * Actually wait for the invoke This is required by AMD GPUs, and also may have caused some issues on other GPUs. * Remove unused using. * I don't know why it added these ones. 
* Address Feedback * Fix typo --- Ryujinx.Graphics.GAL/IRenderer.cs | 4 +- .../Commands/Renderer/CreateSyncCommand.cs | 6 ++- .../Multithreading/ThreadedRenderer.cs | 47 +++++++++++++++++-- .../Engine/GPFifo/GPFifoClass.cs | 6 +-- .../Engine/Threed/ThreedClass.cs | 2 +- Ryujinx.Graphics.Gpu/GpuContext.cs | 5 +- Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs | 7 ++- Ryujinx.Graphics.Vulkan/CommandBufferPool.cs | 16 +++++++ Ryujinx.Graphics.Vulkan/PipelineFull.cs | 1 + Ryujinx.Graphics.Vulkan/SyncManager.cs | 46 +++++++++++++++--- Ryujinx.Graphics.Vulkan/VulkanRenderer.cs | 15 +++++- 11 files changed, 134 insertions(+), 21 deletions(-) diff --git a/Ryujinx.Graphics.GAL/IRenderer.cs b/Ryujinx.Graphics.GAL/IRenderer.cs index c72320a2..1f2af559 100644 --- a/Ryujinx.Graphics.GAL/IRenderer.cs +++ b/Ryujinx.Graphics.GAL/IRenderer.cs @@ -22,7 +22,7 @@ namespace Ryujinx.Graphics.GAL ISampler CreateSampler(SamplerCreateInfo info); ITexture CreateTexture(TextureCreateInfo info, float scale); - void CreateSync(ulong id); + void CreateSync(ulong id, bool strict); void DeleteBuffer(BufferHandle buffer); @@ -53,6 +53,8 @@ namespace Ryujinx.Graphics.GAL void Initialize(GraphicsDebugLevel logLevel); + void SetInterruptAction(Action interruptAction); + void Screenshot(); } } diff --git a/Ryujinx.Graphics.GAL/Multithreading/Commands/Renderer/CreateSyncCommand.cs b/Ryujinx.Graphics.GAL/Multithreading/Commands/Renderer/CreateSyncCommand.cs index 2e23760e..66f5cf06 100644 --- a/Ryujinx.Graphics.GAL/Multithreading/Commands/Renderer/CreateSyncCommand.cs +++ b/Ryujinx.Graphics.GAL/Multithreading/Commands/Renderer/CreateSyncCommand.cs @@ -4,15 +4,17 @@ { public CommandType CommandType => CommandType.CreateSync; private ulong _id; + private bool _strict; - public void Set(ulong id) + public void Set(ulong id, bool strict) { _id = id; + _strict = strict; } public static void Run(ref CreateSyncCommand command, ThreadedRenderer threaded, IRenderer renderer) { - renderer.CreateSync(command._id); + 
renderer.CreateSync(command._id, command._strict); threaded.Sync.AssignSync(command._id); } diff --git a/Ryujinx.Graphics.GAL/Multithreading/ThreadedRenderer.cs b/Ryujinx.Graphics.GAL/Multithreading/ThreadedRenderer.cs index 62a7dae7..58058be2 100644 --- a/Ryujinx.Graphics.GAL/Multithreading/ThreadedRenderer.cs +++ b/Ryujinx.Graphics.GAL/Multithreading/ThreadedRenderer.cs @@ -29,6 +29,7 @@ namespace Ryujinx.Graphics.GAL.Multithreading private int _elementSize; private IRenderer _baseRenderer; private Thread _gpuThread; + private Thread _backendThread; private bool _disposed; private bool _running; @@ -38,6 +39,7 @@ namespace Ryujinx.Graphics.GAL.Multithreading private CircularSpanPool _spanPool; private ManualResetEventSlim _invokeRun; + private AutoResetEvent _interruptRun; private bool _lastSampleCounterClear = true; @@ -54,6 +56,8 @@ namespace Ryujinx.Graphics.GAL.Multithreading private int _refProducerPtr; private int _refConsumerPtr; + private Action _interruptAction; + public event EventHandler ScreenCaptured; internal BufferMap Buffers { get; } @@ -73,6 +77,7 @@ namespace Ryujinx.Graphics.GAL.Multithreading _baseRenderer = renderer; renderer.ScreenCaptured += (sender, info) => ScreenCaptured?.Invoke(this, info); + renderer.SetInterruptAction(Interrupt); Pipeline = new ThreadedPipeline(this, renderer.Pipeline); Window = new ThreadedWindow(this, renderer); @@ -82,6 +87,7 @@ namespace Ryujinx.Graphics.GAL.Multithreading _galWorkAvailable = new ManualResetEventSlim(false); _invokeRun = new ManualResetEventSlim(); + _interruptRun = new AutoResetEvent(false); _spanPool = new CircularSpanPool(this, SpanPoolBytes); SpanPool = _spanPool; @@ -95,6 +101,8 @@ namespace Ryujinx.Graphics.GAL.Multithreading { _running = true; + _backendThread = Thread.CurrentThread; + _gpuThread = new Thread(() => { gpuLoop(); _running = false; @@ -116,10 +124,18 @@ namespace Ryujinx.Graphics.GAL.Multithreading _galWorkAvailable.Wait(); _galWorkAvailable.Reset(); + if (Volatile.Read(ref 
_interruptAction) != null) + { + _interruptAction(); + _interruptRun.Set(); + + Interlocked.Exchange(ref _interruptAction, null); + } + // The other thread can only increase the command count. // We can assume that if it is above 0, it will stay there or get higher. - while (_commandCount > 0) + while (_commandCount > 0 && Volatile.Read(ref _interruptAction) == null) { int commandPtr = _consumerPtr; @@ -281,10 +297,10 @@ namespace Ryujinx.Graphics.GAL.Multithreading return sampler; } - public void CreateSync(ulong id) + public void CreateSync(ulong id, bool strict) { Sync.CreateSyncHandle(id); - New().Set(id); + New().Set(id, strict); QueueCommand(); } @@ -421,6 +437,30 @@ namespace Ryujinx.Graphics.GAL.Multithreading _baseRenderer.WaitSync(id); } + private void Interrupt(Action action) + { + // Interrupt the backend thread from any external thread and invoke the given action. + + if (Thread.CurrentThread == _backendThread) + { + // If this is called from the backend thread, the action can run immediately. + action(); + } + else + { + while (Interlocked.CompareExchange(ref _interruptAction, action, null) != null) { } + + _galWorkAvailable.Set(); + + _interruptRun.WaitOne(); + } + } + + public void SetInterruptAction(Action interruptAction) + { + // Threaded renderer ignores given interrupt action, as it provides its own to the child renderer. + } + public void Dispose() { // Dispose must happen from the render thread, after all commands have completed. 
@@ -440,6 +480,7 @@ namespace Ryujinx.Graphics.GAL.Multithreading _frameComplete.Dispose(); _galWorkAvailable.Dispose(); _invokeRun.Dispose(); + _interruptRun.Dispose(); Sync.Dispose(); } diff --git a/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoClass.cs b/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoClass.cs index 9cb97983..e80d98a1 100644 --- a/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoClass.cs +++ b/Ryujinx.Graphics.Gpu/Engine/GPFifo/GPFifoClass.cs @@ -59,7 +59,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo if (_createSyncPending) { _createSyncPending = false; - _context.CreateHostSyncIfNeeded(false); + _context.CreateHostSyncIfNeeded(false, false); } } @@ -157,7 +157,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo } else if (operation == SyncpointbOperation.Incr) { - _context.CreateHostSyncIfNeeded(true); + _context.CreateHostSyncIfNeeded(true, true); _context.Synchronization.IncrementSyncpoint(syncpointId); } @@ -184,7 +184,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo { _context.Renderer.Pipeline.CommandBufferBarrier(); - _context.CreateHostSyncIfNeeded(false); + _context.CreateHostSyncIfNeeded(false, true); } /// diff --git a/Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClass.cs b/Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClass.cs index 87dd1d8e..a38c0987 100644 --- a/Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClass.cs +++ b/Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClass.cs @@ -250,7 +250,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed uint syncpointId = (uint)argument & 0xFFFF; _context.AdvanceSequence(); - _context.CreateHostSyncIfNeeded(true); + _context.CreateHostSyncIfNeeded(true, true); _context.Renderer.UpdateCounters(); // Poll the query counters, the game may want an updated result. 
_context.Synchronization.IncrementSyncpoint(syncpointId); } diff --git a/Ryujinx.Graphics.Gpu/GpuContext.cs b/Ryujinx.Graphics.Gpu/GpuContext.cs index 52733165..91758863 100644 --- a/Ryujinx.Graphics.Gpu/GpuContext.cs +++ b/Ryujinx.Graphics.Gpu/GpuContext.cs @@ -316,7 +316,8 @@ namespace Ryujinx.Graphics.Gpu /// If no actions are present, a host sync object is not created. /// /// True if host sync is being created by a syncpoint - public void CreateHostSyncIfNeeded(bool syncpoint) + /// True if the sync should signal as soon as possible + public void CreateHostSyncIfNeeded(bool syncpoint, bool strict) { if (BufferMigrations.Count > 0) { @@ -337,7 +338,7 @@ namespace Ryujinx.Graphics.Gpu if (_pendingSync || (syncpoint && SyncpointActions.Count > 0)) { - Renderer.CreateSync(SyncNumber); + Renderer.CreateSync(SyncNumber, strict); SyncNumber++; diff --git a/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs b/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs index de1ce4a3..1733c6f2 100644 --- a/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs +++ b/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs @@ -232,7 +232,7 @@ namespace Ryujinx.Graphics.OpenGL return new Program(programBinary, hasFragmentShader, info.FragmentOutputMap); } - public void CreateSync(ulong id) + public void CreateSync(ulong id, bool strict) { _sync.Create(id); } @@ -247,6 +247,11 @@ namespace Ryujinx.Graphics.OpenGL return _sync.GetCurrent(); } + public void SetInterruptAction(Action interruptAction) + { + // Currently no need for an interrupt action. 
+ } + public void Screenshot() { _window.ScreenCaptureRequested = true; diff --git a/Ryujinx.Graphics.Vulkan/CommandBufferPool.cs b/Ryujinx.Graphics.Vulkan/CommandBufferPool.cs index c77b0040..4cbb24ef 100644 --- a/Ryujinx.Graphics.Vulkan/CommandBufferPool.cs +++ b/Ryujinx.Graphics.Vulkan/CommandBufferPool.cs @@ -116,6 +116,22 @@ namespace Ryujinx.Graphics.Vulkan } } + public void AddInUseWaitable(MultiFenceHolder waitable) + { + lock (_commandBuffers) + { + for (int i = 0; i < _totalCommandBuffers; i++) + { + ref var entry = ref _commandBuffers[i]; + + if (entry.InUse) + { + AddWaitable(i, waitable); + } + } + } + } + public void AddDependency(int cbIndex, CommandBufferScoped dependencyCbs) { Debug.Assert(_commandBuffers[cbIndex].InUse); diff --git a/Ryujinx.Graphics.Vulkan/PipelineFull.cs b/Ryujinx.Graphics.Vulkan/PipelineFull.cs index 56a49184..2256c542 100644 --- a/Ryujinx.Graphics.Vulkan/PipelineFull.cs +++ b/Ryujinx.Graphics.Vulkan/PipelineFull.cs @@ -227,6 +227,7 @@ namespace Ryujinx.Graphics.Vulkan } CommandBuffer = (Cbs = Gd.CommandBufferPool.ReturnAndRent(Cbs)).CommandBuffer; + Gd.RegisterFlush(); // Restore per-command buffer state. 
diff --git a/Ryujinx.Graphics.Vulkan/SyncManager.cs b/Ryujinx.Graphics.Vulkan/SyncManager.cs index 35e3adf1..c046dc3c 100644 --- a/Ryujinx.Graphics.Vulkan/SyncManager.cs +++ b/Ryujinx.Graphics.Vulkan/SyncManager.cs @@ -11,7 +11,13 @@ namespace Ryujinx.Graphics.Vulkan { public ulong ID; public MultiFenceHolder Waitable; + public ulong FlushId; public bool Signalled; + + public bool NeedsFlush(ulong currentFlushId) + { + return (long)(FlushId - currentFlushId) >= 0; + } } private ulong _firstHandle = 0; @@ -19,6 +25,7 @@ namespace Ryujinx.Graphics.Vulkan private readonly VulkanRenderer _gd; private readonly Device _device; private List _handles; + private ulong FlushId; public SyncManager(VulkanRenderer gd, Device device) { @@ -27,17 +34,33 @@ namespace Ryujinx.Graphics.Vulkan _handles = new List(); } - public void Create(ulong id) + public void RegisterFlush() { - MultiFenceHolder waitable = new MultiFenceHolder(); + FlushId++; + } - _gd.FlushAllCommands(); - _gd.CommandBufferPool.AddWaitable(waitable); + public void Create(ulong id, bool strict) + { + ulong flushId = FlushId; + MultiFenceHolder waitable = new MultiFenceHolder(); + if (strict || _gd.InterruptAction == null) + { + _gd.FlushAllCommands(); + _gd.CommandBufferPool.AddWaitable(waitable); + } + else + { + // Don't flush commands, instead wait for the current command buffer to finish. + // If this sync is waited on before the command buffer is submitted, interrupt the gpu thread and flush it manually. 
+ + _gd.CommandBufferPool.AddInUseWaitable(waitable); + } SyncHandle handle = new SyncHandle { ID = id, - Waitable = waitable + Waitable = waitable, + FlushId = flushId }; lock (_handles) @@ -107,6 +130,17 @@ namespace Ryujinx.Graphics.Vulkan return; } + if (result.NeedsFlush(FlushId)) + { + _gd.InterruptAction(() => + { + if (result.NeedsFlush(FlushId)) + { + _gd.FlushAllCommands(); + } + }); + } + bool signaled = result.Signalled || result.Waitable.WaitForFences(_gd.Api, _device, 1000000000); if (!signaled) { @@ -132,7 +166,7 @@ namespace Ryujinx.Graphics.Vulkan first = _handles.FirstOrDefault(); } - if (first == null) break; + if (first == null || first.NeedsFlush(FlushId)) break; bool signaled = first.Waitable.WaitForFences(_gd.Api, _device, 0); if (signaled) diff --git a/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs b/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs index 3c446abf..5c77cb00 100644 --- a/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs +++ b/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs @@ -48,6 +48,7 @@ namespace Ryujinx.Graphics.Vulkan internal DescriptorSetManager DescriptorSetManager { get; private set; } internal PipelineLayoutCache PipelineLayoutCache { get; private set; } internal BackgroundResources BackgroundResources { get; private set; } + internal Action InterruptAction { get; private set; } internal BufferManager BufferManager { get; private set; } @@ -354,6 +355,11 @@ namespace Ryujinx.Graphics.Vulkan _pipeline?.FlushCommandsImpl(); } + internal void RegisterFlush() + { + _syncManager.RegisterFlush(); + } + public ReadOnlySpan GetBufferData(BufferHandle buffer, int offset, int size) { return BufferManager.GetData(buffer, offset, size); @@ -593,9 +599,9 @@ namespace Ryujinx.Graphics.Vulkan action(); } - public void CreateSync(ulong id) + public void CreateSync(ulong id, bool strict) { - _syncManager.Create(id); + _syncManager.Create(id, strict); } public IProgram LoadProgramBinary(byte[] programBinary, bool isFragment, ShaderInfo info) @@ -613,6 +619,11 
@@ namespace Ryujinx.Graphics.Vulkan return _syncManager.GetCurrent(); } + public void SetInterruptAction(Action interruptAction) + { + InterruptAction = interruptAction; + } + public void Screenshot() { _window.ScreenCaptureRequested = true;