From eb1ce41b00e415fe84537bc872ddbf13996055d5 Mon Sep 17 00:00:00 2001 From: riperiperi Date: Sun, 19 May 2024 20:53:37 +0100 Subject: [PATCH] GPU: Migrate buffers on GPU project, pre-emptively flush device local mappings (#6794) * GPU: Migrate buffers on GPU project, pre-emptively flush device local mappings Essentially retreading #4540, but it's on the GPU project now instead of the backend. This allows us to have a lot more control + knowledge of where the buffer backing has been changed and allows us to pre-emptively flush pages to host memory for quicker readback. It will allow us to do other stuff in the future, but we'll get there when we get there. Performance greatly improved in Hyrule Warriors: Age of Calamity. Performance notably improved in TOTK (average). Performance for BOTW restored to how it was before #4911, perhaps a bit better. - Rewrites a bunch of buffer migration stuff. Might want to tighten up how dispose stuff works. - Fixed an issue where the copy for texture pre-flush would happen _after_ the syncpoint. TODO: remove a page from pre-flush if it isn't flushed after a certain number of copies. * Add copy deactivation * Fix dependent virtual buffers * Remove logging * Fix format issues (maybe) * Vulkan: Remove backing swap * Add explicit memory access types for most buffers * Fix typo * Add device local force expiry, change buffer inheritance behaviour * General cleanup, OGL fix * BufferPreFlush comments * BufferBackingState comments * Add an extra precaution to BufferMigration This is very unlikely, but it's important to cover loose ends like this. 
* Address some feedback * Docs --- src/Ryujinx.Graphics.GAL/BufferAccess.cs | 11 +- src/Ryujinx.Graphics.GAL/Capabilities.cs | 3 + src/Ryujinx.Graphics.GAL/IRenderer.cs | 1 - .../Multithreading/CommandHelper.cs | 1 - .../Multithreading/CommandType.cs | 1 - .../Commands/Renderer/CreateBufferCommand.cs | 31 -- .../Multithreading/ThreadedRenderer.cs | 9 - src/Ryujinx.Graphics.GAL/SystemMemoryType.cs | 29 ++ .../Engine/MME/MacroHLE.cs | 5 +- .../Threed/ComputeDraw/VtgAsComputeContext.cs | 8 +- .../Threed/ComputeDraw/VtgAsComputeState.cs | 7 +- .../Engine/Threed/DrawManager.cs | 6 +- src/Ryujinx.Graphics.Gpu/GpuContext.cs | 5 +- .../Image/TextureBindingsArrayCache.cs | 8 +- .../Image/TextureGroup.cs | 2 +- src/Ryujinx.Graphics.Gpu/Memory/Buffer.cs | 174 ++++++++++- .../Memory/BufferBackingState.cs | 294 +++++++++++++++++ .../Memory/BufferCache.cs | 118 ++++--- .../Memory/BufferManager.cs | 47 +-- .../Memory/BufferMigration.cs | 250 +++++++++++---- .../Memory/BufferModifiedRangeList.cs | 138 +++++--- .../Memory/BufferPreFlush.cs | 295 ++++++++++++++++++ .../Memory/BufferStage.cs | 99 ++++++ src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs | 10 +- src/Ryujinx.Graphics.Vulkan/BufferHolder.cs | 227 +------------- src/Ryujinx.Graphics.Vulkan/BufferManager.cs | 37 +-- src/Ryujinx.Graphics.Vulkan/EnumConversion.cs | 12 +- src/Ryujinx.Graphics.Vulkan/PipelineFull.cs | 16 - src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs | 21 +- 29 files changed, 1342 insertions(+), 523 deletions(-) delete mode 100644 src/Ryujinx.Graphics.GAL/Multithreading/Commands/Renderer/CreateBufferCommand.cs create mode 100644 src/Ryujinx.Graphics.GAL/SystemMemoryType.cs create mode 100644 src/Ryujinx.Graphics.Gpu/Memory/BufferBackingState.cs create mode 100644 src/Ryujinx.Graphics.Gpu/Memory/BufferPreFlush.cs create mode 100644 src/Ryujinx.Graphics.Gpu/Memory/BufferStage.cs diff --git a/src/Ryujinx.Graphics.GAL/BufferAccess.cs b/src/Ryujinx.Graphics.GAL/BufferAccess.cs index faefa518..1e7736f8 100644 --- 
a/src/Ryujinx.Graphics.GAL/BufferAccess.cs +++ b/src/Ryujinx.Graphics.GAL/BufferAccess.cs @@ -6,8 +6,13 @@ namespace Ryujinx.Graphics.GAL public enum BufferAccess { Default = 0, - FlushPersistent = 1 << 0, - Stream = 1 << 1, - SparseCompatible = 1 << 2, + HostMemory = 1, + DeviceMemory = 2, + DeviceMemoryMapped = 3, + + MemoryTypeMask = 0xf, + + Stream = 1 << 4, + SparseCompatible = 1 << 5, } } diff --git a/src/Ryujinx.Graphics.GAL/Capabilities.cs b/src/Ryujinx.Graphics.GAL/Capabilities.cs index 779ce5b5..d758586a 100644 --- a/src/Ryujinx.Graphics.GAL/Capabilities.cs +++ b/src/Ryujinx.Graphics.GAL/Capabilities.cs @@ -6,6 +6,7 @@ namespace Ryujinx.Graphics.GAL { public readonly TargetApi Api; public readonly string VendorName; + public readonly SystemMemoryType MemoryType; public readonly bool HasFrontFacingBug; public readonly bool HasVectorIndexingBug; @@ -66,6 +67,7 @@ namespace Ryujinx.Graphics.GAL public Capabilities( TargetApi api, string vendorName, + SystemMemoryType memoryType, bool hasFrontFacingBug, bool hasVectorIndexingBug, bool needsFragmentOutputSpecialization, @@ -120,6 +122,7 @@ namespace Ryujinx.Graphics.GAL { Api = api; VendorName = vendorName; + MemoryType = memoryType; HasFrontFacingBug = hasFrontFacingBug; HasVectorIndexingBug = hasVectorIndexingBug; NeedsFragmentOutputSpecialization = needsFragmentOutputSpecialization; diff --git a/src/Ryujinx.Graphics.GAL/IRenderer.cs b/src/Ryujinx.Graphics.GAL/IRenderer.cs index a3466e39..85d0bd72 100644 --- a/src/Ryujinx.Graphics.GAL/IRenderer.cs +++ b/src/Ryujinx.Graphics.GAL/IRenderer.cs @@ -17,7 +17,6 @@ namespace Ryujinx.Graphics.GAL void BackgroundContextAction(Action action, bool alwaysBackground = false); BufferHandle CreateBuffer(int size, BufferAccess access = BufferAccess.Default); - BufferHandle CreateBuffer(int size, BufferAccess access, BufferHandle storageHint); BufferHandle CreateBuffer(nint pointer, int size); BufferHandle CreateBufferSparse(ReadOnlySpan storageBuffers); diff --git 
a/src/Ryujinx.Graphics.GAL/Multithreading/CommandHelper.cs b/src/Ryujinx.Graphics.GAL/Multithreading/CommandHelper.cs index fd2919be..23f1a64e 100644 --- a/src/Ryujinx.Graphics.GAL/Multithreading/CommandHelper.cs +++ b/src/Ryujinx.Graphics.GAL/Multithreading/CommandHelper.cs @@ -44,7 +44,6 @@ namespace Ryujinx.Graphics.GAL.Multithreading } Register(CommandType.Action); - Register(CommandType.CreateBuffer); Register(CommandType.CreateBufferAccess); Register(CommandType.CreateBufferSparse); Register(CommandType.CreateHostBuffer); diff --git a/src/Ryujinx.Graphics.GAL/Multithreading/CommandType.cs b/src/Ryujinx.Graphics.GAL/Multithreading/CommandType.cs index a5e7336c..f95aab05 100644 --- a/src/Ryujinx.Graphics.GAL/Multithreading/CommandType.cs +++ b/src/Ryujinx.Graphics.GAL/Multithreading/CommandType.cs @@ -3,7 +3,6 @@ namespace Ryujinx.Graphics.GAL.Multithreading enum CommandType : byte { Action, - CreateBuffer, CreateBufferAccess, CreateBufferSparse, CreateHostBuffer, diff --git a/src/Ryujinx.Graphics.GAL/Multithreading/Commands/Renderer/CreateBufferCommand.cs b/src/Ryujinx.Graphics.GAL/Multithreading/Commands/Renderer/CreateBufferCommand.cs deleted file mode 100644 index 60a6e4bf..00000000 --- a/src/Ryujinx.Graphics.GAL/Multithreading/Commands/Renderer/CreateBufferCommand.cs +++ /dev/null @@ -1,31 +0,0 @@ -namespace Ryujinx.Graphics.GAL.Multithreading.Commands.Renderer -{ - struct CreateBufferCommand : IGALCommand, IGALCommand - { - public readonly CommandType CommandType => CommandType.CreateBuffer; - private BufferHandle _threadedHandle; - private int _size; - private BufferAccess _access; - private BufferHandle _storageHint; - - public void Set(BufferHandle threadedHandle, int size, BufferAccess access, BufferHandle storageHint) - { - _threadedHandle = threadedHandle; - _size = size; - _access = access; - _storageHint = storageHint; - } - - public static void Run(ref CreateBufferCommand command, ThreadedRenderer threaded, IRenderer renderer) - { - BufferHandle 
hint = BufferHandle.Null; - - if (command._storageHint != BufferHandle.Null) - { - hint = threaded.Buffers.MapBuffer(command._storageHint); - } - - threaded.Buffers.AssignBuffer(command._threadedHandle, renderer.CreateBuffer(command._size, command._access, hint)); - } - } -} diff --git a/src/Ryujinx.Graphics.GAL/Multithreading/ThreadedRenderer.cs b/src/Ryujinx.Graphics.GAL/Multithreading/ThreadedRenderer.cs index 5e17bcd2..cc3d2e5c 100644 --- a/src/Ryujinx.Graphics.GAL/Multithreading/ThreadedRenderer.cs +++ b/src/Ryujinx.Graphics.GAL/Multithreading/ThreadedRenderer.cs @@ -272,15 +272,6 @@ namespace Ryujinx.Graphics.GAL.Multithreading return handle; } - public BufferHandle CreateBuffer(int size, BufferAccess access, BufferHandle storageHint) - { - BufferHandle handle = Buffers.CreateBufferHandle(); - New().Set(handle, size, access, storageHint); - QueueCommand(); - - return handle; - } - public BufferHandle CreateBuffer(nint pointer, int size) { BufferHandle handle = Buffers.CreateBufferHandle(); diff --git a/src/Ryujinx.Graphics.GAL/SystemMemoryType.cs b/src/Ryujinx.Graphics.GAL/SystemMemoryType.cs new file mode 100644 index 00000000..53292129 --- /dev/null +++ b/src/Ryujinx.Graphics.GAL/SystemMemoryType.cs @@ -0,0 +1,29 @@ +namespace Ryujinx.Graphics.GAL +{ + public enum SystemMemoryType + { + /// + /// The backend manages the ownership of memory. This mode never supports host imported memory. + /// + BackendManaged, + + /// + /// Device memory has similar performance to host memory, usually because it's shared between CPU/GPU. + /// Use host memory whenever possible. + /// + UnifiedMemory, + + /// + /// GPU storage to host memory goes through a slow interconnect, but it would still be preferable to use it if the data is flushed back often. + /// Assumes constant buffer access to host memory is rather fast. + /// + DedicatedMemory, + + /// + /// GPU storage to host memory goes through a slow interconnect, that is very slow when doing access from storage. 
+ /// When frequently accessed, copy buffers to host memory using DMA. + /// Assumes constant buffer access to host memory is rather fast. + /// + DedicatedMemorySlowStorage + } +} diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLE.cs b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLE.cs index 7f3772f4..475d1ee4 100644 --- a/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLE.cs +++ b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLE.cs @@ -5,6 +5,7 @@ using Ryujinx.Graphics.GAL; using Ryujinx.Graphics.Gpu.Engine.GPFifo; using Ryujinx.Graphics.Gpu.Engine.Threed; using Ryujinx.Graphics.Gpu.Engine.Types; +using Ryujinx.Graphics.Gpu.Memory; using Ryujinx.Memory.Range; using System; using System.Collections.Generic; @@ -495,8 +496,8 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME ulong indirectBufferSize = (ulong)maxDrawCount * (ulong)stride; - MultiRange indirectBufferRange = bufferCache.TranslateAndCreateMultiBuffers(_processor.MemoryManager, indirectBufferGpuVa, indirectBufferSize); - MultiRange parameterBufferRange = bufferCache.TranslateAndCreateMultiBuffers(_processor.MemoryManager, parameterBufferGpuVa, 4); + MultiRange indirectBufferRange = bufferCache.TranslateAndCreateMultiBuffers(_processor.MemoryManager, indirectBufferGpuVa, indirectBufferSize, BufferStage.Indirect); + MultiRange parameterBufferRange = bufferCache.TranslateAndCreateMultiBuffers(_processor.MemoryManager, parameterBufferGpuVa, 4, BufferStage.Indirect); _processor.ThreedClass.DrawIndirect( topology, diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsComputeContext.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsComputeContext.cs index f9cb40b0..6de50fb2 100644 --- a/src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsComputeContext.cs +++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsComputeContext.cs @@ -438,7 +438,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed.ComputeDraw ReadOnlySpan dataBytes = MemoryMarshal.Cast(data); - BufferHandle buffer = 
_context.Renderer.CreateBuffer(dataBytes.Length); + BufferHandle buffer = _context.Renderer.CreateBuffer(dataBytes.Length, BufferAccess.DeviceMemory); _context.Renderer.SetBufferData(buffer, 0, dataBytes); return new IndexBuffer(buffer, count, dataBytes.Length); @@ -529,7 +529,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed.ComputeDraw { if (_dummyBuffer == BufferHandle.Null) { - _dummyBuffer = _context.Renderer.CreateBuffer(DummyBufferSize); + _dummyBuffer = _context.Renderer.CreateBuffer(DummyBufferSize, BufferAccess.DeviceMemory); _context.Renderer.Pipeline.ClearBuffer(_dummyBuffer, 0, DummyBufferSize, 0); } @@ -550,7 +550,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed.ComputeDraw _context.Renderer.DeleteBuffer(_sequentialIndexBuffer); } - _sequentialIndexBuffer = _context.Renderer.CreateBuffer(count * sizeof(uint)); + _sequentialIndexBuffer = _context.Renderer.CreateBuffer(count * sizeof(uint), BufferAccess.DeviceMemory); _sequentialIndexBufferCount = count; Span data = new int[count]; @@ -583,7 +583,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed.ComputeDraw _context.Renderer.DeleteBuffer(buffer.Handle); } - buffer.Handle = _context.Renderer.CreateBuffer(newSize); + buffer.Handle = _context.Renderer.CreateBuffer(newSize, BufferAccess.DeviceMemory); buffer.Size = newSize; } diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsComputeState.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsComputeState.cs index 6324e6a1..73682866 100644 --- a/src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsComputeState.cs +++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsComputeState.cs @@ -3,6 +3,7 @@ using Ryujinx.Common.Logging; using Ryujinx.Graphics.GAL; using Ryujinx.Graphics.Gpu.Engine.Types; using Ryujinx.Graphics.Gpu.Image; +using Ryujinx.Graphics.Gpu.Memory; using Ryujinx.Graphics.Gpu.Shader; using Ryujinx.Graphics.Shader; using Ryujinx.Graphics.Shader.Translation; @@ -370,7 +371,7 @@ namespace 
Ryujinx.Graphics.Gpu.Engine.Threed.ComputeDraw { var memoryManager = _channel.MemoryManager; - BufferRange range = memoryManager.Physical.BufferCache.GetBufferRange(memoryManager.GetPhysicalRegions(address, size)); + BufferRange range = memoryManager.Physical.BufferCache.GetBufferRange(memoryManager.GetPhysicalRegions(address, size), BufferStage.VertexBuffer); ITexture bufferTexture = _vacContext.EnsureBufferTexture(index + 2, format); bufferTexture.SetStorage(range); @@ -412,7 +413,9 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed.ComputeDraw var memoryManager = _channel.MemoryManager; ulong misalign = address & ((ulong)_context.Capabilities.TextureBufferOffsetAlignment - 1); - BufferRange range = memoryManager.Physical.BufferCache.GetBufferRange(memoryManager.GetPhysicalRegions(address + indexOffset - misalign, size + misalign)); + BufferRange range = memoryManager.Physical.BufferCache.GetBufferRange( + memoryManager.GetPhysicalRegions(address + indexOffset - misalign, size + misalign), + BufferStage.IndexBuffer); misalignedOffset = (int)misalign >> shift; SetIndexBufferTexture(reservations, range, format); diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/DrawManager.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/DrawManager.cs index d8de14de..56ef64c6 100644 --- a/src/Ryujinx.Graphics.Gpu/Engine/Threed/DrawManager.cs +++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/DrawManager.cs @@ -684,8 +684,8 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed if (hasCount) { - var indirectBuffer = memory.BufferCache.GetBufferRange(indirectBufferRange); - var parameterBuffer = memory.BufferCache.GetBufferRange(parameterBufferRange); + var indirectBuffer = memory.BufferCache.GetBufferRange(indirectBufferRange, BufferStage.Indirect); + var parameterBuffer = memory.BufferCache.GetBufferRange(parameterBufferRange, BufferStage.Indirect); if (indexed) { @@ -698,7 +698,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed } else { - var indirectBuffer = 
memory.BufferCache.GetBufferRange(indirectBufferRange); + var indirectBuffer = memory.BufferCache.GetBufferRange(indirectBufferRange, BufferStage.Indirect); if (indexed) { diff --git a/src/Ryujinx.Graphics.Gpu/GpuContext.cs b/src/Ryujinx.Graphics.Gpu/GpuContext.cs index 53ea8cb2..048d32fb 100644 --- a/src/Ryujinx.Graphics.Gpu/GpuContext.cs +++ b/src/Ryujinx.Graphics.Gpu/GpuContext.cs @@ -393,17 +393,18 @@ namespace Ryujinx.Graphics.Gpu if (force || _pendingSync || (syncpoint && SyncpointActions.Count > 0)) { - Renderer.CreateSync(SyncNumber, strict); - foreach (var action in SyncActions) { action.SyncPreAction(syncpoint); } + foreach (var action in SyncpointActions) { action.SyncPreAction(syncpoint); } + Renderer.CreateSync(SyncNumber, strict); + SyncNumber++; SyncActions.RemoveAll(action => action.SyncAction(syncpoint)); diff --git a/src/Ryujinx.Graphics.Gpu/Image/TextureBindingsArrayCache.cs b/src/Ryujinx.Graphics.Gpu/Image/TextureBindingsArrayCache.cs index 7e486e0a..a54d0700 100644 --- a/src/Ryujinx.Graphics.Gpu/Image/TextureBindingsArrayCache.cs +++ b/src/Ryujinx.Graphics.Gpu/Image/TextureBindingsArrayCache.cs @@ -708,11 +708,11 @@ namespace Ryujinx.Graphics.Gpu.Image format = texture.Format; } - _channel.BufferManager.SetBufferTextureStorage(entry.ImageArray, hostTexture, texture.Range, bindingInfo, index, format); + _channel.BufferManager.SetBufferTextureStorage(stage, entry.ImageArray, hostTexture, texture.Range, bindingInfo, index, format); } else { - _channel.BufferManager.SetBufferTextureStorage(entry.TextureArray, hostTexture, texture.Range, bindingInfo, index, format); + _channel.BufferManager.SetBufferTextureStorage(stage, entry.TextureArray, hostTexture, texture.Range, bindingInfo, index, format); } } else if (isImage) @@ -921,11 +921,11 @@ namespace Ryujinx.Graphics.Gpu.Image format = texture.Format; } - _channel.BufferManager.SetBufferTextureStorage(entry.ImageArray, hostTexture, texture.Range, bindingInfo, index, format); + 
_channel.BufferManager.SetBufferTextureStorage(stage, entry.ImageArray, hostTexture, texture.Range, bindingInfo, index, format); } else { - _channel.BufferManager.SetBufferTextureStorage(entry.TextureArray, hostTexture, texture.Range, bindingInfo, index, format); + _channel.BufferManager.SetBufferTextureStorage(stage, entry.TextureArray, hostTexture, texture.Range, bindingInfo, index, format); } } else if (isImage) diff --git a/src/Ryujinx.Graphics.Gpu/Image/TextureGroup.cs b/src/Ryujinx.Graphics.Gpu/Image/TextureGroup.cs index 4e1133d1..06ca2c59 100644 --- a/src/Ryujinx.Graphics.Gpu/Image/TextureGroup.cs +++ b/src/Ryujinx.Graphics.Gpu/Image/TextureGroup.cs @@ -645,7 +645,7 @@ namespace Ryujinx.Graphics.Gpu.Image } else { - _flushBuffer = _context.Renderer.CreateBuffer((int)Storage.Size, BufferAccess.FlushPersistent); + _flushBuffer = _context.Renderer.CreateBuffer((int)Storage.Size, BufferAccess.HostMemory); _flushBufferImported = false; } diff --git a/src/Ryujinx.Graphics.Gpu/Memory/Buffer.cs b/src/Ryujinx.Graphics.Gpu/Memory/Buffer.cs index d293060b..e060e0b4 100644 --- a/src/Ryujinx.Graphics.Gpu/Memory/Buffer.cs +++ b/src/Ryujinx.Graphics.Gpu/Memory/Buffer.cs @@ -10,6 +10,8 @@ using System.Threading; namespace Ryujinx.Graphics.Gpu.Memory { + delegate void BufferFlushAction(ulong address, ulong size, ulong syncNumber); + /// /// Buffer, used to store vertex and index data, uniform and storage buffers, and others. /// @@ -23,7 +25,7 @@ namespace Ryujinx.Graphics.Gpu.Memory /// /// Host buffer handle. /// - public BufferHandle Handle { get; } + public BufferHandle Handle { get; private set; } /// /// Start address of the buffer in guest memory. @@ -60,6 +62,17 @@ namespace Ryujinx.Graphics.Gpu.Memory /// private BufferModifiedRangeList _modifiedRanges = null; + /// + /// A structure that is used to flush buffer data back to a host mapped buffer for cached readback. + /// Only used if the buffer data is explicitly owned by device local memory. 
+ /// + private BufferPreFlush _preFlush = null; + + /// + /// Usage tracking state that determines what type of backing the buffer should use. + /// + public BufferBackingState BackingState; + private readonly MultiRegionHandle _memoryTrackingGranular; private readonly RegionHandle _memoryTracking; @@ -87,6 +100,7 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Physical memory where the buffer is mapped /// Start address of the buffer /// Size of the buffer in bytes + /// The type of usage that created the buffer /// Indicates if the buffer can be used in a sparse buffer mapping /// Buffers which this buffer contains, and will inherit tracking handles from public Buffer( @@ -94,6 +108,7 @@ namespace Ryujinx.Graphics.Gpu.Memory PhysicalMemory physicalMemory, ulong address, ulong size, + BufferStage stage, bool sparseCompatible, IEnumerable baseBuffers = null) { @@ -103,9 +118,11 @@ namespace Ryujinx.Graphics.Gpu.Memory Size = size; SparseCompatible = sparseCompatible; - BufferAccess access = sparseCompatible ? BufferAccess.SparseCompatible : BufferAccess.Default; + BackingState = new BufferBackingState(_context, this, stage, baseBuffers); - Handle = context.Renderer.CreateBuffer((int)size, access, baseBuffers?.MaxBy(x => x.Size).Handle ?? BufferHandle.Null); + BufferAccess access = BackingState.SwitchAccess(this); + + Handle = context.Renderer.CreateBuffer((int)size, access); _useGranular = size > GranularBufferThreshold; @@ -161,6 +178,29 @@ namespace Ryujinx.Graphics.Gpu.Memory _virtualDependenciesLock = new ReaderWriterLockSlim(); } + /// + /// Recreates the backing buffer based on the desired access type + /// reported by the backing state struct. 
+ /// + private void ChangeBacking() + { + BufferAccess access = BackingState.SwitchAccess(this); + + BufferHandle newHandle = _context.Renderer.CreateBuffer((int)Size, access); + + _context.Renderer.Pipeline.CopyBuffer(Handle, newHandle, 0, 0, (int)Size); + + _modifiedRanges?.SelfMigration(); + + // If switching from device local to host mapped, pre-flushing data no longer makes sense. + // This is set to null and disposed when the migration fully completes. + _preFlush = null; + + Handle = newHandle; + + _physicalMemory.BufferCache.BufferBackingChanged(this); + } + /// /// Gets a sub-range from the buffer, from a start address til a page boundary after the given size. /// @@ -246,6 +286,7 @@ namespace Ryujinx.Graphics.Gpu.Memory } else { + BackingState.RecordSet(); _context.Renderer.SetBufferData(Handle, 0, _physicalMemory.GetSpan(Address, (int)Size)); CopyToDependantVirtualBuffers(); } @@ -283,15 +324,35 @@ namespace Ryujinx.Graphics.Gpu.Memory _modifiedRanges ??= new BufferModifiedRangeList(_context, this, Flush); } + /// + /// Checks if a backing change is deemed necessary from the given usage. + /// If it is, queues a backing change to happen on the next sync action. + /// + /// Buffer stage that can change backing type + private void TryQueueBackingChange(BufferStage stage) + { + if (BackingState.ShouldChangeBacking(stage)) + { + if (!_syncActionRegistered) + { + _context.RegisterSyncAction(this); + _syncActionRegistered = true; + } + } + } + /// /// Signal that the given region of the buffer has been modified. 
/// /// The start address of the modified region /// The size of the modified region - public void SignalModified(ulong address, ulong size) + /// Buffer stage that triggered the modification + public void SignalModified(ulong address, ulong size, BufferStage stage) { EnsureRangeList(); + TryQueueBackingChange(stage); + _modifiedRanges.SignalModified(address, size); if (!_syncActionRegistered) @@ -311,6 +372,37 @@ namespace Ryujinx.Graphics.Gpu.Memory _modifiedRanges?.Clear(address, size); } + /// + /// Action to be performed immediately before sync is created. + /// This will copy any buffer ranges designated for pre-flushing. + /// + /// True if the action is a guest syncpoint + public void SyncPreAction(bool syncpoint) + { + if (_referenceCount == 0) + { + return; + } + + if (BackingState.ShouldChangeBacking()) + { + ChangeBacking(); + } + + if (BackingState.IsDeviceLocal) + { + _preFlush ??= new BufferPreFlush(_context, this, FlushImpl); + + if (_preFlush.ShouldCopy) + { + _modifiedRanges?.GetRangesAtSync(Address, Size, _context.SyncNumber, (address, size) => + { + _preFlush.CopyModified(address, size); + }); + } + } + } + /// /// Action to be performed when a syncpoint is reached after modification. /// This will register read/write tracking to flush the buffer from GPU when its memory is used. @@ -466,6 +558,8 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Size of the modified region private void LoadRegion(ulong mAddress, ulong mSize) { + BackingState.RecordSet(); + int offset = (int)(mAddress - Address); _context.Renderer.SetBufferData(Handle, offset, _physicalMemory.GetSpan(mAddress, (int)mSize)); @@ -539,18 +633,84 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Flushes a range of the buffer. /// This writes the range data back into guest memory. 
/// + /// Buffer handle to flush data from /// Start address of the range /// Size in bytes of the range - public void Flush(ulong address, ulong size) + private void FlushImpl(BufferHandle handle, ulong address, ulong size) { int offset = (int)(address - Address); - using PinnedSpan data = _context.Renderer.GetBufferData(Handle, offset, (int)size); + using PinnedSpan data = _context.Renderer.GetBufferData(handle, offset, (int)size); // TODO: When write tracking shaders, they will need to be aware of changes in overlapping buffers. _physicalMemory.WriteUntracked(address, CopyFromDependantVirtualBuffers(data.Get(), address, size)); } + /// + /// Flushes a range of the buffer. + /// This writes the range data back into guest memory. + /// + /// Start address of the range + /// Size in bytes of the range + private void FlushImpl(ulong address, ulong size) + { + FlushImpl(Handle, address, size); + } + + /// + /// Flushes a range of the buffer from the most optimal source. + /// This writes the range data back into guest memory. + /// + /// Start address of the range + /// Size in bytes of the range + /// Sync number waited for before flushing the data + public void Flush(ulong address, ulong size, ulong syncNumber) + { + BackingState.RecordFlush(); + + BufferPreFlush preFlush = _preFlush; + + if (preFlush != null) + { + preFlush.FlushWithAction(address, size, syncNumber); + } + else + { + FlushImpl(address, size); + } + } + /// + /// Gets an action that disposes the backing buffer using its current handle. + /// Useful for deleting an old copy of the buffer after the handle changes. 
+ /// + /// An action that deletes the backing buffer and disposes its pre-flush state, using the buffer handle at the time the method is generated + public Action GetSnapshotDisposeAction() + { + BufferHandle handle = Handle; + BufferPreFlush preFlush = _preFlush; + + return () => + { + _context.Renderer.DeleteBuffer(handle); + preFlush?.Dispose(); + }; + } + + /// + /// Gets an action that flushes a range of the buffer using its current handle. + /// Useful for flushing data from old copies of the buffer after the handle changes. + /// + /// An action that flushes data from the specified range, using the buffer handle at the time the method is generated + public BufferFlushAction GetSnapshotFlushAction() + { + BufferHandle handle = Handle; + + return (ulong address, ulong size, ulong _) => + { + FlushImpl(handle, address, size); + }; + } + /// /// Align a given address and size region to page boundaries. /// @@ -857,6 +1017,8 @@ namespace Ryujinx.Graphics.Gpu.Memory _modifiedRanges?.Clear(); _context.Renderer.DeleteBuffer(Handle); + _preFlush?.Dispose(); + _preFlush = null; UnmappedSequence++; } diff --git a/src/Ryujinx.Graphics.Gpu/Memory/BufferBackingState.cs b/src/Ryujinx.Graphics.Gpu/Memory/BufferBackingState.cs new file mode 100644 index 00000000..3f65131e --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Memory/BufferBackingState.cs @@ -0,0 +1,294 @@ +using Ryujinx.Graphics.GAL; +using System; +using System.Collections.Generic; + +namespace Ryujinx.Graphics.Gpu.Memory +{ + /// + /// Type of backing memory. + /// In ascending order of priority when merging multiple buffer backing states. + /// + internal enum BufferBackingType + { + HostMemory, + DeviceMemory, + DeviceMemoryWithFlush + } + + /// + /// Keeps track of buffer usage to decide what memory heap that buffer memory is placed on. + /// Dedicated GPUs prefer certain types of resources to be device local, + /// and if we need data to be read back, we might prefer that they're in host memory. 
+ /// + /// The measurements recorded here compare to a set of heuristics (thresholds and conditions) + /// that appear to produce good performance in most software. + /// + internal struct BufferBackingState + { + private const int DeviceLocalSizeThreshold = 256 * 1024; // 256kb + + private const int SetCountThreshold = 100; + private const int WriteCountThreshold = 50; + private const int FlushCountThreshold = 5; + private const int DeviceLocalForceExpiry = 100; + + public readonly bool IsDeviceLocal => _activeType != BufferBackingType.HostMemory; + + private readonly SystemMemoryType _systemMemoryType; + private BufferBackingType _activeType; + private BufferBackingType _desiredType; + + private bool _canSwap; + + private int _setCount; + private int _writeCount; + private int _flushCount; + private int _flushTemp; + private int _lastFlushWrite; + private int _deviceLocalForceCount; + + private readonly int _size; + + /// + /// Initialize the buffer backing state for a given parent buffer. + /// + /// GPU context + /// Parent buffer + /// Initial buffer stage + /// Buffers to inherit state from + public BufferBackingState(GpuContext context, Buffer parent, BufferStage stage, IEnumerable baseBuffers = null) + { + _size = (int)parent.Size; + _systemMemoryType = context.Capabilities.MemoryType; + + // Backend managed is always auto, unified memory is always host. + _desiredType = BufferBackingType.HostMemory; + _canSwap = _systemMemoryType != SystemMemoryType.BackendManaged && _systemMemoryType != SystemMemoryType.UnifiedMemory; + + if (_canSwap) + { + // Might want to start certain buffers as being device local, + // and the usage might also lock those buffers into being device local. + + BufferStage storageFlags = stage & BufferStage.StorageMask; + + if (parent.Size > DeviceLocalSizeThreshold && baseBuffers == null) + { + _desiredType = BufferBackingType.DeviceMemory; + } + + if (storageFlags != 0) + { + // Storage buffer bindings may require special treatment. 
+ + var rawStage = stage & BufferStage.StageMask; + + if (rawStage == BufferStage.Fragment) + { + // Fragment read should start device local. + + _desiredType = BufferBackingType.DeviceMemory; + + if (storageFlags != BufferStage.StorageRead) + { + // Fragment write should stay device local until the use doesn't happen anymore. + + _deviceLocalForceCount = DeviceLocalForceExpiry; + } + } + + // TODO: Might be nice to force atomic access to be device local for any stage. + } + + if (baseBuffers != null) + { + foreach (Buffer buffer in baseBuffers) + { + CombineState(buffer.BackingState); + } + } + } + } + + /// + /// Combine buffer backing types, selecting the one with highest priority. + /// + /// First buffer backing type + /// Second buffer backing type + /// Combined buffer backing type + private static BufferBackingType CombineTypes(BufferBackingType left, BufferBackingType right) + { + return (BufferBackingType)Math.Max((int)left, (int)right); + } + + /// + /// Combine the state from the given buffer backing state with this one, + /// so that the state isn't lost when migrating buffers. + /// + /// Buffer state to combine into this state + private void CombineState(BufferBackingState oldState) + { + _setCount += oldState._setCount; + _writeCount += oldState._writeCount; + _flushCount += oldState._flushCount; + _flushTemp += oldState._flushTemp; + _lastFlushWrite = -1; + _deviceLocalForceCount = Math.Max(_deviceLocalForceCount, oldState._deviceLocalForceCount); + + _canSwap &= oldState._canSwap; + + _desiredType = CombineTypes(_desiredType, oldState._desiredType); + } + + /// + /// Get the buffer access for the desired backing type, and record that type as now being active. + /// + /// Parent buffer + /// Buffer access + public BufferAccess SwitchAccess(Buffer parent) + { + BufferAccess access = parent.SparseCompatible ? 
BufferAccess.SparseCompatible : BufferAccess.Default; + + bool isBackendManaged = _systemMemoryType == SystemMemoryType.BackendManaged; + + if (!isBackendManaged) + { + switch (_desiredType) + { + case BufferBackingType.HostMemory: + access |= BufferAccess.HostMemory; + break; + case BufferBackingType.DeviceMemory: + access |= BufferAccess.DeviceMemory; + break; + case BufferBackingType.DeviceMemoryWithFlush: + access |= BufferAccess.DeviceMemoryMapped; + break; + } + } + + _activeType = _desiredType; + + return access; + } + + /// + /// Record when data has been uploaded to the buffer. + /// + public void RecordSet() + { + _setCount++; + + ConsiderUseCounts(); + } + + /// + /// Record when data has been flushed from the buffer. + /// + public void RecordFlush() + { + if (_lastFlushWrite != _writeCount) + { + // If it's on the same page as the last flush, ignore it. + _lastFlushWrite = _writeCount; + _flushCount++; + } + } + + /// + /// Determine if the buffer backing should be changed. + /// + /// True if the desired backing type is different from the current type + public readonly bool ShouldChangeBacking() + { + return _desiredType != _activeType; + } + + /// + /// Determine if the buffer backing should be changed, considering a new use with the given buffer stage. + /// + /// Buffer stage for the use + /// True if the desired backing type is different from the current type + public bool ShouldChangeBacking(BufferStage stage) + { + if (!_canSwap) + { + return false; + } + + BufferStage storageFlags = stage & BufferStage.StorageMask; + + if (storageFlags != 0) + { + if (storageFlags != BufferStage.StorageRead) + { + // Storage write. + _writeCount++; + + var rawStage = stage & BufferStage.StageMask; + + if (rawStage == BufferStage.Fragment) + { + // Switch to device memory, swap back only if this use disappears. 
+ + _desiredType = CombineTypes(_desiredType, BufferBackingType.DeviceMemory); + _deviceLocalForceCount = DeviceLocalForceExpiry; + + // TODO: Might be nice to force atomic access to be device local for any stage. + } + } + + ConsiderUseCounts(); + } + + return _desiredType != _activeType; + } + + /// + /// Evaluate the current counts to determine what the buffer's desired backing type is. + /// This method depends on heuristics devised by testing a variety of software. + /// + private void ConsiderUseCounts() + { + if (_canSwap) + { + if (_writeCount >= WriteCountThreshold || _setCount >= SetCountThreshold || _flushCount >= FlushCountThreshold) + { + if (_deviceLocalForceCount > 0 && --_deviceLocalForceCount != 0) + { + // Some buffer usage demanded that the buffer stay device local. + // The desired type was selected when this counter was set. + } + else if (_flushCount > 0 || _flushTemp-- > 0) + { + // Buffers that flush should ideally be mapped in host address space for easy copies. + // If the buffer is large it will do better on GPU memory, as there will be more writes than data flushes (typically individual pages). + // If it is small, then it's likely most of the buffer will be flushed so we want it on host memory, as access is cached. + _desiredType = _size > DeviceLocalSizeThreshold ? BufferBackingType.DeviceMemoryWithFlush : BufferBackingType.HostMemory; + } + else if (_writeCount >= WriteCountThreshold) + { + // Buffers that are written often should ideally be in the device local heap. (Storage buffers) + _desiredType = BufferBackingType.DeviceMemory; + } + else if (_setCount > SetCountThreshold) + { + // Buffers that have their data set often should ideally be host mapped. (Constant buffers) + _desiredType = BufferBackingType.HostMemory; + } + + // It's harder for a buffer that is flushed to revert to another type of mapping. 
+ if (_flushCount > 0) + { + _flushTemp = 1000; + } + + _lastFlushWrite = -1; + _flushCount = 0; + _writeCount = 0; + _setCount = 0; + } + } + } + } +} diff --git a/src/Ryujinx.Graphics.Gpu/Memory/BufferCache.cs b/src/Ryujinx.Graphics.Gpu/Memory/BufferCache.cs index c6284780..66d2cdb6 100644 --- a/src/Ryujinx.Graphics.Gpu/Memory/BufferCache.cs +++ b/src/Ryujinx.Graphics.Gpu/Memory/BufferCache.cs @@ -107,8 +107,9 @@ namespace Ryujinx.Graphics.Gpu.Memory /// GPU memory manager where the buffer is mapped /// Start GPU virtual address of the buffer /// Size in bytes of the buffer + /// The type of usage that created the buffer /// Contiguous physical range of the buffer, after address translation - public MultiRange TranslateAndCreateBuffer(MemoryManager memoryManager, ulong gpuVa, ulong size) + public MultiRange TranslateAndCreateBuffer(MemoryManager memoryManager, ulong gpuVa, ulong size, BufferStage stage) { if (gpuVa == 0) { @@ -119,7 +120,7 @@ namespace Ryujinx.Graphics.Gpu.Memory if (address != MemoryManager.PteUnmapped) { - CreateBuffer(address, size); + CreateBuffer(address, size, stage); } return new MultiRange(address, size); @@ -132,8 +133,9 @@ namespace Ryujinx.Graphics.Gpu.Memory /// GPU memory manager where the buffer is mapped /// Start GPU virtual address of the buffer /// Size in bytes of the buffer + /// The type of usage that created the buffer /// Physical ranges of the buffer, after address translation - public MultiRange TranslateAndCreateMultiBuffers(MemoryManager memoryManager, ulong gpuVa, ulong size) + public MultiRange TranslateAndCreateMultiBuffers(MemoryManager memoryManager, ulong gpuVa, ulong size, BufferStage stage) { if (gpuVa == 0) { @@ -149,7 +151,7 @@ namespace Ryujinx.Graphics.Gpu.Memory return range; } - CreateBuffer(range); + CreateBuffer(range, stage); return range; } @@ -161,8 +163,9 @@ namespace Ryujinx.Graphics.Gpu.Memory /// GPU memory manager where the buffer is mapped /// Start GPU virtual address of the buffer /// Size in 
bytes of the buffer + /// The type of usage that created the buffer /// Physical ranges of the buffer, after address translation - public MultiRange TranslateAndCreateMultiBuffersPhysicalOnly(MemoryManager memoryManager, ulong gpuVa, ulong size) + public MultiRange TranslateAndCreateMultiBuffersPhysicalOnly(MemoryManager memoryManager, ulong gpuVa, ulong size, BufferStage stage) { if (gpuVa == 0) { @@ -186,11 +189,11 @@ namespace Ryujinx.Graphics.Gpu.Memory { if (range.Count > 1) { - CreateBuffer(subRange.Address, subRange.Size, SparseBufferAlignmentSize); + CreateBuffer(subRange.Address, subRange.Size, stage, SparseBufferAlignmentSize); } else { - CreateBuffer(subRange.Address, subRange.Size); + CreateBuffer(subRange.Address, subRange.Size, stage); } } } @@ -203,11 +206,12 @@ namespace Ryujinx.Graphics.Gpu.Memory /// This can be used to ensure the existance of a buffer. /// /// Physical ranges of memory where the buffer data is located - public void CreateBuffer(MultiRange range) + /// The type of usage that created the buffer + public void CreateBuffer(MultiRange range, BufferStage stage) { if (range.Count > 1) { - CreateMultiRangeBuffer(range); + CreateMultiRangeBuffer(range, stage); } else { @@ -215,7 +219,7 @@ namespace Ryujinx.Graphics.Gpu.Memory if (subRange.Address != MemoryManager.PteUnmapped) { - CreateBuffer(subRange.Address, subRange.Size); + CreateBuffer(subRange.Address, subRange.Size, stage); } } } @@ -226,7 +230,8 @@ namespace Ryujinx.Graphics.Gpu.Memory /// /// Address of the buffer in memory /// Size of the buffer in bytes - public void CreateBuffer(ulong address, ulong size) + /// The type of usage that created the buffer + public void CreateBuffer(ulong address, ulong size, BufferStage stage) { ulong endAddress = address + size; @@ -239,7 +244,7 @@ namespace Ryujinx.Graphics.Gpu.Memory alignedEndAddress += BufferAlignmentSize; } - CreateBufferAligned(alignedAddress, alignedEndAddress - alignedAddress); + CreateBufferAligned(alignedAddress, 
alignedEndAddress - alignedAddress, stage); } /// @@ -248,8 +253,9 @@ namespace Ryujinx.Graphics.Gpu.Memory /// /// Address of the buffer in memory /// Size of the buffer in bytes + /// The type of usage that created the buffer /// Alignment of the start address of the buffer in bytes - public void CreateBuffer(ulong address, ulong size, ulong alignment) + public void CreateBuffer(ulong address, ulong size, BufferStage stage, ulong alignment) { ulong alignmentMask = alignment - 1; ulong pageAlignmentMask = BufferAlignmentMask; @@ -264,7 +270,7 @@ namespace Ryujinx.Graphics.Gpu.Memory alignedEndAddress += pageAlignmentMask; } - CreateBufferAligned(alignedAddress, alignedEndAddress - alignedAddress, alignment); + CreateBufferAligned(alignedAddress, alignedEndAddress - alignedAddress, stage, alignment); } /// @@ -272,7 +278,8 @@ namespace Ryujinx.Graphics.Gpu.Memory /// if it does not exist yet. /// /// Physical ranges of memory - private void CreateMultiRangeBuffer(MultiRange range) + /// The type of usage that created the buffer + private void CreateMultiRangeBuffer(MultiRange range, BufferStage stage) { // Ensure all non-contiguous buffer we might use are sparse aligned. 
for (int i = 0; i < range.Count; i++) @@ -281,7 +288,7 @@ namespace Ryujinx.Graphics.Gpu.Memory if (subRange.Address != MemoryManager.PteUnmapped) { - CreateBuffer(subRange.Address, subRange.Size, SparseBufferAlignmentSize); + CreateBuffer(subRange.Address, subRange.Size, stage, SparseBufferAlignmentSize); } } @@ -431,9 +438,9 @@ namespace Ryujinx.Graphics.Gpu.Memory result.EndGpuAddress < gpuVa + size || result.UnmappedSequence != result.Buffer.UnmappedSequence) { - MultiRange range = TranslateAndCreateBuffer(memoryManager, gpuVa, size); + MultiRange range = TranslateAndCreateBuffer(memoryManager, gpuVa, size, BufferStage.Internal); ulong address = range.GetSubRange(0).Address; - result = new BufferCacheEntry(address, gpuVa, GetBuffer(address, size)); + result = new BufferCacheEntry(address, gpuVa, GetBuffer(address, size, BufferStage.Internal)); _dirtyCache[gpuVa] = result; } @@ -466,9 +473,9 @@ namespace Ryujinx.Graphics.Gpu.Memory result.EndGpuAddress < alignedEndGpuVa || result.UnmappedSequence != result.Buffer.UnmappedSequence) { - MultiRange range = TranslateAndCreateBuffer(memoryManager, alignedGpuVa, size); + MultiRange range = TranslateAndCreateBuffer(memoryManager, alignedGpuVa, size, BufferStage.None); ulong address = range.GetSubRange(0).Address; - result = new BufferCacheEntry(address, alignedGpuVa, GetBuffer(address, size)); + result = new BufferCacheEntry(address, alignedGpuVa, GetBuffer(address, size, BufferStage.None)); _modifiedCache[alignedGpuVa] = result; } @@ -485,7 +492,8 @@ namespace Ryujinx.Graphics.Gpu.Memory /// /// Address of the buffer in guest memory /// Size in bytes of the buffer - private void CreateBufferAligned(ulong address, ulong size) + /// The type of usage that created the buffer + private void CreateBufferAligned(ulong address, ulong size, BufferStage stage) { Buffer[] overlaps = _bufferOverlaps; int overlapsCount = _buffers.FindOverlapsNonOverlapping(address, size, ref overlaps); @@ -546,13 +554,13 @@ namespace 
Ryujinx.Graphics.Gpu.Memory ulong newSize = endAddress - address; - CreateBufferAligned(address, newSize, anySparseCompatible, overlaps, overlapsCount); + CreateBufferAligned(address, newSize, stage, anySparseCompatible, overlaps, overlapsCount); } } else { // No overlap, just create a new buffer. - Buffer buffer = new(_context, _physicalMemory, address, size, sparseCompatible: false); + Buffer buffer = new(_context, _physicalMemory, address, size, stage, sparseCompatible: false); lock (_buffers) { @@ -570,8 +578,9 @@ namespace Ryujinx.Graphics.Gpu.Memory /// /// Address of the buffer in guest memory /// Size in bytes of the buffer + /// The type of usage that created the buffer /// Alignment of the start address of the buffer - private void CreateBufferAligned(ulong address, ulong size, ulong alignment) + private void CreateBufferAligned(ulong address, ulong size, BufferStage stage, ulong alignment) { Buffer[] overlaps = _bufferOverlaps; int overlapsCount = _buffers.FindOverlapsNonOverlapping(address, size, ref overlaps); @@ -624,13 +633,13 @@ namespace Ryujinx.Graphics.Gpu.Memory ulong newSize = endAddress - address; - CreateBufferAligned(address, newSize, sparseAligned, overlaps, overlapsCount); + CreateBufferAligned(address, newSize, stage, sparseAligned, overlaps, overlapsCount); } } else { // No overlap, just create a new buffer. 
- Buffer buffer = new(_context, _physicalMemory, address, size, sparseAligned); + Buffer buffer = new(_context, _physicalMemory, address, size, stage, sparseAligned); lock (_buffers) { @@ -648,12 +657,13 @@ namespace Ryujinx.Graphics.Gpu.Memory /// /// Address of the buffer in guest memory /// Size in bytes of the buffer + /// The type of usage that created the buffer /// Indicates if the buffer can be used in a sparse buffer mapping /// Buffers overlapping the range /// Total of overlaps - private void CreateBufferAligned(ulong address, ulong size, bool sparseCompatible, Buffer[] overlaps, int overlapsCount) + private void CreateBufferAligned(ulong address, ulong size, BufferStage stage, bool sparseCompatible, Buffer[] overlaps, int overlapsCount) { - Buffer newBuffer = new Buffer(_context, _physicalMemory, address, size, sparseCompatible, overlaps.Take(overlapsCount)); + Buffer newBuffer = new Buffer(_context, _physicalMemory, address, size, stage, sparseCompatible, overlaps.Take(overlapsCount)); lock (_buffers) { @@ -704,7 +714,7 @@ namespace Ryujinx.Graphics.Gpu.Memory for (int index = 0; index < overlapCount; index++) { - CreateMultiRangeBuffer(overlaps[index].Range); + CreateMultiRangeBuffer(overlaps[index].Range, BufferStage.None); } } @@ -731,8 +741,8 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Size in bytes of the copy public void CopyBuffer(MemoryManager memoryManager, ulong srcVa, ulong dstVa, ulong size) { - MultiRange srcRange = TranslateAndCreateMultiBuffersPhysicalOnly(memoryManager, srcVa, size); - MultiRange dstRange = TranslateAndCreateMultiBuffersPhysicalOnly(memoryManager, dstVa, size); + MultiRange srcRange = TranslateAndCreateMultiBuffersPhysicalOnly(memoryManager, srcVa, size, BufferStage.Copy); + MultiRange dstRange = TranslateAndCreateMultiBuffersPhysicalOnly(memoryManager, dstVa, size, BufferStage.Copy); if (srcRange.Count == 1 && dstRange.Count == 1) { @@ -788,8 +798,8 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Size in bytes of the 
copy private void CopyBufferSingleRange(MemoryManager memoryManager, ulong srcAddress, ulong dstAddress, ulong size) { - Buffer srcBuffer = GetBuffer(srcAddress, size); - Buffer dstBuffer = GetBuffer(dstAddress, size); + Buffer srcBuffer = GetBuffer(srcAddress, size, BufferStage.Copy); + Buffer dstBuffer = GetBuffer(dstAddress, size, BufferStage.Copy); int srcOffset = (int)(srcAddress - srcBuffer.Address); int dstOffset = (int)(dstAddress - dstBuffer.Address); @@ -803,7 +813,7 @@ namespace Ryujinx.Graphics.Gpu.Memory if (srcBuffer.IsModified(srcAddress, size)) { - dstBuffer.SignalModified(dstAddress, size); + dstBuffer.SignalModified(dstAddress, size, BufferStage.Copy); } else { @@ -828,12 +838,12 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Value to be written into the buffer public void ClearBuffer(MemoryManager memoryManager, ulong gpuVa, ulong size, uint value) { - MultiRange range = TranslateAndCreateMultiBuffersPhysicalOnly(memoryManager, gpuVa, size); + MultiRange range = TranslateAndCreateMultiBuffersPhysicalOnly(memoryManager, gpuVa, size, BufferStage.Copy); for (int index = 0; index < range.Count; index++) { MemoryRange subRange = range.GetSubRange(index); - Buffer buffer = GetBuffer(subRange.Address, subRange.Size); + Buffer buffer = GetBuffer(subRange.Address, subRange.Size, BufferStage.Copy); int offset = (int)(subRange.Address - buffer.Address); @@ -849,18 +859,19 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Gets a buffer sub-range starting at a given memory address, aligned to the next page boundary. 
/// /// Physical regions of memory where the buffer is mapped + /// Buffer stage that triggered the access /// Whether the buffer will be written to by this use /// The buffer sub-range starting at the given memory address - public BufferRange GetBufferRangeAligned(MultiRange range, bool write = false) + public BufferRange GetBufferRangeAligned(MultiRange range, BufferStage stage, bool write = false) { if (range.Count > 1) { - return GetBuffer(range, write).GetRange(range); + return GetBuffer(range, stage, write).GetRange(range); } else { MemoryRange subRange = range.GetSubRange(0); - return GetBuffer(subRange.Address, subRange.Size, write).GetRangeAligned(subRange.Address, subRange.Size, write); + return GetBuffer(subRange.Address, subRange.Size, stage, write).GetRangeAligned(subRange.Address, subRange.Size, write); } } @@ -868,18 +879,19 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Gets a buffer sub-range for a given memory range. /// /// Physical regions of memory where the buffer is mapped + /// Buffer stage that triggered the access /// Whether the buffer will be written to by this use /// The buffer sub-range for the given range - public BufferRange GetBufferRange(MultiRange range, bool write = false) + public BufferRange GetBufferRange(MultiRange range, BufferStage stage, bool write = false) { if (range.Count > 1) { - return GetBuffer(range, write).GetRange(range); + return GetBuffer(range, stage, write).GetRange(range); } else { MemoryRange subRange = range.GetSubRange(0); - return GetBuffer(subRange.Address, subRange.Size, write).GetRange(subRange.Address, subRange.Size, write); + return GetBuffer(subRange.Address, subRange.Size, stage, write).GetRange(subRange.Address, subRange.Size, write); } } @@ -888,9 +900,10 @@ namespace Ryujinx.Graphics.Gpu.Memory /// A buffer overlapping with the specified range is assumed to already exist on the cache. 
/// /// Physical regions of memory where the buffer is mapped + /// Buffer stage that triggered the access /// Whether the buffer will be written to by this use /// The buffer where the range is fully contained - private MultiRangeBuffer GetBuffer(MultiRange range, bool write = false) + private MultiRangeBuffer GetBuffer(MultiRange range, BufferStage stage, bool write = false) { for (int i = 0; i < range.Count; i++) { @@ -902,7 +915,7 @@ namespace Ryujinx.Graphics.Gpu.Memory if (write) { - subBuffer.SignalModified(subRange.Address, subRange.Size); + subBuffer.SignalModified(subRange.Address, subRange.Size, stage); } } @@ -935,9 +948,10 @@ namespace Ryujinx.Graphics.Gpu.Memory /// /// Start address of the memory range /// Size in bytes of the memory range + /// Buffer stage that triggered the access /// Whether the buffer will be written to by this use /// The buffer where the range is fully contained - private Buffer GetBuffer(ulong address, ulong size, bool write = false) + private Buffer GetBuffer(ulong address, ulong size, BufferStage stage, bool write = false) { Buffer buffer; @@ -950,7 +964,7 @@ namespace Ryujinx.Graphics.Gpu.Memory if (write) { - buffer.SignalModified(address, size); + buffer.SignalModified(address, size, stage); } } else @@ -1004,6 +1018,18 @@ namespace Ryujinx.Graphics.Gpu.Memory } } + /// + /// Signal that the given buffer's handle has changed, + /// forcing rebind and any overlapping multi-range buffers to be recreated. + /// + /// The buffer that has changed handle + public void BufferBackingChanged(Buffer buffer) + { + NotifyBuffersModified?.Invoke(); + + RecreateMultiRangeBuffers(buffer.Address, buffer.Size); + } + /// /// Prune any invalid entries from a quick access dictionary. 
/// diff --git a/src/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs b/src/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs index 8f2201e0..26d9501c 100644 --- a/src/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs +++ b/src/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs @@ -156,7 +156,7 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Type of each index buffer element public void SetIndexBuffer(ulong gpuVa, ulong size, IndexType type) { - MultiRange range = _channel.MemoryManager.Physical.BufferCache.TranslateAndCreateBuffer(_channel.MemoryManager, gpuVa, size); + MultiRange range = _channel.MemoryManager.Physical.BufferCache.TranslateAndCreateBuffer(_channel.MemoryManager, gpuVa, size, BufferStage.IndexBuffer); _indexBuffer.Range = range; _indexBuffer.Type = type; @@ -186,7 +186,7 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Vertex divisor of the buffer, for instanced draws public void SetVertexBuffer(int index, ulong gpuVa, ulong size, int stride, int divisor) { - MultiRange range = _channel.MemoryManager.Physical.BufferCache.TranslateAndCreateBuffer(_channel.MemoryManager, gpuVa, size); + MultiRange range = _channel.MemoryManager.Physical.BufferCache.TranslateAndCreateBuffer(_channel.MemoryManager, gpuVa, size, BufferStage.VertexBuffer); _vertexBuffers[index].Range = range; _vertexBuffers[index].Stride = stride; @@ -213,7 +213,7 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Size in bytes of the transform feedback buffer public void SetTransformFeedbackBuffer(int index, ulong gpuVa, ulong size) { - MultiRange range = _channel.MemoryManager.Physical.BufferCache.TranslateAndCreateMultiBuffers(_channel.MemoryManager, gpuVa, size); + MultiRange range = _channel.MemoryManager.Physical.BufferCache.TranslateAndCreateMultiBuffers(_channel.MemoryManager, gpuVa, size, BufferStage.TransformFeedback); _transformFeedbackBuffers[index] = new BufferBounds(range); _transformFeedbackBuffersDirty = true; @@ -260,7 +260,7 @@ namespace Ryujinx.Graphics.Gpu.Memory gpuVa = BitUtils.AlignDown(gpuVa, 
(ulong)_context.Capabilities.StorageBufferOffsetAlignment); - MultiRange range = _channel.MemoryManager.Physical.BufferCache.TranslateAndCreateMultiBuffers(_channel.MemoryManager, gpuVa, size); + MultiRange range = _channel.MemoryManager.Physical.BufferCache.TranslateAndCreateMultiBuffers(_channel.MemoryManager, gpuVa, size, BufferStageUtils.ComputeStorage(flags)); _cpStorageBuffers.SetBounds(index, range, flags); } @@ -284,7 +284,7 @@ namespace Ryujinx.Graphics.Gpu.Memory gpuVa = BitUtils.AlignDown(gpuVa, (ulong)_context.Capabilities.StorageBufferOffsetAlignment); - MultiRange range = _channel.MemoryManager.Physical.BufferCache.TranslateAndCreateMultiBuffers(_channel.MemoryManager, gpuVa, size); + MultiRange range = _channel.MemoryManager.Physical.BufferCache.TranslateAndCreateMultiBuffers(_channel.MemoryManager, gpuVa, size, BufferStageUtils.GraphicsStorage(stage, flags)); if (!buffers.Buffers[index].Range.Equals(range)) { @@ -303,7 +303,7 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Size in bytes of the storage buffer public void SetComputeUniformBuffer(int index, ulong gpuVa, ulong size) { - MultiRange range = _channel.MemoryManager.Physical.BufferCache.TranslateAndCreateBuffer(_channel.MemoryManager, gpuVa, size); + MultiRange range = _channel.MemoryManager.Physical.BufferCache.TranslateAndCreateBuffer(_channel.MemoryManager, gpuVa, size, BufferStage.Compute); _cpUniformBuffers.SetBounds(index, range); } @@ -318,7 +318,7 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Size in bytes of the storage buffer public void SetGraphicsUniformBuffer(int stage, int index, ulong gpuVa, ulong size) { - MultiRange range = _channel.MemoryManager.Physical.BufferCache.TranslateAndCreateBuffer(_channel.MemoryManager, gpuVa, size); + MultiRange range = _channel.MemoryManager.Physical.BufferCache.TranslateAndCreateBuffer(_channel.MemoryManager, gpuVa, size, BufferStageUtils.FromShaderStage(stage)); _gpUniformBuffers[stage].SetBounds(index, range); _gpUniformBuffersDirty = true; @@ 
-502,7 +502,7 @@ namespace Ryujinx.Graphics.Gpu.Memory foreach (var binding in _bufferTextures) { var isStore = binding.BindingInfo.Flags.HasFlag(TextureUsageFlags.ImageStore); - var range = bufferCache.GetBufferRange(binding.Range, isStore); + var range = bufferCache.GetBufferRange(binding.Range, BufferStageUtils.TextureBuffer(binding.Stage, binding.BindingInfo.Flags), isStore); binding.Texture.SetStorage(range); // The texture must be rebound to use the new storage if it was updated. @@ -526,7 +526,7 @@ namespace Ryujinx.Graphics.Gpu.Memory foreach (var binding in _bufferTextureArrays) { - var range = bufferCache.GetBufferRange(binding.Range); + var range = bufferCache.GetBufferRange(binding.Range, BufferStage.None); binding.Texture.SetStorage(range); textureArray[0] = binding.Texture; @@ -536,7 +536,7 @@ namespace Ryujinx.Graphics.Gpu.Memory foreach (var binding in _bufferImageArrays) { var isStore = binding.BindingInfo.Flags.HasFlag(TextureUsageFlags.ImageStore); - var range = bufferCache.GetBufferRange(binding.Range, isStore); + var range = bufferCache.GetBufferRange(binding.Range, BufferStage.None, isStore); binding.Texture.SetStorage(range); textureArray[0] = binding.Texture; @@ -565,7 +565,7 @@ namespace Ryujinx.Graphics.Gpu.Memory if (!_indexBuffer.Range.IsUnmapped) { - BufferRange buffer = bufferCache.GetBufferRange(_indexBuffer.Range); + BufferRange buffer = bufferCache.GetBufferRange(_indexBuffer.Range, BufferStage.IndexBuffer); _context.Renderer.Pipeline.SetIndexBuffer(buffer, _indexBuffer.Type); } @@ -597,7 +597,7 @@ namespace Ryujinx.Graphics.Gpu.Memory continue; } - BufferRange buffer = bufferCache.GetBufferRange(vb.Range); + BufferRange buffer = bufferCache.GetBufferRange(vb.Range, BufferStage.VertexBuffer); vertexBuffers[index] = new VertexBufferDescriptor(buffer, vb.Stride, vb.Divisor); } @@ -637,7 +637,7 @@ namespace Ryujinx.Graphics.Gpu.Memory continue; } - tfbs[index] = bufferCache.GetBufferRange(tfb.Range, write: true); + tfbs[index] = 
bufferCache.GetBufferRange(tfb.Range, BufferStage.TransformFeedback, write: true); } _context.Renderer.Pipeline.SetTransformFeedbackBuffers(tfbs); @@ -684,7 +684,7 @@ namespace Ryujinx.Graphics.Gpu.Memory _context.SupportBufferUpdater.SetTfeOffset(index, tfeOffset); - buffers[index] = new BufferAssignment(index, bufferCache.GetBufferRange(range, write: true)); + buffers[index] = new BufferAssignment(index, bufferCache.GetBufferRange(range, BufferStage.TransformFeedback, write: true)); } } @@ -751,6 +751,7 @@ namespace Ryujinx.Graphics.Gpu.Memory for (ShaderStage stage = ShaderStage.Vertex; stage <= ShaderStage.Fragment; stage++) { ref var buffers = ref bindings[(int)stage - 1]; + BufferStage bufferStage = BufferStageUtils.FromShaderStage(stage); for (int index = 0; index < buffers.Count; index++) { @@ -762,8 +763,8 @@ namespace Ryujinx.Graphics.Gpu.Memory { var isWrite = bounds.Flags.HasFlag(BufferUsageFlags.Write); var range = isStorage - ? bufferCache.GetBufferRangeAligned(bounds.Range, isWrite) - : bufferCache.GetBufferRange(bounds.Range); + ? bufferCache.GetBufferRangeAligned(bounds.Range, bufferStage | BufferStageUtils.FromUsage(bounds.Flags), isWrite) + : bufferCache.GetBufferRange(bounds.Range, bufferStage); ranges[rangesCount++] = new BufferAssignment(bindingInfo.Binding, range); } @@ -799,8 +800,8 @@ namespace Ryujinx.Graphics.Gpu.Memory { var isWrite = bounds.Flags.HasFlag(BufferUsageFlags.Write); var range = isStorage - ? bufferCache.GetBufferRangeAligned(bounds.Range, isWrite) - : bufferCache.GetBufferRange(bounds.Range); + ? 
bufferCache.GetBufferRangeAligned(bounds.Range, BufferStageUtils.ComputeStorage(bounds.Flags), isWrite) + : bufferCache.GetBufferRange(bounds.Range, BufferStage.Compute); ranges[rangesCount++] = new BufferAssignment(bindingInfo.Binding, range); } @@ -875,7 +876,7 @@ namespace Ryujinx.Graphics.Gpu.Memory Format format, bool isImage) { - _channel.MemoryManager.Physical.BufferCache.CreateBuffer(range); + _channel.MemoryManager.Physical.BufferCache.CreateBuffer(range, BufferStageUtils.TextureBuffer(stage, bindingInfo.Flags)); _bufferTextures.Add(new BufferTextureBinding(stage, texture, range, bindingInfo, format, isImage)); } @@ -883,6 +884,7 @@ namespace Ryujinx.Graphics.Gpu.Memory /// /// Sets the buffer storage of a buffer texture array element. This will be bound when the buffer manager commits bindings. /// + /// Shader stage accessing the texture /// Texture array where the element will be inserted /// Buffer texture /// Physical ranges of memory where the buffer texture data is located @@ -890,6 +892,7 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Index of the binding on the array /// Format of the buffer texture public void SetBufferTextureStorage( + ShaderStage stage, ITextureArray array, ITexture texture, MultiRange range, @@ -897,7 +900,7 @@ namespace Ryujinx.Graphics.Gpu.Memory int index, Format format) { - _channel.MemoryManager.Physical.BufferCache.CreateBuffer(range); + _channel.MemoryManager.Physical.BufferCache.CreateBuffer(range, BufferStageUtils.TextureBuffer(stage, bindingInfo.Flags)); _bufferTextureArrays.Add(new BufferTextureArrayBinding(array, texture, range, bindingInfo, index, format)); } @@ -905,6 +908,7 @@ namespace Ryujinx.Graphics.Gpu.Memory /// /// Sets the buffer storage of a buffer image array element. This will be bound when the buffer manager commits bindings. 
/// + /// Shader stage accessing the texture /// Image array where the element will be inserted /// Buffer texture /// Physical ranges of memory where the buffer texture data is located @@ -912,6 +916,7 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Index of the binding on the array /// Format of the buffer texture public void SetBufferTextureStorage( + ShaderStage stage, IImageArray array, ITexture texture, MultiRange range, @@ -919,7 +924,7 @@ namespace Ryujinx.Graphics.Gpu.Memory int index, Format format) { - _channel.MemoryManager.Physical.BufferCache.CreateBuffer(range); + _channel.MemoryManager.Physical.BufferCache.CreateBuffer(range, BufferStageUtils.TextureBuffer(stage, bindingInfo.Flags)); _bufferImageArrays.Add(new BufferTextureArrayBinding(array, texture, range, bindingInfo, index, format)); } diff --git a/src/Ryujinx.Graphics.Gpu/Memory/BufferMigration.cs b/src/Ryujinx.Graphics.Gpu/Memory/BufferMigration.cs index 0a526803..ce998531 100644 --- a/src/Ryujinx.Graphics.Gpu/Memory/BufferMigration.cs +++ b/src/Ryujinx.Graphics.Gpu/Memory/BufferMigration.cs @@ -1,37 +1,21 @@ using System; +using System.Threading; namespace Ryujinx.Graphics.Gpu.Memory { /// - /// A record of when buffer data was copied from one buffer to another, along with the SyncNumber when the migration will be complete. - /// Keeps the source buffer alive for data flushes until the migration is complete. + /// A record of when buffer data was copied from multiple buffers to one migration target, + /// along with the SyncNumber when the migration will be complete. + /// Keeps the source buffers alive for data flushes until the migration is complete. + /// All spans cover the full range of the "destination" buffer. /// internal class BufferMigration : IDisposable { /// - /// The offset for the migrated region. + /// Ranges from source buffers that were copied as part of this migration. + /// Ordered by increasing base address. 
/// - private readonly ulong _offset; - - /// - /// The size for the migrated region. - /// - private readonly ulong _size; - - /// - /// The buffer that was migrated from. - /// - private readonly Buffer _buffer; - - /// - /// The source range action, to be called on overlap with an unreached sync number. - /// - private readonly Action _sourceRangeAction; - - /// - /// The source range list. - /// - private readonly BufferModifiedRangeList _source; + public BufferMigrationSpan[] Spans { get; private set; } /// /// The destination range list. This range list must be updated when flushing the source. @@ -43,55 +27,193 @@ namespace Ryujinx.Graphics.Gpu.Memory /// public readonly ulong SyncNumber; + /// + /// Number of active users there are traversing this migration's spans. + /// + private int _refCount; + + /// + /// Create a new buffer migration. + /// + /// Source spans for the migration + /// Destination buffer range list + /// Sync number where this migration will be complete + public BufferMigration(BufferMigrationSpan[] spans, BufferModifiedRangeList destination, ulong syncNumber) + { + Spans = spans; + Destination = destination; + SyncNumber = syncNumber; + } + + /// + /// Add a span to the migration. Allocates a new array with the target size, and replaces it. + /// + /// + /// The base address for the span is assumed to be higher than all other spans in the migration, + /// to keep the span array ordered. + /// + public void AddSpanToEnd(BufferMigrationSpan span) + { + BufferMigrationSpan[] oldSpans = Spans; + + BufferMigrationSpan[] newSpans = new BufferMigrationSpan[oldSpans.Length + 1]; + + oldSpans.CopyTo(newSpans, 0); + + newSpans[oldSpans.Length] = span; + + Spans = newSpans; + } + + /// + /// Performs the given range action, or one from a migration that overlaps and has not synced yet. 
+ /// + /// The offset to pass to the action + /// The size to pass to the action + /// The sync number that has been reached + /// The action to perform + public void RangeActionWithMigration(ulong offset, ulong size, ulong syncNumber, BufferFlushAction rangeAction) + { + long syncDiff = (long)(syncNumber - SyncNumber); + + if (syncDiff >= 0) + { + // The migration has completed. Run the parent action. + rangeAction(offset, size, syncNumber); + } + else + { + Interlocked.Increment(ref _refCount); + + ulong prevAddress = offset; + ulong endAddress = offset + size; + + foreach (BufferMigrationSpan span in Spans) + { + if (!span.Overlaps(offset, size)) + { + continue; + } + + if (span.Address > prevAddress) + { + // There's a gap between this span and the last (or the start address). Flush the range using the parent action. + + rangeAction(prevAddress, span.Address - prevAddress, syncNumber); + } + + span.RangeActionWithMigration(offset, size, syncNumber); + + prevAddress = span.Address + span.Size; + } + + if (endAddress > prevAddress) + { + // There's a gap at the end of the range with no migration. Flush the range using the parent action. + rangeAction(prevAddress, endAddress - prevAddress, syncNumber); + } + + Interlocked.Decrement(ref _refCount); + } + } + + /// + /// Dispose the buffer migration. This removes the reference from the destination range list, + /// and runs all the dispose buffers for the migration spans. (typically disposes the source buffer) + /// + public void Dispose() + { + while (Volatile.Read(ref _refCount) > 0) + { + // Coming into this method, the sync for the migration will be met, so nothing can increment the ref count. + // However, an existing traversal of the spans for data flush could still be in progress. + // Spin if this is ever the case, so they don't get disposed before the operation is complete. 
+ } + + Destination.RemoveMigration(this); + + foreach (BufferMigrationSpan span in Spans) + { + span.Dispose(); + } + } + } + + /// + /// A record of when buffer data was copied from one buffer to another, for a specific range in a source buffer. + /// Keeps the source buffer alive for data flushes until the migration is complete. + /// + internal readonly struct BufferMigrationSpan : IDisposable + { + /// + /// The offset for the migrated region. + /// + public readonly ulong Address; + + /// + /// The size for the migrated region. + /// + public readonly ulong Size; + + /// + /// The action to perform when the migration isn't needed anymore. + /// + private readonly Action _disposeAction; + + /// + /// The source range action, to be called on overlap with an unreached sync number. + /// + private readonly BufferFlushAction _sourceRangeAction; + + /// + /// Optional migration for the source data. Can chain together if many migrations happen in a short time. + /// If this is null, then _sourceRangeAction will always provide up to date data. + /// + private readonly BufferMigration _source; + /// /// Creates a record for a buffer migration. 
/// /// The source buffer for this migration + /// The action to perform when the migration isn't needed anymore /// The flush action for the source buffer - /// The modified range list for the source buffer - /// The modified range list for the destination buffer - /// The sync number for when the migration is complete - public BufferMigration( + /// Pending migration for the source buffer + public BufferMigrationSpan( Buffer buffer, - Action sourceRangeAction, - BufferModifiedRangeList source, - BufferModifiedRangeList dest, - ulong syncNumber) + Action disposeAction, + BufferFlushAction sourceRangeAction, + BufferMigration source) { - _offset = buffer.Address; - _size = buffer.Size; - _buffer = buffer; + Address = buffer.Address; + Size = buffer.Size; + _disposeAction = disposeAction; _sourceRangeAction = sourceRangeAction; _source = source; - Destination = dest; - SyncNumber = syncNumber; } + /// + /// Creates a record for a buffer migration, using the default buffer dispose action. + /// + /// The source buffer for this migration + /// The flush action for the source buffer + /// Pending migration for the source buffer + public BufferMigrationSpan( + Buffer buffer, + BufferFlushAction sourceRangeAction, + BufferMigration source) : this(buffer, buffer.DecrementReferenceCount, sourceRangeAction, source) { } + /// /// Determine if the given range overlaps this migration, and has not been completed yet. /// /// Start offset /// Range size - /// The sync number that was waited on /// True if overlapping and in progress, false otherwise - public bool Overlaps(ulong offset, ulong size, ulong syncNumber) + public bool Overlaps(ulong offset, ulong size) { ulong end = offset + size; - ulong destEnd = _offset + _size; - long syncDiff = (long)(syncNumber - SyncNumber); // syncNumber is less if the copy has not completed. 
+ ulong destEnd = Address + Size; - return !(end <= _offset || offset >= destEnd) && syncDiff < 0; - } - - /// - /// Determine if the given range matches this migration. - /// - /// Start offset - /// Range size - /// True if the range exactly matches, false otherwise - public bool FullyMatches(ulong offset, ulong size) - { - return _offset == offset && _size == size; + return !(end <= Address || offset >= destEnd); } /// @@ -100,26 +222,30 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Start offset /// Range size /// Current sync number - /// The modified range list that originally owned this range - public void RangeActionWithMigration(ulong offset, ulong size, ulong syncNumber, BufferModifiedRangeList parent) + public void RangeActionWithMigration(ulong offset, ulong size, ulong syncNumber) { ulong end = offset + size; - end = Math.Min(_offset + _size, end); - offset = Math.Max(_offset, offset); + end = Math.Min(Address + Size, end); + offset = Math.Max(Address, offset); size = end - offset; - _source.RangeActionWithMigration(offset, size, syncNumber, parent, _sourceRangeAction); + if (_source != null) + { + _source.RangeActionWithMigration(offset, size, syncNumber, _sourceRangeAction); + } + else + { + _sourceRangeAction(offset, size, syncNumber); + } } /// - /// Removes this reference to the range list, potentially allowing for the source buffer to be disposed. + /// Removes this migration span, potentially allowing for the source buffer to be disposed. 
/// public void Dispose() { - Destination.RemoveMigration(this); - - _buffer.DecrementReferenceCount(); + _disposeAction(); } } } diff --git a/src/Ryujinx.Graphics.Gpu/Memory/BufferModifiedRangeList.cs b/src/Ryujinx.Graphics.Gpu/Memory/BufferModifiedRangeList.cs index 6ada8a4b..d330de63 100644 --- a/src/Ryujinx.Graphics.Gpu/Memory/BufferModifiedRangeList.cs +++ b/src/Ryujinx.Graphics.Gpu/Memory/BufferModifiedRangeList.cs @@ -1,7 +1,6 @@ using Ryujinx.Common.Pools; using Ryujinx.Memory.Range; using System; -using System.Collections.Generic; using System.Linq; namespace Ryujinx.Graphics.Gpu.Memory @@ -72,10 +71,10 @@ namespace Ryujinx.Graphics.Gpu.Memory private readonly GpuContext _context; private readonly Buffer _parent; - private readonly Action _flushAction; + private readonly BufferFlushAction _flushAction; - private List _sources; - private BufferMigration _migrationTarget; + private BufferMigration _source; + private BufferModifiedRangeList _migrationTarget; private readonly object _lock = new(); @@ -99,7 +98,7 @@ namespace Ryujinx.Graphics.Gpu.Memory /// GPU context that the buffer range list belongs to /// The parent buffer that owns this range list /// The flush action for the parent buffer - public BufferModifiedRangeList(GpuContext context, Buffer parent, Action flushAction) : base(BackingInitialSize) + public BufferModifiedRangeList(GpuContext context, Buffer parent, BufferFlushAction flushAction) : base(BackingInitialSize) { _context = context; _parent = parent; @@ -199,6 +198,36 @@ namespace Ryujinx.Graphics.Gpu.Memory } } + /// + /// Gets modified ranges within the specified region, and then fires the given action for each range individually. 
+ /// + /// Start address to query + /// Size to query + /// Sync number required for a range to be signalled + /// The action to call for each modified range + public void GetRangesAtSync(ulong address, ulong size, ulong syncNumber, Action rangeAction) + { + int count = 0; + + ref var overlaps = ref ThreadStaticArray.Get(); + + // Range list must be consistent for this operation. + lock (_lock) + { + count = FindOverlapsNonOverlapping(address, size, ref overlaps); + } + + for (int i = 0; i < count; i++) + { + BufferModifiedRange overlap = overlaps[i]; + + if (overlap.SyncNumber == syncNumber) + { + rangeAction(overlap.Address, overlap.Size); + } + } + } + /// /// Gets modified ranges within the specified region, and then fires the given action for each range individually. /// @@ -245,41 +274,16 @@ namespace Ryujinx.Graphics.Gpu.Memory /// The offset to pass to the action /// The size to pass to the action /// The sync number that has been reached - /// The modified range list that originally owned this range /// The action to perform - public void RangeActionWithMigration(ulong offset, ulong size, ulong syncNumber, BufferModifiedRangeList parent, Action rangeAction) + public void RangeActionWithMigration(ulong offset, ulong size, ulong syncNumber, BufferFlushAction rangeAction) { - bool firstSource = true; - - if (parent != this) + if (_source != null) { - lock (_lock) - { - if (_sources != null) - { - foreach (BufferMigration source in _sources) - { - if (source.Overlaps(offset, size, syncNumber)) - { - if (firstSource && !source.FullyMatches(offset, size)) - { - // Perform this buffer's action first. The migrations will run after. 
- rangeAction(offset, size); - } - - source.RangeActionWithMigration(offset, size, syncNumber, parent); - - firstSource = false; - } - } - } - } + _source.RangeActionWithMigration(offset, size, syncNumber, rangeAction); } - - if (firstSource) + else { - // No overlapping migrations, or they are not meant for this range, flush the data using the given action. - rangeAction(offset, size); + rangeAction(offset, size, syncNumber); } } @@ -319,7 +323,7 @@ namespace Ryujinx.Graphics.Gpu.Memory ClearPart(overlap, clampAddress, clampEnd); - RangeActionWithMigration(clampAddress, clampEnd - clampAddress, waitSync, overlap.Parent, _flushAction); + RangeActionWithMigration(clampAddress, clampEnd - clampAddress, waitSync, _flushAction); } } @@ -329,7 +333,7 @@ namespace Ryujinx.Graphics.Gpu.Memory // There is a migration target to call instead. This can't be changed after set so accessing it outside the lock is fine. - _migrationTarget.Destination.RemoveRangesAndFlush(overlaps, rangeCount, highestDiff, currentSync, address, endAddress); + _migrationTarget.RemoveRangesAndFlush(overlaps, rangeCount, highestDiff, currentSync, address, endAddress); } /// @@ -367,7 +371,7 @@ namespace Ryujinx.Graphics.Gpu.Memory if (rangeCount == -1) { - _migrationTarget.Destination.WaitForAndFlushRanges(address, size); + _migrationTarget.WaitForAndFlushRanges(address, size); return; } @@ -407,6 +411,9 @@ namespace Ryujinx.Graphics.Gpu.Memory /// /// Inherit ranges from another modified range list. /// + /// + /// Assumes that ranges will be inherited in address ascending order. 
+ /// /// The range list to inherit from /// The action to call for each modified range public void InheritRanges(BufferModifiedRangeList ranges, Action registerRangeAction) @@ -415,18 +422,31 @@ namespace Ryujinx.Graphics.Gpu.Memory lock (ranges._lock) { - BufferMigration migration = new(ranges._parent, ranges._flushAction, ranges, this, _context.SyncNumber); - - ranges._parent.IncrementReferenceCount(); - ranges._migrationTarget = migration; - - _context.RegisterBufferMigration(migration); - inheritRanges = ranges.ToArray(); lock (_lock) { - (_sources ??= new List()).Add(migration); + // Copy over the migration from the previous range list + + BufferMigration oldMigration = ranges._source; + + BufferMigrationSpan span = new BufferMigrationSpan(ranges._parent, ranges._flushAction, oldMigration); + ranges._parent.IncrementReferenceCount(); + + if (_source == null) + { + // Create a new migration. + _source = new BufferMigration(new BufferMigrationSpan[] { span }, this, _context.SyncNumber); + + _context.RegisterBufferMigration(_source); + } + else + { + // Extend the migration + _source.AddSpanToEnd(span); + } + + ranges._migrationTarget = this; foreach (BufferModifiedRange range in inheritRanges) { @@ -445,6 +465,27 @@ namespace Ryujinx.Graphics.Gpu.Memory } } + /// + /// Register a migration from previous buffer storage. This migration is from a snapshot of the buffer's + /// current handle to its handle in the future, and is assumed to be complete when the sync action completes. + /// When the migration completes, the handle is disposed. + /// + public void SelfMigration() + { + lock (_lock) + { + BufferMigrationSpan span = new(_parent, _parent.GetSnapshotDisposeAction(), _parent.GetSnapshotFlushAction(), _source); + BufferMigration migration = new(new BufferMigrationSpan[] { span }, this, _context.SyncNumber); + + // Migration target is used to redirect flush actions to the latest range list, + // so we don't need to set it here. 
(this range list is still the latest) + + _context.RegisterBufferMigration(migration); + + _source = migration; + } + } + /// /// Removes a source buffer migration, indicating its copy has completed. /// @@ -453,7 +494,10 @@ namespace Ryujinx.Graphics.Gpu.Memory { lock (_lock) { - _sources.Remove(migration); + if (_source == migration) + { + _source = null; + } } } diff --git a/src/Ryujinx.Graphics.Gpu/Memory/BufferPreFlush.cs b/src/Ryujinx.Graphics.Gpu/Memory/BufferPreFlush.cs new file mode 100644 index 00000000..d58b9ea6 --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Memory/BufferPreFlush.cs @@ -0,0 +1,295 @@ +using Ryujinx.Common; +using Ryujinx.Graphics.GAL; +using System; + +namespace Ryujinx.Graphics.Gpu.Memory +{ + /// + /// Manages flushing ranges from buffers in advance for easy access, if they are flushed often. + /// Typically, from device local memory to a host mapped target for cached access. + /// + internal class BufferPreFlush : IDisposable + { + private const ulong PageSize = MemoryManager.PageSize; + + /// + /// Threshold for the number of copies without a flush required to disable preflush on a page. + /// + private const int DeactivateCopyThreshold = 200; + + /// + /// Value that indicates whether a page has been flushed or copied before. + /// + private enum PreFlushState + { + None, + HasFlushed, + HasCopied + } + + /// + /// Flush state for each page of the buffer. + /// Controls whether data should be copied to the flush buffer, what sync is expected + /// and unflushed copy counting for stopping copies that are no longer needed. + /// + private struct PreFlushPage + { + public PreFlushState State; + public ulong FirstActivatedSync; + public ulong LastCopiedSync; + public int CopyCount; + } + + /// + /// True if there are ranges that should copy to the flush buffer, false otherwise. 
+ /// + public bool ShouldCopy { get; private set; } + + private readonly GpuContext _context; + private readonly Buffer _buffer; + private readonly PreFlushPage[] _pages; + private readonly ulong _address; + private readonly ulong _size; + private readonly ulong _misalignment; + private readonly Action _flushAction; + + private BufferHandle _flushBuffer; + + public BufferPreFlush(GpuContext context, Buffer parent, Action flushAction) + { + _context = context; + _buffer = parent; + _address = parent.Address; + _size = parent.Size; + _pages = new PreFlushPage[BitUtils.DivRoundUp(_size, PageSize)]; + _misalignment = _address & (PageSize - 1); + + _flushAction = flushAction; + } + + /// + /// Ensure that the flush buffer exists. + /// + private void EnsureFlushBuffer() + { + if (_flushBuffer == BufferHandle.Null) + { + _flushBuffer = _context.Renderer.CreateBuffer((int)_size, BufferAccess.HostMemory); + } + } + + /// + /// Gets a page range from an address and size byte range. + /// + /// Range address + /// Range size + /// A page index and count + private (int index, int count) GetPageRange(ulong address, ulong size) + { + ulong offset = address - _address; + ulong endOffset = offset + size; + + int basePage = (int)(offset / PageSize); + int endPage = (int)((endOffset - 1) / PageSize); + + return (basePage, 1 + endPage - basePage); + } + + /// + /// Gets an offset and size range in the parent buffer from a page index and count. + /// + /// Range start page + /// Range page count + /// Offset and size range + private (int offset, int size) GetOffset(int startPage, int count) + { + int offset = (int)((ulong)startPage * PageSize - _misalignment); + int endOffset = (int)((ulong)(startPage + count) * PageSize - _misalignment); + + offset = Math.Max(0, offset); + endOffset = Math.Min((int)_size, endOffset); + + return (offset, endOffset - offset); + } + + /// + /// Copy a range of pages from the parent buffer into the flush buffer. 
+ /// + /// Range start page + /// Range page count + private void CopyPageRange(int startPage, int count) + { + (int offset, int size) = GetOffset(startPage, count); + + EnsureFlushBuffer(); + + _context.Renderer.Pipeline.CopyBuffer(_buffer.Handle, _flushBuffer, offset, offset, size); + } + + /// + /// Copy a modified range into the flush buffer if it's marked as flushed. + /// Any pages the range overlaps are copied, and copies aren't repeated in the same sync number. + /// + /// Range address + /// Range size + public void CopyModified(ulong address, ulong size) + { + (int baseIndex, int count) = GetPageRange(address, size); + ulong syncNumber = _context.SyncNumber; + + int startPage = -1; + + for (int i = 0; i < count; i++) + { + int pageIndex = baseIndex + i; + ref PreFlushPage page = ref _pages[pageIndex]; + + if (page.State > PreFlushState.None) + { + // Perform the copy, and update the state of each page. + if (startPage == -1) + { + startPage = pageIndex; + } + + if (page.State != PreFlushState.HasCopied) + { + page.FirstActivatedSync = syncNumber; + page.State = PreFlushState.HasCopied; + } + else if (page.CopyCount++ >= DeactivateCopyThreshold) + { + page.CopyCount = 0; + page.State = PreFlushState.None; + } + + if (page.LastCopiedSync != syncNumber) + { + page.LastCopiedSync = syncNumber; + } + } + else if (startPage != -1) + { + CopyPageRange(startPage, pageIndex - startPage); + + startPage = -1; + } + } + + if (startPage != -1) + { + CopyPageRange(startPage, (baseIndex + count) - startPage); + } + } + + /// + /// Flush the given page range back into guest memory, optionally using data from the flush buffer. + /// The actual flushed range is an intersection of the page range and the address range. 
+ /// + /// Address range start + /// Address range size + /// Page range start + /// Page range count + /// True if the data should come from the flush buffer + private void FlushPageRange(ulong address, ulong size, int startPage, int count, bool preFlush) + { + (int pageOffset, int pageSize) = GetOffset(startPage, count); + + int offset = (int)(address - _address); + int end = offset + (int)size; + + offset = Math.Max(offset, pageOffset); + end = Math.Min(end, pageOffset + pageSize); + + if (end >= offset) + { + BufferHandle handle = preFlush ? _flushBuffer : _buffer.Handle; + _flushAction(handle, _address + (ulong)offset, (ulong)(end - offset)); + } + } + + /// + /// Flush the given address range back into guest memory, optionally using data from the flush buffer. + /// When a copy has been performed on or before the waited sync number, the data can come from the flush buffer. + /// Otherwise, it flushes the parent buffer directly. + /// + /// Range address + /// Range size + /// Sync number that has been waited for + public void FlushWithAction(ulong address, ulong size, ulong syncNumber) + { + // Copy the parts of the range that have pre-flush copies that have been completed. + // Run the flush action for ranges that don't have pre-flush copies. + + // If a range doesn't have a pre-flush copy, consider adding one. + + (int baseIndex, int count) = GetPageRange(address, size); + + bool rangePreFlushed = false; + int startPage = -1; + + for (int i = 0; i < count; i++) + { + int pageIndex = baseIndex + i; + ref PreFlushPage page = ref _pages[pageIndex]; + + bool flushPage = false; + page.CopyCount = 0; + + if (page.State == PreFlushState.HasCopied) + { + if (syncNumber >= page.FirstActivatedSync) + { + // After the range is first activated, its data will always be copied to the preflush buffer on each sync. 
+ flushPage = true; + } + } + else if (page.State == PreFlushState.None) + { + page.State = PreFlushState.HasFlushed; + ShouldCopy = true; + } + + if (flushPage) + { + if (!rangePreFlushed || startPage == -1) + { + if (startPage != -1) + { + FlushPageRange(address, size, startPage, pageIndex - startPage, false); + } + + rangePreFlushed = true; + startPage = pageIndex; + } + } + else if (rangePreFlushed || startPage == -1) + { + if (startPage != -1) + { + FlushPageRange(address, size, startPage, pageIndex - startPage, true); + } + + rangePreFlushed = false; + startPage = pageIndex; + } + } + + if (startPage != -1) + { + FlushPageRange(address, size, startPage, (baseIndex + count) - startPage, rangePreFlushed); + } + } + + /// + /// Dispose the flush buffer, if present. + /// + public void Dispose() + { + if (_flushBuffer != BufferHandle.Null) + { + _context.Renderer.DeleteBuffer(_flushBuffer); + } + } + } +} diff --git a/src/Ryujinx.Graphics.Gpu/Memory/BufferStage.cs b/src/Ryujinx.Graphics.Gpu/Memory/BufferStage.cs new file mode 100644 index 00000000..d56abda2 --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Memory/BufferStage.cs @@ -0,0 +1,99 @@ +using Ryujinx.Graphics.Shader; +using System.Runtime.CompilerServices; + +namespace Ryujinx.Graphics.Gpu.Memory +{ + /// + /// Pipeline stages that can modify buffer data, as well as flags indicating storage usage. + /// Must match ShaderStage for the shader stages, though anything after that can be in any order. 
+ /// + internal enum BufferStage : byte + { + Compute, + Vertex, + TessellationControl, + TessellationEvaluation, + Geometry, + Fragment, + + Indirect, + VertexBuffer, + IndexBuffer, + Copy, + TransformFeedback, + Internal, + None, + + StageMask = 0x3f, + StorageMask = 0xc0, + + StorageRead = 0x40, + StorageWrite = 0x80, + +#pragma warning disable CA1069 // Enums values should not be duplicated + StorageAtomic = 0xc0 +#pragma warning restore CA1069 // Enums values should not be duplicated + } + + /// + /// Utility methods to convert shader stages and binding flags into buffer stages. + /// + internal static class BufferStageUtils + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static BufferStage FromShaderStage(ShaderStage stage) + { + return (BufferStage)stage; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static BufferStage FromShaderStage(int stageIndex) + { + return (BufferStage)(stageIndex + 1); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static BufferStage FromUsage(BufferUsageFlags flags) + { + if (flags.HasFlag(BufferUsageFlags.Write)) + { + return BufferStage.StorageWrite; + } + else + { + return BufferStage.StorageRead; + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static BufferStage FromUsage(TextureUsageFlags flags) + { + if (flags.HasFlag(TextureUsageFlags.ImageStore)) + { + return BufferStage.StorageWrite; + } + else + { + return BufferStage.StorageRead; + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static BufferStage TextureBuffer(ShaderStage shaderStage, TextureUsageFlags flags) + { + return FromShaderStage(shaderStage) | FromUsage(flags); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static BufferStage GraphicsStorage(int stageIndex, BufferUsageFlags flags) + { + return FromShaderStage(stageIndex) | FromUsage(flags); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static BufferStage 
ComputeStorage(BufferUsageFlags flags) + { + return BufferStage.Compute | FromUsage(flags); + } + } +} diff --git a/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs b/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs index 2a39ae44..7bcff947 100644 --- a/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs +++ b/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs @@ -61,7 +61,9 @@ namespace Ryujinx.Graphics.OpenGL { BufferCount++; - if (access.HasFlag(GAL.BufferAccess.FlushPersistent)) + var memType = access & GAL.BufferAccess.MemoryTypeMask; + + if (memType == GAL.BufferAccess.HostMemory) { BufferHandle handle = Buffer.CreatePersistent(size); @@ -75,11 +77,6 @@ namespace Ryujinx.Graphics.OpenGL } } - public BufferHandle CreateBuffer(int size, GAL.BufferAccess access, BufferHandle storageHint) - { - return CreateBuffer(size, access); - } - public BufferHandle CreateBuffer(nint pointer, int size) { throw new NotSupportedException(); @@ -148,6 +145,7 @@ namespace Ryujinx.Graphics.OpenGL return new Capabilities( api: TargetApi.OpenGL, vendorName: GpuVendor, + memoryType: SystemMemoryType.BackendManaged, hasFrontFacingBug: intelWindows, hasVectorIndexingBug: amdWindows, needsFragmentOutputSpecialization: false, diff --git a/src/Ryujinx.Graphics.Vulkan/BufferHolder.cs b/src/Ryujinx.Graphics.Vulkan/BufferHolder.cs index 3673ee5a..3dcbc313 100644 --- a/src/Ryujinx.Graphics.Vulkan/BufferHolder.cs +++ b/src/Ryujinx.Graphics.Vulkan/BufferHolder.cs @@ -1,4 +1,3 @@ -using Ryujinx.Common.Logging; using Ryujinx.Graphics.GAL; using Silk.NET.Vulkan; using System; @@ -31,40 +30,29 @@ namespace Ryujinx.Graphics.Vulkan private readonly VulkanRenderer _gd; private readonly Device _device; - private MemoryAllocation _allocation; - private Auto _buffer; - private Auto _allocationAuto; + private readonly MemoryAllocation _allocation; + private readonly Auto _buffer; + private readonly Auto _allocationAuto; private readonly bool _allocationImported; - private ulong _bufferHandle; + private readonly ulong 
_bufferHandle; private CacheByRange _cachedConvertedBuffers; public int Size { get; } - private IntPtr _map; + private readonly IntPtr _map; - private MultiFenceHolder _waitable; + private readonly MultiFenceHolder _waitable; private bool _lastAccessIsWrite; - private BufferAllocationType _baseType; - private BufferAllocationType _currentType; - private bool _swapQueued; - - public BufferAllocationType DesiredType { get; private set; } - - private int _setCount; - private int _writeCount; - private int _flushCount; - private int _flushTemp; - private int _lastFlushWrite = -1; + private readonly BufferAllocationType _baseType; + private readonly BufferAllocationType _activeType; private readonly ReaderWriterLockSlim _flushLock; private FenceHolder _flushFence; private int _flushWaiting; - private List _swapActions; - private byte[] _pendingData; private BufferMirrorRangeList _pendingDataRanges; private Dictionary _mirrors; @@ -83,8 +71,7 @@ namespace Ryujinx.Graphics.Vulkan _map = allocation.HostPointer; _baseType = type; - _currentType = currentType; - DesiredType = currentType; + _activeType = currentType; _flushLock = new ReaderWriterLockSlim(); _useMirrors = gd.IsTBDR; @@ -104,8 +91,7 @@ namespace Ryujinx.Graphics.Vulkan _map = _allocation.HostPointer + offset; _baseType = type; - _currentType = currentType; - DesiredType = currentType; + _activeType = currentType; _flushLock = new ReaderWriterLockSlim(); } @@ -120,164 +106,11 @@ namespace Ryujinx.Graphics.Vulkan Size = size; _baseType = BufferAllocationType.Sparse; - _currentType = BufferAllocationType.Sparse; - DesiredType = BufferAllocationType.Sparse; + _activeType = BufferAllocationType.Sparse; _flushLock = new ReaderWriterLockSlim(); } - public bool TryBackingSwap(ref CommandBufferScoped? cbs) - { - if (_swapQueued && DesiredType != _currentType) - { - // Only swap if the buffer is not used in any queued command buffer. 
- bool isRented = _buffer.HasRentedCommandBufferDependency(_gd.CommandBufferPool); - - if (!isRented && _gd.CommandBufferPool.OwnedByCurrentThread && !_flushLock.IsReadLockHeld && (_pendingData == null || cbs != null)) - { - var currentAllocation = _allocationAuto; - var currentBuffer = _buffer; - IntPtr currentMap = _map; - - (VkBuffer buffer, MemoryAllocation allocation, BufferAllocationType resultType) = _gd.BufferManager.CreateBacking(_gd, Size, DesiredType, false, false, _currentType); - - if (buffer.Handle != 0) - { - if (cbs != null) - { - ClearMirrors(cbs.Value, 0, Size); - } - - _flushLock.EnterWriteLock(); - - ClearFlushFence(); - - _waitable = new MultiFenceHolder(Size); - - _allocation = allocation; - _allocationAuto = new Auto(allocation); - _buffer = new Auto(new DisposableBuffer(_gd.Api, _device, buffer), this, _waitable, _allocationAuto); - _bufferHandle = buffer.Handle; - _map = allocation.HostPointer; - - if (_map != IntPtr.Zero && currentMap != IntPtr.Zero) - { - // Copy data directly. Readbacks don't have to wait if this is done. - - unsafe - { - new Span((void*)currentMap, Size).CopyTo(new Span((void*)_map, Size)); - } - } - else - { - cbs ??= _gd.CommandBufferPool.Rent(); - - CommandBufferScoped cbsV = cbs.Value; - - Copy(_gd, cbsV, currentBuffer, _buffer, 0, 0, Size); - - // Need to wait for the data to reach the new buffer before data can be flushed. 
- - _flushFence = _gd.CommandBufferPool.GetFence(cbsV.CommandBufferIndex); - _flushFence.Get(); - } - - Logger.Debug?.PrintMsg(LogClass.Gpu, $"Converted {Size} buffer {_currentType} to {resultType}"); - - _currentType = resultType; - - if (_swapActions != null) - { - foreach (var action in _swapActions) - { - action(); - } - - _swapActions.Clear(); - } - - currentBuffer.Dispose(); - currentAllocation.Dispose(); - - _gd.PipelineInternal.SwapBuffer(currentBuffer, _buffer); - - _flushLock.ExitWriteLock(); - } - - _swapQueued = false; - - return true; - } - - return false; - } - - _swapQueued = false; - - return true; - } - - private void ConsiderBackingSwap() - { - if (_baseType == BufferAllocationType.Auto) - { - // When flushed, wait for a bit more info to make a decision. - bool wasFlushed = _flushTemp > 0; - int multiplier = wasFlushed ? 2 : 0; - if (_writeCount >= (WriteCountThreshold << multiplier) || _setCount >= (SetCountThreshold << multiplier) || _flushCount >= (FlushCountThreshold << multiplier)) - { - if (_flushCount > 0 || _flushTemp-- > 0) - { - // Buffers that flush should ideally be mapped in host address space for easy copies. - // If the buffer is large it will do better on GPU memory, as there will be more writes than data flushes (typically individual pages). - // If it is small, then it's likely most of the buffer will be flushed so we want it on host memory, as access is cached. - - bool hostMappingSensitive = _gd.Vendor == Vendor.Nvidia; - bool deviceLocalMapped = Size > DeviceLocalSizeThreshold || (wasFlushed && _writeCount > _flushCount * 10 && hostMappingSensitive) || _currentType == BufferAllocationType.DeviceLocalMapped; - - DesiredType = deviceLocalMapped ? BufferAllocationType.DeviceLocalMapped : BufferAllocationType.HostMapped; - - // It's harder for a buffer that is flushed to revert to another type of mapping. 
- if (_flushCount > 0) - { - _flushTemp = 1000; - } - } - else if (_writeCount >= (WriteCountThreshold << multiplier)) - { - // Buffers that are written often should ideally be in the device local heap. (Storage buffers) - DesiredType = BufferAllocationType.DeviceLocal; - } - else if (_setCount > (SetCountThreshold << multiplier)) - { - // Buffers that have their data set often should ideally be host mapped. (Constant buffers) - DesiredType = BufferAllocationType.HostMapped; - } - - _lastFlushWrite = -1; - _flushCount = 0; - _writeCount = 0; - _setCount = 0; - } - - if (!_swapQueued && DesiredType != _currentType) - { - _swapQueued = true; - - _gd.PipelineInternal.AddBackingSwap(this); - } - } - } - - public void Pin() - { - if (_baseType == BufferAllocationType.Auto) - { - _baseType = _currentType; - } - } - public unsafe Auto CreateView(VkFormat format, int offset, int size, Action invalidateView) { var bufferViewCreateInfo = new BufferViewCreateInfo @@ -291,19 +124,9 @@ namespace Ryujinx.Graphics.Vulkan _gd.Api.CreateBufferView(_device, bufferViewCreateInfo, null, out var bufferView).ThrowOnError(); - (_swapActions ??= new List()).Add(invalidateView); - return new Auto(new DisposableBufferView(_gd.Api, _device, bufferView), this, _waitable, _buffer); } - public void InheritMetrics(BufferHolder other) - { - _setCount = other._setCount; - _writeCount = other._writeCount; - _flushCount = other._flushCount; - _flushTemp = other._flushTemp; - } - public unsafe void InsertBarrier(CommandBuffer commandBuffer, bool isWrite) { // If the last access is write, we always need a barrier to be sure we will read or modify @@ -423,18 +246,8 @@ namespace Ryujinx.Graphics.Vulkan { if (isWrite) { - _writeCount++; - SignalWrite(0, Size); } - else if (isSSBO) - { - // Always consider SSBO access for swapping to device local memory. 
- - _writeCount++; - - ConsiderBackingSwap(); - } return _buffer; } @@ -443,8 +256,6 @@ namespace Ryujinx.Graphics.Vulkan { if (isWrite) { - _writeCount++; - SignalWrite(offset, size); } @@ -543,8 +354,6 @@ namespace Ryujinx.Graphics.Vulkan public void SignalWrite(int offset, int size) { - ConsiderBackingSwap(); - if (offset == 0 && size == Size) { _cachedConvertedBuffers.Clear(); @@ -624,13 +433,6 @@ namespace Ryujinx.Graphics.Vulkan WaitForFlushFence(); - if (_lastFlushWrite != _writeCount) - { - // If it's on the same page as the last flush, ignore it. - _lastFlushWrite = _writeCount; - _flushCount++; - } - Span result; if (_map != IntPtr.Zero) @@ -711,8 +513,7 @@ namespace Ryujinx.Graphics.Vulkan return; } - _setCount++; - bool allowMirror = _useMirrors && allowCbsWait && cbs != null && _currentType <= BufferAllocationType.HostMapped; + bool allowMirror = _useMirrors && allowCbsWait && cbs != null && _activeType <= BufferAllocationType.HostMapped; if (_map != IntPtr.Zero) { @@ -863,8 +664,6 @@ namespace Ryujinx.Graphics.Vulkan var dstBuffer = GetBuffer(cbs.CommandBuffer, dstOffset, data.Length, true).Get(cbs, dstOffset, data.Length, true).Value; - _writeCount--; - InsertBufferBarrier( _gd, cbs.CommandBuffer, @@ -1100,8 +899,6 @@ namespace Ryujinx.Graphics.Vulkan public void Dispose() { - _swapQueued = false; - _gd.PipelineInternal?.FlushCommandsIfWeightExceeding(_buffer, (ulong)Size); _buffer.Dispose(); diff --git a/src/Ryujinx.Graphics.Vulkan/BufferManager.cs b/src/Ryujinx.Graphics.Vulkan/BufferManager.cs index 33289a0e..e73cde83 100644 --- a/src/Ryujinx.Graphics.Vulkan/BufferManager.cs +++ b/src/Ryujinx.Graphics.Vulkan/BufferManager.cs @@ -165,10 +165,6 @@ namespace Ryujinx.Graphics.Vulkan if (TryGetBuffer(range.Handle, out var existingHolder)) { - // Since this buffer now also owns the memory from the referenced buffer, - // we pin it to ensure the memory location will not change. 
- existingHolder.Pin(); - (var memory, var offset) = existingHolder.GetDeviceMemoryAndOffset(); memoryBinds[index] = new SparseMemoryBind() @@ -235,10 +231,9 @@ namespace Ryujinx.Graphics.Vulkan int size, bool sparseCompatible = false, BufferAllocationType baseType = BufferAllocationType.HostMapped, - BufferHandle storageHint = default, bool forceMirrors = false) { - return CreateWithHandle(gd, size, out _, sparseCompatible, baseType, storageHint, forceMirrors); + return CreateWithHandle(gd, size, out _, sparseCompatible, baseType, forceMirrors); } public BufferHandle CreateWithHandle( @@ -247,10 +242,9 @@ namespace Ryujinx.Graphics.Vulkan out BufferHolder holder, bool sparseCompatible = false, BufferAllocationType baseType = BufferAllocationType.HostMapped, - BufferHandle storageHint = default, bool forceMirrors = false) { - holder = Create(gd, size, forConditionalRendering: false, sparseCompatible, baseType, storageHint); + holder = Create(gd, size, forConditionalRendering: false, sparseCompatible, baseType); if (holder == null) { return BufferHandle.Null; @@ -387,31 +381,13 @@ namespace Ryujinx.Graphics.Vulkan int size, bool forConditionalRendering = false, bool sparseCompatible = false, - BufferAllocationType baseType = BufferAllocationType.HostMapped, - BufferHandle storageHint = default) + BufferAllocationType baseType = BufferAllocationType.HostMapped) { BufferAllocationType type = baseType; - BufferHolder storageHintHolder = null; if (baseType == BufferAllocationType.Auto) { - if (gd.IsSharedMemory) - { - baseType = BufferAllocationType.HostMapped; - type = baseType; - } - else - { - type = size >= BufferHolder.DeviceLocalSizeThreshold ? 
BufferAllocationType.DeviceLocal : BufferAllocationType.HostMapped; - } - - if (storageHint != BufferHandle.Null) - { - if (TryGetBuffer(storageHint, out storageHintHolder)) - { - type = storageHintHolder.DesiredType; - } - } + type = BufferAllocationType.HostMapped; } (VkBuffer buffer, MemoryAllocation allocation, BufferAllocationType resultType) = @@ -421,11 +397,6 @@ namespace Ryujinx.Graphics.Vulkan { var holder = new BufferHolder(gd, _device, buffer, allocation, size, baseType, resultType); - if (storageHintHolder != null) - { - holder.InheritMetrics(storageHintHolder); - } - return holder; } diff --git a/src/Ryujinx.Graphics.Vulkan/EnumConversion.cs b/src/Ryujinx.Graphics.Vulkan/EnumConversion.cs index f9243bf8..9d1fd9ff 100644 --- a/src/Ryujinx.Graphics.Vulkan/EnumConversion.cs +++ b/src/Ryujinx.Graphics.Vulkan/EnumConversion.cs @@ -424,10 +424,20 @@ namespace Ryujinx.Graphics.Vulkan public static BufferAllocationType Convert(this BufferAccess access) { - if (access.HasFlag(BufferAccess.FlushPersistent) || access.HasFlag(BufferAccess.Stream)) + BufferAccess memType = access & BufferAccess.MemoryTypeMask; + + if (memType == BufferAccess.HostMemory || access.HasFlag(BufferAccess.Stream)) { return BufferAllocationType.HostMapped; } + else if (memType == BufferAccess.DeviceMemory) + { + return BufferAllocationType.DeviceLocal; + } + else if (memType == BufferAccess.DeviceMemoryMapped) + { + return BufferAllocationType.DeviceLocalMapped; + } return BufferAllocationType.Auto; } diff --git a/src/Ryujinx.Graphics.Vulkan/PipelineFull.cs b/src/Ryujinx.Graphics.Vulkan/PipelineFull.cs index 4987548c..357d517e 100644 --- a/src/Ryujinx.Graphics.Vulkan/PipelineFull.cs +++ b/src/Ryujinx.Graphics.Vulkan/PipelineFull.cs @@ -222,20 +222,6 @@ namespace Ryujinx.Graphics.Vulkan } } - private void TryBackingSwaps() - { - CommandBufferScoped? 
cbs = null; - - _backingSwaps.RemoveAll(holder => holder.TryBackingSwap(ref cbs)); - - cbs?.Dispose(); - } - - public void AddBackingSwap(BufferHolder holder) - { - _backingSwaps.Add(holder); - } - public void Restore() { if (Pipeline != null) @@ -291,8 +277,6 @@ namespace Ryujinx.Graphics.Vulkan Gd.ResetCounterPool(); - TryBackingSwaps(); - Restore(); } diff --git a/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs b/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs index 8ef05de3..175d5e3e 100644 --- a/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs +++ b/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs @@ -486,12 +486,7 @@ namespace Ryujinx.Graphics.Vulkan public BufferHandle CreateBuffer(int size, BufferAccess access) { - return BufferManager.CreateWithHandle(this, size, access.HasFlag(BufferAccess.SparseCompatible), access.Convert(), default, access == BufferAccess.Stream); - } - - public BufferHandle CreateBuffer(int size, BufferAccess access, BufferHandle storageHint) - { - return BufferManager.CreateWithHandle(this, size, access.HasFlag(BufferAccess.SparseCompatible), access.Convert(), storageHint); + return BufferManager.CreateWithHandle(this, size, access.HasFlag(BufferAccess.SparseCompatible), access.Convert(), access.HasFlag(BufferAccess.Stream)); } public BufferHandle CreateBuffer(nint pointer, int size) @@ -675,9 +670,23 @@ namespace Ryujinx.Graphics.Vulkan var limits = _physicalDevice.PhysicalDeviceProperties.Limits; var mainQueueProperties = _physicalDevice.QueueFamilyProperties[QueueFamilyIndex]; + SystemMemoryType memoryType; + + if (IsSharedMemory) + { + memoryType = SystemMemoryType.UnifiedMemory; + } + else + { + memoryType = Vendor == Vendor.Nvidia ? + SystemMemoryType.DedicatedMemorySlowStorage : + SystemMemoryType.DedicatedMemory; + } + return new Capabilities( api: TargetApi.Vulkan, GpuVendor, + memoryType: memoryType, hasFrontFacingBug: IsIntelWindows, hasVectorIndexingBug: Vendor == Vendor.Qualcomm, needsFragmentOutputSpecialization: IsMoltenVk,