diff --git a/src/Ryujinx.Graphics.GAL/BufferAccess.cs b/src/Ryujinx.Graphics.GAL/BufferAccess.cs index faefa518..1e7736f8 100644 --- a/src/Ryujinx.Graphics.GAL/BufferAccess.cs +++ b/src/Ryujinx.Graphics.GAL/BufferAccess.cs @@ -6,8 +6,13 @@ namespace Ryujinx.Graphics.GAL public enum BufferAccess { Default = 0, - FlushPersistent = 1 << 0, - Stream = 1 << 1, - SparseCompatible = 1 << 2, + HostMemory = 1, + DeviceMemory = 2, + DeviceMemoryMapped = 3, + + MemoryTypeMask = 0xf, + + Stream = 1 << 4, + SparseCompatible = 1 << 5, } } diff --git a/src/Ryujinx.Graphics.GAL/Capabilities.cs b/src/Ryujinx.Graphics.GAL/Capabilities.cs index 779ce5b5..d758586a 100644 --- a/src/Ryujinx.Graphics.GAL/Capabilities.cs +++ b/src/Ryujinx.Graphics.GAL/Capabilities.cs @@ -6,6 +6,7 @@ namespace Ryujinx.Graphics.GAL { public readonly TargetApi Api; public readonly string VendorName; + public readonly SystemMemoryType MemoryType; public readonly bool HasFrontFacingBug; public readonly bool HasVectorIndexingBug; @@ -66,6 +67,7 @@ namespace Ryujinx.Graphics.GAL public Capabilities( TargetApi api, string vendorName, + SystemMemoryType memoryType, bool hasFrontFacingBug, bool hasVectorIndexingBug, bool needsFragmentOutputSpecialization, @@ -120,6 +122,7 @@ namespace Ryujinx.Graphics.GAL { Api = api; VendorName = vendorName; + MemoryType = memoryType; HasFrontFacingBug = hasFrontFacingBug; HasVectorIndexingBug = hasVectorIndexingBug; NeedsFragmentOutputSpecialization = needsFragmentOutputSpecialization; diff --git a/src/Ryujinx.Graphics.GAL/IRenderer.cs b/src/Ryujinx.Graphics.GAL/IRenderer.cs index a3466e39..85d0bd72 100644 --- a/src/Ryujinx.Graphics.GAL/IRenderer.cs +++ b/src/Ryujinx.Graphics.GAL/IRenderer.cs @@ -17,7 +17,6 @@ namespace Ryujinx.Graphics.GAL void BackgroundContextAction(Action action, bool alwaysBackground = false); BufferHandle CreateBuffer(int size, BufferAccess access = BufferAccess.Default); - BufferHandle CreateBuffer(int size, BufferAccess access, BufferHandle storageHint); BufferHandle CreateBuffer(nint pointer, int size); BufferHandle CreateBufferSparse(ReadOnlySpan storageBuffers); diff --git a/src/Ryujinx.Graphics.GAL/Multithreading/CommandHelper.cs b/src/Ryujinx.Graphics.GAL/Multithreading/CommandHelper.cs index fd2919be..23f1a64e 100644 --- a/src/Ryujinx.Graphics.GAL/Multithreading/CommandHelper.cs +++ b/src/Ryujinx.Graphics.GAL/Multithreading/CommandHelper.cs @@ -44,7 +44,6 @@ namespace Ryujinx.Graphics.GAL.Multithreading } Register(CommandType.Action); - Register(CommandType.CreateBuffer); Register(CommandType.CreateBufferAccess); Register(CommandType.CreateBufferSparse); Register(CommandType.CreateHostBuffer); diff --git a/src/Ryujinx.Graphics.GAL/Multithreading/CommandType.cs b/src/Ryujinx.Graphics.GAL/Multithreading/CommandType.cs index a5e7336c..f95aab05 100644 --- a/src/Ryujinx.Graphics.GAL/Multithreading/CommandType.cs +++ b/src/Ryujinx.Graphics.GAL/Multithreading/CommandType.cs @@ -3,7 +3,6 @@ namespace Ryujinx.Graphics.GAL.Multithreading enum CommandType : byte { Action, - CreateBuffer, CreateBufferAccess, CreateBufferSparse, CreateHostBuffer, diff --git a/src/Ryujinx.Graphics.GAL/Multithreading/Commands/Renderer/CreateBufferCommand.cs b/src/Ryujinx.Graphics.GAL/Multithreading/Commands/Renderer/CreateBufferCommand.cs deleted file mode 100644 index 60a6e4bf..00000000 --- a/src/Ryujinx.Graphics.GAL/Multithreading/Commands/Renderer/CreateBufferCommand.cs +++ /dev/null @@ -1,31 +0,0 @@ -namespace Ryujinx.Graphics.GAL.Multithreading.Commands.Renderer -{ - struct CreateBufferCommand : 
IGALCommand, IGALCommand<CreateBufferCommand> - { - public readonly CommandType CommandType => CommandType.CreateBuffer; - private BufferHandle _threadedHandle; - private int _size; - private BufferAccess _access; - private BufferHandle _storageHint; - - public void Set(BufferHandle threadedHandle, int size, BufferAccess access, BufferHandle storageHint) - { - _threadedHandle = threadedHandle; - _size = size; - _access = access; - _storageHint = storageHint; - } - - public static void Run(ref CreateBufferCommand command, ThreadedRenderer threaded, IRenderer renderer) - { - BufferHandle hint = BufferHandle.Null; - - if (command._storageHint != BufferHandle.Null) - { - hint = threaded.Buffers.MapBuffer(command._storageHint); - } - - threaded.Buffers.AssignBuffer(command._threadedHandle, renderer.CreateBuffer(command._size, command._access, hint)); - } - } -} diff --git a/src/Ryujinx.Graphics.GAL/Multithreading/ThreadedRenderer.cs b/src/Ryujinx.Graphics.GAL/Multithreading/ThreadedRenderer.cs index 5e17bcd2..cc3d2e5c 100644 --- a/src/Ryujinx.Graphics.GAL/Multithreading/ThreadedRenderer.cs +++ b/src/Ryujinx.Graphics.GAL/Multithreading/ThreadedRenderer.cs @@ -272,15 +272,6 @@ namespace Ryujinx.Graphics.GAL.Multithreading return handle; } - public BufferHandle CreateBuffer(int size, BufferAccess access, BufferHandle storageHint) - { - BufferHandle handle = Buffers.CreateBufferHandle(); - New<CreateBufferCommand>().Set(handle, size, access, storageHint); - QueueCommand(); - - return handle; - } - public BufferHandle CreateBuffer(nint pointer, int size) { BufferHandle handle = Buffers.CreateBufferHandle(); diff --git a/src/Ryujinx.Graphics.GAL/SystemMemoryType.cs b/src/Ryujinx.Graphics.GAL/SystemMemoryType.cs new file mode 100644 index 00000000..53292129 --- /dev/null +++ b/src/Ryujinx.Graphics.GAL/SystemMemoryType.cs @@ -0,0 +1,29 @@ +namespace Ryujinx.Graphics.GAL +{ + public enum SystemMemoryType + { + /// + /// The backend manages the ownership of memory. This mode never supports host imported memory. + /// + BackendManaged, + + /// + /// Device memory has similar performance to host memory, usually because it's shared between CPU/GPU. + /// Use host memory whenever possible. + /// + UnifiedMemory, + + /// + /// GPU storage to host memory goes through a slow interconnect, but it would still be preferable to use it if the data is flushed back often. + /// Assumes constant buffer access to host memory is rather fast. + /// + DedicatedMemory, + + /// + /// GPU storage to host memory goes through a slow interconnect that is very slow when accessed as storage. + /// When frequently accessed, copy buffers to host memory using DMA. + /// Assumes constant buffer access to host memory is rather fast.
+ /// + DedicatedMemorySlowStorage + } +} diff --git a/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLE.cs b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLE.cs index 7f3772f4..475d1ee4 100644 --- a/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLE.cs +++ b/src/Ryujinx.Graphics.Gpu/Engine/MME/MacroHLE.cs @@ -5,6 +5,7 @@ using Ryujinx.Graphics.GAL; using Ryujinx.Graphics.Gpu.Engine.GPFifo; using Ryujinx.Graphics.Gpu.Engine.Threed; using Ryujinx.Graphics.Gpu.Engine.Types; +using Ryujinx.Graphics.Gpu.Memory; using Ryujinx.Memory.Range; using System; using System.Collections.Generic; @@ -495,8 +496,8 @@ namespace Ryujinx.Graphics.Gpu.Engine.MME ulong indirectBufferSize = (ulong)maxDrawCount * (ulong)stride; - MultiRange indirectBufferRange = bufferCache.TranslateAndCreateMultiBuffers(_processor.MemoryManager, indirectBufferGpuVa, indirectBufferSize); - MultiRange parameterBufferRange = bufferCache.TranslateAndCreateMultiBuffers(_processor.MemoryManager, parameterBufferGpuVa, 4); + MultiRange indirectBufferRange = bufferCache.TranslateAndCreateMultiBuffers(_processor.MemoryManager, indirectBufferGpuVa, indirectBufferSize, BufferStage.Indirect); + MultiRange parameterBufferRange = bufferCache.TranslateAndCreateMultiBuffers(_processor.MemoryManager, parameterBufferGpuVa, 4, BufferStage.Indirect); _processor.ThreedClass.DrawIndirect( topology, diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsComputeContext.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsComputeContext.cs index f9cb40b0..6de50fb2 100644 --- a/src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsComputeContext.cs +++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsComputeContext.cs @@ -438,7 +438,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed.ComputeDraw ReadOnlySpan dataBytes = MemoryMarshal.Cast(data); - BufferHandle buffer = _context.Renderer.CreateBuffer(dataBytes.Length); + BufferHandle buffer = _context.Renderer.CreateBuffer(dataBytes.Length, BufferAccess.DeviceMemory); _context.Renderer.SetBufferData(buffer, 0, dataBytes); return new IndexBuffer(buffer, count, dataBytes.Length); @@ -529,7 +529,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed.ComputeDraw { if (_dummyBuffer == BufferHandle.Null) { - _dummyBuffer = _context.Renderer.CreateBuffer(DummyBufferSize); + _dummyBuffer = _context.Renderer.CreateBuffer(DummyBufferSize, BufferAccess.DeviceMemory); _context.Renderer.Pipeline.ClearBuffer(_dummyBuffer, 0, DummyBufferSize, 0); } @@ -550,7 +550,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed.ComputeDraw _context.Renderer.DeleteBuffer(_sequentialIndexBuffer); } - _sequentialIndexBuffer = _context.Renderer.CreateBuffer(count * sizeof(uint)); + _sequentialIndexBuffer = _context.Renderer.CreateBuffer(count * sizeof(uint), BufferAccess.DeviceMemory); _sequentialIndexBufferCount = count; Span data = new int[count]; @@ -583,7 +583,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed.ComputeDraw _context.Renderer.DeleteBuffer(buffer.Handle); } - buffer.Handle = _context.Renderer.CreateBuffer(newSize); + buffer.Handle = _context.Renderer.CreateBuffer(newSize, BufferAccess.DeviceMemory); buffer.Size = newSize; } diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsComputeState.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsComputeState.cs index 6324e6a1..73682866 100644 --- a/src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsComputeState.cs +++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/ComputeDraw/VtgAsComputeState.cs @@ -3,6 +3,7 @@ using Ryujinx.Common.Logging; 
using Ryujinx.Graphics.GAL; using Ryujinx.Graphics.Gpu.Engine.Types; using Ryujinx.Graphics.Gpu.Image; +using Ryujinx.Graphics.Gpu.Memory; using Ryujinx.Graphics.Gpu.Shader; using Ryujinx.Graphics.Shader; using Ryujinx.Graphics.Shader.Translation; @@ -370,7 +371,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed.ComputeDraw { var memoryManager = _channel.MemoryManager; - BufferRange range = memoryManager.Physical.BufferCache.GetBufferRange(memoryManager.GetPhysicalRegions(address, size)); + BufferRange range = memoryManager.Physical.BufferCache.GetBufferRange(memoryManager.GetPhysicalRegions(address, size), BufferStage.VertexBuffer); ITexture bufferTexture = _vacContext.EnsureBufferTexture(index + 2, format); bufferTexture.SetStorage(range); @@ -412,7 +413,9 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed.ComputeDraw var memoryManager = _channel.MemoryManager; ulong misalign = address & ((ulong)_context.Capabilities.TextureBufferOffsetAlignment - 1); - BufferRange range = memoryManager.Physical.BufferCache.GetBufferRange(memoryManager.GetPhysicalRegions(address + indexOffset - misalign, size + misalign)); + BufferRange range = memoryManager.Physical.BufferCache.GetBufferRange( + memoryManager.GetPhysicalRegions(address + indexOffset - misalign, size + misalign), + BufferStage.IndexBuffer); misalignedOffset = (int)misalign >> shift; SetIndexBufferTexture(reservations, range, format); diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Threed/DrawManager.cs b/src/Ryujinx.Graphics.Gpu/Engine/Threed/DrawManager.cs index d8de14de..56ef64c6 100644 --- a/src/Ryujinx.Graphics.Gpu/Engine/Threed/DrawManager.cs +++ b/src/Ryujinx.Graphics.Gpu/Engine/Threed/DrawManager.cs @@ -684,8 +684,8 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed if (hasCount) { - var indirectBuffer = memory.BufferCache.GetBufferRange(indirectBufferRange); - var parameterBuffer = memory.BufferCache.GetBufferRange(parameterBufferRange); + var indirectBuffer = memory.BufferCache.GetBufferRange(indirectBufferRange, BufferStage.Indirect); + var parameterBuffer = memory.BufferCache.GetBufferRange(parameterBufferRange, BufferStage.Indirect); if (indexed) { @@ -698,7 +698,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed } else { - var indirectBuffer = memory.BufferCache.GetBufferRange(indirectBufferRange); + var indirectBuffer = memory.BufferCache.GetBufferRange(indirectBufferRange, BufferStage.Indirect); if (indexed) { diff --git a/src/Ryujinx.Graphics.Gpu/GpuContext.cs b/src/Ryujinx.Graphics.Gpu/GpuContext.cs index 53ea8cb2..048d32fb 100644 --- a/src/Ryujinx.Graphics.Gpu/GpuContext.cs +++ b/src/Ryujinx.Graphics.Gpu/GpuContext.cs @@ -393,17 +393,18 @@ namespace Ryujinx.Graphics.Gpu if (force || _pendingSync || (syncpoint && SyncpointActions.Count > 0)) { - Renderer.CreateSync(SyncNumber, strict); - foreach (var action in SyncActions) { action.SyncPreAction(syncpoint); } + foreach (var action in SyncpointActions) { action.SyncPreAction(syncpoint); } + Renderer.CreateSync(SyncNumber, strict); + SyncNumber++; SyncActions.RemoveAll(action => action.SyncAction(syncpoint)); diff --git a/src/Ryujinx.Graphics.Gpu/Image/TextureBindingsArrayCache.cs b/src/Ryujinx.Graphics.Gpu/Image/TextureBindingsArrayCache.cs index 7e486e0a..a54d0700 100644 --- a/src/Ryujinx.Graphics.Gpu/Image/TextureBindingsArrayCache.cs +++ b/src/Ryujinx.Graphics.Gpu/Image/TextureBindingsArrayCache.cs @@ -708,11 +708,11 @@ namespace Ryujinx.Graphics.Gpu.Image format = texture.Format; } - _channel.BufferManager.SetBufferTextureStorage(entry.ImageArray, hostTexture, texture.Range, 
bindingInfo, index, format); + _channel.BufferManager.SetBufferTextureStorage(stage, entry.ImageArray, hostTexture, texture.Range, bindingInfo, index, format); } else { - _channel.BufferManager.SetBufferTextureStorage(entry.TextureArray, hostTexture, texture.Range, bindingInfo, index, format); + _channel.BufferManager.SetBufferTextureStorage(stage, entry.TextureArray, hostTexture, texture.Range, bindingInfo, index, format); } } else if (isImage) @@ -921,11 +921,11 @@ namespace Ryujinx.Graphics.Gpu.Image format = texture.Format; } - _channel.BufferManager.SetBufferTextureStorage(entry.ImageArray, hostTexture, texture.Range, bindingInfo, index, format); + _channel.BufferManager.SetBufferTextureStorage(stage, entry.ImageArray, hostTexture, texture.Range, bindingInfo, index, format); } else { - _channel.BufferManager.SetBufferTextureStorage(entry.TextureArray, hostTexture, texture.Range, bindingInfo, index, format); + _channel.BufferManager.SetBufferTextureStorage(stage, entry.TextureArray, hostTexture, texture.Range, bindingInfo, index, format); } } else if (isImage) diff --git a/src/Ryujinx.Graphics.Gpu/Image/TextureGroup.cs b/src/Ryujinx.Graphics.Gpu/Image/TextureGroup.cs index 4e1133d1..06ca2c59 100644 --- a/src/Ryujinx.Graphics.Gpu/Image/TextureGroup.cs +++ b/src/Ryujinx.Graphics.Gpu/Image/TextureGroup.cs @@ -645,7 +645,7 @@ namespace Ryujinx.Graphics.Gpu.Image } else { - _flushBuffer = _context.Renderer.CreateBuffer((int)Storage.Size, BufferAccess.FlushPersistent); + _flushBuffer = _context.Renderer.CreateBuffer((int)Storage.Size, BufferAccess.HostMemory); _flushBufferImported = false; } diff --git a/src/Ryujinx.Graphics.Gpu/Memory/Buffer.cs b/src/Ryujinx.Graphics.Gpu/Memory/Buffer.cs index d293060b..e060e0b4 100644 --- a/src/Ryujinx.Graphics.Gpu/Memory/Buffer.cs +++ b/src/Ryujinx.Graphics.Gpu/Memory/Buffer.cs @@ -10,6 +10,8 @@ using System.Threading; namespace Ryujinx.Graphics.Gpu.Memory { + delegate void BufferFlushAction(ulong address, ulong size, ulong syncNumber); + /// /// Buffer, used to store vertex and index data, uniform and storage buffers, and others. /// @@ -23,7 +25,7 @@ namespace Ryujinx.Graphics.Gpu.Memory /// /// Host buffer handle. /// - public BufferHandle Handle { get; } + public BufferHandle Handle { get; private set; } /// /// Start address of the buffer in guest memory. @@ -60,6 +62,17 @@ namespace Ryujinx.Graphics.Gpu.Memory /// private BufferModifiedRangeList _modifiedRanges = null; + /// + /// A structure that is used to flush buffer data back to a host mapped buffer for cached readback. + /// Only used if the buffer data is explicitly owned by device local memory. + /// + private BufferPreFlush _preFlush = null; + + /// + /// Usage tracking state that determines what type of backing the buffer should use. 
+ /// + public BufferBackingState BackingState; + private readonly MultiRegionHandle _memoryTrackingGranular; private readonly RegionHandle _memoryTracking; @@ -87,6 +100,7 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Physical memory where the buffer is mapped /// Start address of the buffer /// Size of the buffer in bytes + /// The type of usage that created the buffer /// Indicates if the buffer can be used in a sparse buffer mapping /// Buffers which this buffer contains, and will inherit tracking handles from public Buffer( @@ -94,6 +108,7 @@ PhysicalMemory physicalMemory, ulong address, ulong size, + BufferStage stage, bool sparseCompatible, IEnumerable<Buffer> baseBuffers = null) { @@ -103,9 +118,11 @@ Size = size; SparseCompatible = sparseCompatible; - BufferAccess access = sparseCompatible ? BufferAccess.SparseCompatible : BufferAccess.Default; + BackingState = new BufferBackingState(_context, this, stage, baseBuffers); - Handle = context.Renderer.CreateBuffer((int)size, access, baseBuffers?.MaxBy(x => x.Size).Handle ?? BufferHandle.Null); + BufferAccess access = BackingState.SwitchAccess(this); + + Handle = context.Renderer.CreateBuffer((int)size, access); _useGranular = size > GranularBufferThreshold; @@ -161,6 +178,29 @@ _virtualDependenciesLock = new ReaderWriterLockSlim(); } + /// + /// Recreates the backing buffer based on the desired access type + /// reported by the backing state struct. + /// + private void ChangeBacking() + { + BufferAccess access = BackingState.SwitchAccess(this); + + BufferHandle newHandle = _context.Renderer.CreateBuffer((int)Size, access); + + _context.Renderer.Pipeline.CopyBuffer(Handle, newHandle, 0, 0, (int)Size); + + _modifiedRanges?.SelfMigration(); + + // If switching from device local to host mapped, pre-flushing data no longer makes sense. + // This is set to null and disposed when the migration fully completes. + _preFlush = null; + + Handle = newHandle; + + _physicalMemory.BufferCache.BufferBackingChanged(this); + } + /// /// Gets a sub-range from the buffer, from a start address til a page boundary after the given size. /// @@ -246,6 +286,7 @@ } else { + BackingState.RecordSet(); _context.Renderer.SetBufferData(Handle, 0, _physicalMemory.GetSpan(Address, (int)Size)); CopyToDependantVirtualBuffers(); } @@ -283,15 +324,35 @@ _modifiedRanges ??= new BufferModifiedRangeList(_context, this, Flush); } + /// + /// Checks if a backing change is deemed necessary from the given usage. + /// If it is, queues a backing change to happen on the next sync action. + /// + /// Buffer stage that can change backing type + private void TryQueueBackingChange(BufferStage stage) + { + if (BackingState.ShouldChangeBacking(stage)) + { + if (!_syncActionRegistered) + { + _context.RegisterSyncAction(this); + _syncActionRegistered = true; + } + } + } + /// /// Signal that the given region of the buffer has been modified.
/// /// The start address of the modified region /// The size of the modified region - public void SignalModified(ulong address, ulong size) + /// Buffer stage that triggered the modification + public void SignalModified(ulong address, ulong size, BufferStage stage) { EnsureRangeList(); + TryQueueBackingChange(stage); + _modifiedRanges.SignalModified(address, size); if (!_syncActionRegistered) @@ -311,6 +372,37 @@ namespace Ryujinx.Graphics.Gpu.Memory _modifiedRanges?.Clear(address, size); } + /// + /// Action to be performed immediately before sync is created. + /// This will copy any buffer ranges designated for pre-flushing. + /// + /// True if the action is a guest syncpoint + public void SyncPreAction(bool syncpoint) + { + if (_referenceCount == 0) + { + return; + } + + if (BackingState.ShouldChangeBacking()) + { + ChangeBacking(); + } + + if (BackingState.IsDeviceLocal) + { + _preFlush ??= new BufferPreFlush(_context, this, FlushImpl); + + if (_preFlush.ShouldCopy) + { + _modifiedRanges?.GetRangesAtSync(Address, Size, _context.SyncNumber, (address, size) => + { + _preFlush.CopyModified(address, size); + }); + } + } + } + /// /// Action to be performed when a syncpoint is reached after modification. /// This will register read/write tracking to flush the buffer from GPU when its memory is used. @@ -466,6 +558,8 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Size of the modified region private void LoadRegion(ulong mAddress, ulong mSize) { + BackingState.RecordSet(); + int offset = (int)(mAddress - Address); _context.Renderer.SetBufferData(Handle, offset, _physicalMemory.GetSpan(mAddress, (int)mSize)); @@ -539,18 +633,84 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Flushes a range of the buffer. /// This writes the range data back into guest memory. /// + /// Buffer handle to flush data from /// Start address of the range /// Size in bytes of the range - public void Flush(ulong address, ulong size) + private void FlushImpl(BufferHandle handle, ulong address, ulong size) { int offset = (int)(address - Address); - using PinnedSpan data = _context.Renderer.GetBufferData(Handle, offset, (int)size); + using PinnedSpan data = _context.Renderer.GetBufferData(handle, offset, (int)size); // TODO: When write tracking shaders, they will need to be aware of changes in overlapping buffers. _physicalMemory.WriteUntracked(address, CopyFromDependantVirtualBuffers(data.Get(), address, size)); } + /// + /// Flushes a range of the buffer. + /// This writes the range data back into guest memory. + /// + /// Start address of the range + /// Size in bytes of the range + private void FlushImpl(ulong address, ulong size) + { + FlushImpl(Handle, address, size); + } + + /// + /// Flushes a range of the buffer from the most optimal source. + /// This writes the range data back into guest memory. + /// + /// Start address of the range + /// Size in bytes of the range + /// Sync number waited for before flushing the data + public void Flush(ulong address, ulong size, ulong syncNumber) + { + BackingState.RecordFlush(); + + BufferPreFlush preFlush = _preFlush; + + if (preFlush != null) + { + preFlush.FlushWithAction(address, size, syncNumber); + } + else + { + FlushImpl(address, size); + } + } + /// + /// Gets an action that disposes the backing buffer using its current handle. + /// Useful for deleting an old copy of the buffer after the handle changes. 
+ /// + /// An action that deletes the buffer, using the buffer handle at the time the method is generated + public Action GetSnapshotDisposeAction() + { + BufferHandle handle = Handle; + BufferPreFlush preFlush = _preFlush; + + return () => + { + _context.Renderer.DeleteBuffer(handle); + preFlush?.Dispose(); + }; + } + + /// + /// Gets an action that flushes a range of the buffer using its current handle. + /// Useful for flushing data from old copies of the buffer after the handle changes. + /// + /// An action that flushes data from the specified range, using the buffer handle at the time the method is generated + public BufferFlushAction GetSnapshotFlushAction() + { + BufferHandle handle = Handle; + + return (ulong address, ulong size, ulong _) => + { + FlushImpl(handle, address, size); + }; + } + /// /// Align a given address and size region to page boundaries. /// @@ -857,6 +1017,8 @@ _modifiedRanges?.Clear(); _context.Renderer.DeleteBuffer(Handle); + _preFlush?.Dispose(); + _preFlush = null; UnmappedSequence++; } diff --git a/src/Ryujinx.Graphics.Gpu/Memory/BufferBackingState.cs b/src/Ryujinx.Graphics.Gpu/Memory/BufferBackingState.cs new file mode 100644 index 00000000..3f65131e --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Memory/BufferBackingState.cs @@ -0,0 +1,294 @@ +using Ryujinx.Graphics.GAL; +using System; +using System.Collections.Generic; + +namespace Ryujinx.Graphics.Gpu.Memory +{ + /// + /// Type of backing memory. + /// In ascending order of priority when merging multiple buffer backing states. + /// + internal enum BufferBackingType + { + HostMemory, + DeviceMemory, + DeviceMemoryWithFlush + } + + /// + /// Keeps track of buffer usage to decide which memory heap the buffer's memory is placed on. + /// Dedicated GPUs prefer certain types of resources to be device local, + /// and if we need data to be read back, we might prefer that they're in host memory. + /// + /// The measurements recorded here are compared against a set of heuristics (thresholds and conditions) + /// that appear to produce good performance in most software. + /// + internal struct BufferBackingState + { + private const int DeviceLocalSizeThreshold = 256 * 1024; // 256 KB + + private const int SetCountThreshold = 100; + private const int WriteCountThreshold = 50; + private const int FlushCountThreshold = 5; + private const int DeviceLocalForceExpiry = 100; + + public readonly bool IsDeviceLocal => _activeType != BufferBackingType.HostMemory; + + private readonly SystemMemoryType _systemMemoryType; + private BufferBackingType _activeType; + private BufferBackingType _desiredType; + + private bool _canSwap; + + private int _setCount; + private int _writeCount; + private int _flushCount; + private int _flushTemp; + private int _lastFlushWrite; + private int _deviceLocalForceCount; + + private readonly int _size; + + /// + /// Initialize the buffer backing state for a given parent buffer. + /// + /// GPU context + /// Parent buffer + /// Initial buffer stage + /// Buffers to inherit state from + public BufferBackingState(GpuContext context, Buffer parent, BufferStage stage, IEnumerable<Buffer> baseBuffers = null) + { + _size = (int)parent.Size; + _systemMemoryType = context.Capabilities.MemoryType; + + // Backend managed is always auto, unified memory is always host.
+ _desiredType = BufferBackingType.HostMemory; + _canSwap = _systemMemoryType != SystemMemoryType.BackendManaged && _systemMemoryType != SystemMemoryType.UnifiedMemory; + + if (_canSwap) + { + // Might want to start certain buffers as being device local, + // and the usage might also lock those buffers into being device local. + + BufferStage storageFlags = stage & BufferStage.StorageMask; + + if (parent.Size > DeviceLocalSizeThreshold && baseBuffers == null) + { + _desiredType = BufferBackingType.DeviceMemory; + } + + if (storageFlags != 0) + { + // Storage buffer bindings may require special treatment. + + var rawStage = stage & BufferStage.StageMask; + + if (rawStage == BufferStage.Fragment) + { + // Fragment read should start device local. + + _desiredType = BufferBackingType.DeviceMemory; + + if (storageFlags != BufferStage.StorageRead) + { + // Fragment write should stay device local until the use doesn't happen anymore. + + _deviceLocalForceCount = DeviceLocalForceExpiry; + } + } + + // TODO: Might be nice to force atomic access to be device local for any stage. + } + + if (baseBuffers != null) + { + foreach (Buffer buffer in baseBuffers) + { + CombineState(buffer.BackingState); + } + } + } + } + + /// + /// Combine buffer backing types, selecting the one with highest priority. + /// + /// First buffer backing type + /// Second buffer backing type + /// Combined buffer backing type + private static BufferBackingType CombineTypes(BufferBackingType left, BufferBackingType right) + { + return (BufferBackingType)Math.Max((int)left, (int)right); + } + + /// + /// Combine the state from the given buffer backing state with this one, + /// so that the state isn't lost when migrating buffers. + /// + /// Buffer state to combine into this state + private void CombineState(BufferBackingState oldState) + { + _setCount += oldState._setCount; + _writeCount += oldState._writeCount; + _flushCount += oldState._flushCount; + _flushTemp += oldState._flushTemp; + _lastFlushWrite = -1; + _deviceLocalForceCount = Math.Max(_deviceLocalForceCount, oldState._deviceLocalForceCount); + + _canSwap &= oldState._canSwap; + + _desiredType = CombineTypes(_desiredType, oldState._desiredType); + } + + /// + /// Get the buffer access for the desired backing type, and record that type as now being active. + /// + /// Parent buffer + /// Buffer access + public BufferAccess SwitchAccess(Buffer parent) + { + BufferAccess access = parent.SparseCompatible ? BufferAccess.SparseCompatible : BufferAccess.Default; + + bool isBackendManaged = _systemMemoryType == SystemMemoryType.BackendManaged; + + if (!isBackendManaged) + { + switch (_desiredType) + { + case BufferBackingType.HostMemory: + access |= BufferAccess.HostMemory; + break; + case BufferBackingType.DeviceMemory: + access |= BufferAccess.DeviceMemory; + break; + case BufferBackingType.DeviceMemoryWithFlush: + access |= BufferAccess.DeviceMemoryMapped; + break; + } + } + + _activeType = _desiredType; + + return access; + } + + /// + /// Record when data has been uploaded to the buffer. + /// + public void RecordSet() + { + _setCount++; + + ConsiderUseCounts(); + } + + /// + /// Record when data has been flushed from the buffer. + /// + public void RecordFlush() + { + if (_lastFlushWrite != _writeCount) + { + // If it's on the same page as the last flush, ignore it. + _lastFlushWrite = _writeCount; + _flushCount++; + } + } + + /// + /// Determine if the buffer backing should be changed. 
+ /// + /// True if the desired backing type is different from the current type + public readonly bool ShouldChangeBacking() + { + return _desiredType != _activeType; + } + + /// + /// Determine if the buffer backing should be changed, considering a new use with the given buffer stage. + /// + /// Buffer stage for the use + /// True if the desired backing type is different from the current type + public bool ShouldChangeBacking(BufferStage stage) + { + if (!_canSwap) + { + return false; + } + + BufferStage storageFlags = stage & BufferStage.StorageMask; + + if (storageFlags != 0) + { + if (storageFlags != BufferStage.StorageRead) + { + // Storage write. + _writeCount++; + + var rawStage = stage & BufferStage.StageMask; + + if (rawStage == BufferStage.Fragment) + { + // Switch to device memory, swap back only if this use disappears. + + _desiredType = CombineTypes(_desiredType, BufferBackingType.DeviceMemory); + _deviceLocalForceCount = DeviceLocalForceExpiry; + + // TODO: Might be nice to force atomic access to be device local for any stage. + } + } + + ConsiderUseCounts(); + } + + return _desiredType != _activeType; + } + + /// + /// Evaluate the current counts to determine what the buffer's desired backing type is. + /// This method depends on heuristics devised by testing a variety of software. + /// + private void ConsiderUseCounts() + { + if (_canSwap) + { + if (_writeCount >= WriteCountThreshold || _setCount >= SetCountThreshold || _flushCount >= FlushCountThreshold) + { + if (_deviceLocalForceCount > 0 && --_deviceLocalForceCount != 0) + { + // Some buffer usage demanded that the buffer stay device local. + // The desired type was selected when this counter was set. + } + else if (_flushCount > 0 || _flushTemp-- > 0) + { + // Buffers that flush should ideally be mapped in host address space for easy copies. + // If the buffer is large it will do better on GPU memory, as there will be more writes than data flushes (typically individual pages). + // If it is small, then it's likely most of the buffer will be flushed so we want it on host memory, as access is cached. + _desiredType = _size > DeviceLocalSizeThreshold ? BufferBackingType.DeviceMemoryWithFlush : BufferBackingType.HostMemory; + } + else if (_writeCount >= WriteCountThreshold) + { + // Buffers that are written often should ideally be in the device local heap. (Storage buffers) + _desiredType = BufferBackingType.DeviceMemory; + } + else if (_setCount > SetCountThreshold) + { + // Buffers that have their data set often should ideally be host mapped. (Constant buffers) + _desiredType = BufferBackingType.HostMemory; + } + + // It's harder for a buffer that is flushed to revert to another type of mapping. 
+ if (_flushCount > 0) + { + _flushTemp = 1000; + } + + _lastFlushWrite = -1; + _flushCount = 0; + _writeCount = 0; + _setCount = 0; + } + } + } + } +} diff --git a/src/Ryujinx.Graphics.Gpu/Memory/BufferCache.cs b/src/Ryujinx.Graphics.Gpu/Memory/BufferCache.cs index c6284780..66d2cdb6 100644 --- a/src/Ryujinx.Graphics.Gpu/Memory/BufferCache.cs +++ b/src/Ryujinx.Graphics.Gpu/Memory/BufferCache.cs @@ -107,8 +107,9 @@ namespace Ryujinx.Graphics.Gpu.Memory /// GPU memory manager where the buffer is mapped /// Start GPU virtual address of the buffer /// Size in bytes of the buffer + /// The type of usage that created the buffer /// Contiguous physical range of the buffer, after address translation - public MultiRange TranslateAndCreateBuffer(MemoryManager memoryManager, ulong gpuVa, ulong size) + public MultiRange TranslateAndCreateBuffer(MemoryManager memoryManager, ulong gpuVa, ulong size, BufferStage stage) { if (gpuVa == 0) { @@ -119,7 +120,7 @@ namespace Ryujinx.Graphics.Gpu.Memory if (address != MemoryManager.PteUnmapped) { - CreateBuffer(address, size); + CreateBuffer(address, size, stage); } return new MultiRange(address, size); @@ -132,8 +133,9 @@ namespace Ryujinx.Graphics.Gpu.Memory /// GPU memory manager where the buffer is mapped /// Start GPU virtual address of the buffer /// Size in bytes of the buffer + /// The type of usage that created the buffer /// Physical ranges of the buffer, after address translation - public MultiRange TranslateAndCreateMultiBuffers(MemoryManager memoryManager, ulong gpuVa, ulong size) + public MultiRange TranslateAndCreateMultiBuffers(MemoryManager memoryManager, ulong gpuVa, ulong size, BufferStage stage) { if (gpuVa == 0) { @@ -149,7 +151,7 @@ namespace Ryujinx.Graphics.Gpu.Memory return range; } - CreateBuffer(range); + CreateBuffer(range, stage); return range; } @@ -161,8 +163,9 @@ namespace Ryujinx.Graphics.Gpu.Memory /// GPU memory manager where the buffer is mapped /// Start GPU virtual address of the buffer /// Size in bytes of the buffer + /// The type of usage that created the buffer /// Physical ranges of the buffer, after address translation - public MultiRange TranslateAndCreateMultiBuffersPhysicalOnly(MemoryManager memoryManager, ulong gpuVa, ulong size) + public MultiRange TranslateAndCreateMultiBuffersPhysicalOnly(MemoryManager memoryManager, ulong gpuVa, ulong size, BufferStage stage) { if (gpuVa == 0) { @@ -186,11 +189,11 @@ namespace Ryujinx.Graphics.Gpu.Memory { if (range.Count > 1) { - CreateBuffer(subRange.Address, subRange.Size, SparseBufferAlignmentSize); + CreateBuffer(subRange.Address, subRange.Size, stage, SparseBufferAlignmentSize); } else { - CreateBuffer(subRange.Address, subRange.Size); + CreateBuffer(subRange.Address, subRange.Size, stage); } } } @@ -203,11 +206,12 @@ namespace Ryujinx.Graphics.Gpu.Memory /// This can be used to ensure the existance of a buffer. 
/// /// Physical ranges of memory where the buffer data is located - public void CreateBuffer(MultiRange range) + /// The type of usage that created the buffer + public void CreateBuffer(MultiRange range, BufferStage stage) { if (range.Count > 1) { - CreateMultiRangeBuffer(range); + CreateMultiRangeBuffer(range, stage); } else { @@ -215,7 +219,7 @@ namespace Ryujinx.Graphics.Gpu.Memory if (subRange.Address != MemoryManager.PteUnmapped) { - CreateBuffer(subRange.Address, subRange.Size); + CreateBuffer(subRange.Address, subRange.Size, stage); } } } @@ -226,7 +230,8 @@ namespace Ryujinx.Graphics.Gpu.Memory /// /// Address of the buffer in memory /// Size of the buffer in bytes - public void CreateBuffer(ulong address, ulong size) + /// The type of usage that created the buffer + public void CreateBuffer(ulong address, ulong size, BufferStage stage) { ulong endAddress = address + size; @@ -239,7 +244,7 @@ namespace Ryujinx.Graphics.Gpu.Memory alignedEndAddress += BufferAlignmentSize; } - CreateBufferAligned(alignedAddress, alignedEndAddress - alignedAddress); + CreateBufferAligned(alignedAddress, alignedEndAddress - alignedAddress, stage); } /// @@ -248,8 +253,9 @@ namespace Ryujinx.Graphics.Gpu.Memory /// /// Address of the buffer in memory /// Size of the buffer in bytes + /// The type of usage that created the buffer /// Alignment of the start address of the buffer in bytes - public void CreateBuffer(ulong address, ulong size, ulong alignment) + public void CreateBuffer(ulong address, ulong size, BufferStage stage, ulong alignment) { ulong alignmentMask = alignment - 1; ulong pageAlignmentMask = BufferAlignmentMask; @@ -264,7 +270,7 @@ namespace Ryujinx.Graphics.Gpu.Memory alignedEndAddress += pageAlignmentMask; } - CreateBufferAligned(alignedAddress, alignedEndAddress - alignedAddress, alignment); + CreateBufferAligned(alignedAddress, alignedEndAddress - alignedAddress, stage, alignment); } /// @@ -272,7 +278,8 @@ namespace Ryujinx.Graphics.Gpu.Memory /// if it does not exist yet. /// /// Physical ranges of memory - private void CreateMultiRangeBuffer(MultiRange range) + /// The type of usage that created the buffer + private void CreateMultiRangeBuffer(MultiRange range, BufferStage stage) { // Ensure all non-contiguous buffer we might use are sparse aligned. 
for (int i = 0; i < range.Count; i++) @@ -281,7 +288,7 @@ namespace Ryujinx.Graphics.Gpu.Memory if (subRange.Address != MemoryManager.PteUnmapped) { - CreateBuffer(subRange.Address, subRange.Size, SparseBufferAlignmentSize); + CreateBuffer(subRange.Address, subRange.Size, stage, SparseBufferAlignmentSize); } } @@ -431,9 +438,9 @@ namespace Ryujinx.Graphics.Gpu.Memory result.EndGpuAddress < gpuVa + size || result.UnmappedSequence != result.Buffer.UnmappedSequence) { - MultiRange range = TranslateAndCreateBuffer(memoryManager, gpuVa, size); + MultiRange range = TranslateAndCreateBuffer(memoryManager, gpuVa, size, BufferStage.Internal); ulong address = range.GetSubRange(0).Address; - result = new BufferCacheEntry(address, gpuVa, GetBuffer(address, size)); + result = new BufferCacheEntry(address, gpuVa, GetBuffer(address, size, BufferStage.Internal)); _dirtyCache[gpuVa] = result; } @@ -466,9 +473,9 @@ namespace Ryujinx.Graphics.Gpu.Memory result.EndGpuAddress < alignedEndGpuVa || result.UnmappedSequence != result.Buffer.UnmappedSequence) { - MultiRange range = TranslateAndCreateBuffer(memoryManager, alignedGpuVa, size); + MultiRange range = TranslateAndCreateBuffer(memoryManager, alignedGpuVa, size, BufferStage.None); ulong address = range.GetSubRange(0).Address; - result = new BufferCacheEntry(address, alignedGpuVa, GetBuffer(address, size)); + result = new BufferCacheEntry(address, alignedGpuVa, GetBuffer(address, size, BufferStage.None)); _modifiedCache[alignedGpuVa] = result; } @@ -485,7 +492,8 @@ namespace Ryujinx.Graphics.Gpu.Memory /// /// Address of the buffer in guest memory /// Size in bytes of the buffer - private void CreateBufferAligned(ulong address, ulong size) + /// The type of usage that created the buffer + private void CreateBufferAligned(ulong address, ulong size, BufferStage stage) { Buffer[] overlaps = _bufferOverlaps; int overlapsCount = _buffers.FindOverlapsNonOverlapping(address, size, ref overlaps); @@ -546,13 +554,13 @@ namespace Ryujinx.Graphics.Gpu.Memory ulong newSize = endAddress - address; - CreateBufferAligned(address, newSize, anySparseCompatible, overlaps, overlapsCount); + CreateBufferAligned(address, newSize, stage, anySparseCompatible, overlaps, overlapsCount); } } else { // No overlap, just create a new buffer. - Buffer buffer = new(_context, _physicalMemory, address, size, sparseCompatible: false); + Buffer buffer = new(_context, _physicalMemory, address, size, stage, sparseCompatible: false); lock (_buffers) { @@ -570,8 +578,9 @@ namespace Ryujinx.Graphics.Gpu.Memory /// /// Address of the buffer in guest memory /// Size in bytes of the buffer + /// The type of usage that created the buffer /// Alignment of the start address of the buffer - private void CreateBufferAligned(ulong address, ulong size, ulong alignment) + private void CreateBufferAligned(ulong address, ulong size, BufferStage stage, ulong alignment) { Buffer[] overlaps = _bufferOverlaps; int overlapsCount = _buffers.FindOverlapsNonOverlapping(address, size, ref overlaps); @@ -624,13 +633,13 @@ namespace Ryujinx.Graphics.Gpu.Memory ulong newSize = endAddress - address; - CreateBufferAligned(address, newSize, sparseAligned, overlaps, overlapsCount); + CreateBufferAligned(address, newSize, stage, sparseAligned, overlaps, overlapsCount); } } else { // No overlap, just create a new buffer. 
- Buffer buffer = new(_context, _physicalMemory, address, size, sparseAligned); + Buffer buffer = new(_context, _physicalMemory, address, size, stage, sparseAligned); lock (_buffers) { @@ -648,12 +657,13 @@ namespace Ryujinx.Graphics.Gpu.Memory /// /// Address of the buffer in guest memory /// Size in bytes of the buffer + /// The type of usage that created the buffer /// Indicates if the buffer can be used in a sparse buffer mapping /// Buffers overlapping the range /// Total of overlaps - private void CreateBufferAligned(ulong address, ulong size, bool sparseCompatible, Buffer[] overlaps, int overlapsCount) + private void CreateBufferAligned(ulong address, ulong size, BufferStage stage, bool sparseCompatible, Buffer[] overlaps, int overlapsCount) { - Buffer newBuffer = new Buffer(_context, _physicalMemory, address, size, sparseCompatible, overlaps.Take(overlapsCount)); + Buffer newBuffer = new Buffer(_context, _physicalMemory, address, size, stage, sparseCompatible, overlaps.Take(overlapsCount)); lock (_buffers) { @@ -704,7 +714,7 @@ namespace Ryujinx.Graphics.Gpu.Memory for (int index = 0; index < overlapCount; index++) { - CreateMultiRangeBuffer(overlaps[index].Range); + CreateMultiRangeBuffer(overlaps[index].Range, BufferStage.None); } } @@ -731,8 +741,8 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Size in bytes of the copy public void CopyBuffer(MemoryManager memoryManager, ulong srcVa, ulong dstVa, ulong size) { - MultiRange srcRange = TranslateAndCreateMultiBuffersPhysicalOnly(memoryManager, srcVa, size); - MultiRange dstRange = TranslateAndCreateMultiBuffersPhysicalOnly(memoryManager, dstVa, size); + MultiRange srcRange = TranslateAndCreateMultiBuffersPhysicalOnly(memoryManager, srcVa, size, BufferStage.Copy); + MultiRange dstRange = TranslateAndCreateMultiBuffersPhysicalOnly(memoryManager, dstVa, size, BufferStage.Copy); if (srcRange.Count == 1 && dstRange.Count == 1) { @@ -788,8 +798,8 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Size in bytes of the copy private void CopyBufferSingleRange(MemoryManager memoryManager, ulong srcAddress, ulong dstAddress, ulong size) { - Buffer srcBuffer = GetBuffer(srcAddress, size); - Buffer dstBuffer = GetBuffer(dstAddress, size); + Buffer srcBuffer = GetBuffer(srcAddress, size, BufferStage.Copy); + Buffer dstBuffer = GetBuffer(dstAddress, size, BufferStage.Copy); int srcOffset = (int)(srcAddress - srcBuffer.Address); int dstOffset = (int)(dstAddress - dstBuffer.Address); @@ -803,7 +813,7 @@ namespace Ryujinx.Graphics.Gpu.Memory if (srcBuffer.IsModified(srcAddress, size)) { - dstBuffer.SignalModified(dstAddress, size); + dstBuffer.SignalModified(dstAddress, size, BufferStage.Copy); } else { @@ -828,12 +838,12 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Value to be written into the buffer public void ClearBuffer(MemoryManager memoryManager, ulong gpuVa, ulong size, uint value) { - MultiRange range = TranslateAndCreateMultiBuffersPhysicalOnly(memoryManager, gpuVa, size); + MultiRange range = TranslateAndCreateMultiBuffersPhysicalOnly(memoryManager, gpuVa, size, BufferStage.Copy); for (int index = 0; index < range.Count; index++) { MemoryRange subRange = range.GetSubRange(index); - Buffer buffer = GetBuffer(subRange.Address, subRange.Size); + Buffer buffer = GetBuffer(subRange.Address, subRange.Size, BufferStage.Copy); int offset = (int)(subRange.Address - buffer.Address); @@ -849,18 +859,19 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Gets a buffer sub-range starting at a given memory address, aligned to the next page boundary. 
/// /// Physical regions of memory where the buffer is mapped + /// Buffer stage that triggered the access /// Whether the buffer will be written to by this use /// The buffer sub-range starting at the given memory address - public BufferRange GetBufferRangeAligned(MultiRange range, bool write = false) + public BufferRange GetBufferRangeAligned(MultiRange range, BufferStage stage, bool write = false) { if (range.Count > 1) { - return GetBuffer(range, write).GetRange(range); + return GetBuffer(range, stage, write).GetRange(range); } else { MemoryRange subRange = range.GetSubRange(0); - return GetBuffer(subRange.Address, subRange.Size, write).GetRangeAligned(subRange.Address, subRange.Size, write); + return GetBuffer(subRange.Address, subRange.Size, stage, write).GetRangeAligned(subRange.Address, subRange.Size, write); } } @@ -868,18 +879,19 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Gets a buffer sub-range for a given memory range. /// /// Physical regions of memory where the buffer is mapped + /// Buffer stage that triggered the access /// Whether the buffer will be written to by this use /// The buffer sub-range for the given range - public BufferRange GetBufferRange(MultiRange range, bool write = false) + public BufferRange GetBufferRange(MultiRange range, BufferStage stage, bool write = false) { if (range.Count > 1) { - return GetBuffer(range, write).GetRange(range); + return GetBuffer(range, stage, write).GetRange(range); } else { MemoryRange subRange = range.GetSubRange(0); - return GetBuffer(subRange.Address, subRange.Size, write).GetRange(subRange.Address, subRange.Size, write); + return GetBuffer(subRange.Address, subRange.Size, stage, write).GetRange(subRange.Address, subRange.Size, write); } } @@ -888,9 +900,10 @@ namespace Ryujinx.Graphics.Gpu.Memory /// A buffer overlapping with the specified range is assumed to already exist on the cache. /// /// Physical regions of memory where the buffer is mapped + /// Buffer stage that triggered the access /// Whether the buffer will be written to by this use /// The buffer where the range is fully contained - private MultiRangeBuffer GetBuffer(MultiRange range, bool write = false) + private MultiRangeBuffer GetBuffer(MultiRange range, BufferStage stage, bool write = false) { for (int i = 0; i < range.Count; i++) { @@ -902,7 +915,7 @@ namespace Ryujinx.Graphics.Gpu.Memory if (write) { - subBuffer.SignalModified(subRange.Address, subRange.Size); + subBuffer.SignalModified(subRange.Address, subRange.Size, stage); } } @@ -935,9 +948,10 @@ namespace Ryujinx.Graphics.Gpu.Memory /// /// Start address of the memory range /// Size in bytes of the memory range + /// Buffer stage that triggered the access /// Whether the buffer will be written to by this use /// The buffer where the range is fully contained - private Buffer GetBuffer(ulong address, ulong size, bool write = false) + private Buffer GetBuffer(ulong address, ulong size, BufferStage stage, bool write = false) { Buffer buffer; @@ -950,7 +964,7 @@ namespace Ryujinx.Graphics.Gpu.Memory if (write) { - buffer.SignalModified(address, size); + buffer.SignalModified(address, size, stage); } } else @@ -1004,6 +1018,18 @@ namespace Ryujinx.Graphics.Gpu.Memory } } + /// + /// Signal that the given buffer's handle has changed, + /// forcing rebind and any overlapping multi-range buffers to be recreated. 
+ /// + /// The buffer that has changed handle + public void BufferBackingChanged(Buffer buffer) + { + NotifyBuffersModified?.Invoke(); + + RecreateMultiRangeBuffers(buffer.Address, buffer.Size); + } + /// /// Prune any invalid entries from a quick access dictionary. /// diff --git a/src/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs b/src/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs index 8f2201e0..26d9501c 100644 --- a/src/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs +++ b/src/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs @@ -156,7 +156,7 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Type of each index buffer element public void SetIndexBuffer(ulong gpuVa, ulong size, IndexType type) { - MultiRange range = _channel.MemoryManager.Physical.BufferCache.TranslateAndCreateBuffer(_channel.MemoryManager, gpuVa, size); + MultiRange range = _channel.MemoryManager.Physical.BufferCache.TranslateAndCreateBuffer(_channel.MemoryManager, gpuVa, size, BufferStage.IndexBuffer); _indexBuffer.Range = range; _indexBuffer.Type = type; @@ -186,7 +186,7 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Vertex divisor of the buffer, for instanced draws public void SetVertexBuffer(int index, ulong gpuVa, ulong size, int stride, int divisor) { - MultiRange range = _channel.MemoryManager.Physical.BufferCache.TranslateAndCreateBuffer(_channel.MemoryManager, gpuVa, size); + MultiRange range = _channel.MemoryManager.Physical.BufferCache.TranslateAndCreateBuffer(_channel.MemoryManager, gpuVa, size, BufferStage.VertexBuffer); _vertexBuffers[index].Range = range; _vertexBuffers[index].Stride = stride; @@ -213,7 +213,7 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Size in bytes of the transform feedback buffer public void SetTransformFeedbackBuffer(int index, ulong gpuVa, ulong size) { - MultiRange range = _channel.MemoryManager.Physical.BufferCache.TranslateAndCreateMultiBuffers(_channel.MemoryManager, gpuVa, size); + MultiRange range = _channel.MemoryManager.Physical.BufferCache.TranslateAndCreateMultiBuffers(_channel.MemoryManager, gpuVa, size, BufferStage.TransformFeedback); _transformFeedbackBuffers[index] = new BufferBounds(range); _transformFeedbackBuffersDirty = true; @@ -260,7 +260,7 @@ namespace Ryujinx.Graphics.Gpu.Memory gpuVa = BitUtils.AlignDown(gpuVa, (ulong)_context.Capabilities.StorageBufferOffsetAlignment); - MultiRange range = _channel.MemoryManager.Physical.BufferCache.TranslateAndCreateMultiBuffers(_channel.MemoryManager, gpuVa, size); + MultiRange range = _channel.MemoryManager.Physical.BufferCache.TranslateAndCreateMultiBuffers(_channel.MemoryManager, gpuVa, size, BufferStageUtils.ComputeStorage(flags)); _cpStorageBuffers.SetBounds(index, range, flags); } @@ -284,7 +284,7 @@ namespace Ryujinx.Graphics.Gpu.Memory gpuVa = BitUtils.AlignDown(gpuVa, (ulong)_context.Capabilities.StorageBufferOffsetAlignment); - MultiRange range = _channel.MemoryManager.Physical.BufferCache.TranslateAndCreateMultiBuffers(_channel.MemoryManager, gpuVa, size); + MultiRange range = _channel.MemoryManager.Physical.BufferCache.TranslateAndCreateMultiBuffers(_channel.MemoryManager, gpuVa, size, BufferStageUtils.GraphicsStorage(stage, flags)); if (!buffers.Buffers[index].Range.Equals(range)) { @@ -303,7 +303,7 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Size in bytes of the storage buffer public void SetComputeUniformBuffer(int index, ulong gpuVa, ulong size) { - MultiRange range = _channel.MemoryManager.Physical.BufferCache.TranslateAndCreateBuffer(_channel.MemoryManager, gpuVa, size); + MultiRange range = 
_channel.MemoryManager.Physical.BufferCache.TranslateAndCreateBuffer(_channel.MemoryManager, gpuVa, size, BufferStage.Compute); _cpUniformBuffers.SetBounds(index, range); } @@ -318,7 +318,7 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Size in bytes of the storage buffer public void SetGraphicsUniformBuffer(int stage, int index, ulong gpuVa, ulong size) { - MultiRange range = _channel.MemoryManager.Physical.BufferCache.TranslateAndCreateBuffer(_channel.MemoryManager, gpuVa, size); + MultiRange range = _channel.MemoryManager.Physical.BufferCache.TranslateAndCreateBuffer(_channel.MemoryManager, gpuVa, size, BufferStageUtils.FromShaderStage(stage)); _gpUniformBuffers[stage].SetBounds(index, range); _gpUniformBuffersDirty = true; @@ -502,7 +502,7 @@ namespace Ryujinx.Graphics.Gpu.Memory foreach (var binding in _bufferTextures) { var isStore = binding.BindingInfo.Flags.HasFlag(TextureUsageFlags.ImageStore); - var range = bufferCache.GetBufferRange(binding.Range, isStore); + var range = bufferCache.GetBufferRange(binding.Range, BufferStageUtils.TextureBuffer(binding.Stage, binding.BindingInfo.Flags), isStore); binding.Texture.SetStorage(range); // The texture must be rebound to use the new storage if it was updated. @@ -526,7 +526,7 @@ namespace Ryujinx.Graphics.Gpu.Memory foreach (var binding in _bufferTextureArrays) { - var range = bufferCache.GetBufferRange(binding.Range); + var range = bufferCache.GetBufferRange(binding.Range, BufferStage.None); binding.Texture.SetStorage(range); textureArray[0] = binding.Texture; @@ -536,7 +536,7 @@ namespace Ryujinx.Graphics.Gpu.Memory foreach (var binding in _bufferImageArrays) { var isStore = binding.BindingInfo.Flags.HasFlag(TextureUsageFlags.ImageStore); - var range = bufferCache.GetBufferRange(binding.Range, isStore); + var range = bufferCache.GetBufferRange(binding.Range, BufferStage.None, isStore); binding.Texture.SetStorage(range); textureArray[0] = binding.Texture; @@ -565,7 +565,7 @@ namespace Ryujinx.Graphics.Gpu.Memory if (!_indexBuffer.Range.IsUnmapped) { - BufferRange buffer = bufferCache.GetBufferRange(_indexBuffer.Range); + BufferRange buffer = bufferCache.GetBufferRange(_indexBuffer.Range, BufferStage.IndexBuffer); _context.Renderer.Pipeline.SetIndexBuffer(buffer, _indexBuffer.Type); } @@ -597,7 +597,7 @@ namespace Ryujinx.Graphics.Gpu.Memory continue; } - BufferRange buffer = bufferCache.GetBufferRange(vb.Range); + BufferRange buffer = bufferCache.GetBufferRange(vb.Range, BufferStage.VertexBuffer); vertexBuffers[index] = new VertexBufferDescriptor(buffer, vb.Stride, vb.Divisor); } @@ -637,7 +637,7 @@ namespace Ryujinx.Graphics.Gpu.Memory continue; } - tfbs[index] = bufferCache.GetBufferRange(tfb.Range, write: true); + tfbs[index] = bufferCache.GetBufferRange(tfb.Range, BufferStage.TransformFeedback, write: true); } _context.Renderer.Pipeline.SetTransformFeedbackBuffers(tfbs); @@ -684,7 +684,7 @@ namespace Ryujinx.Graphics.Gpu.Memory _context.SupportBufferUpdater.SetTfeOffset(index, tfeOffset); - buffers[index] = new BufferAssignment(index, bufferCache.GetBufferRange(range, write: true)); + buffers[index] = new BufferAssignment(index, bufferCache.GetBufferRange(range, BufferStage.TransformFeedback, write: true)); } } @@ -751,6 +751,7 @@ namespace Ryujinx.Graphics.Gpu.Memory for (ShaderStage stage = ShaderStage.Vertex; stage <= ShaderStage.Fragment; stage++) { ref var buffers = ref bindings[(int)stage - 1]; + BufferStage bufferStage = BufferStageUtils.FromShaderStage(stage); for (int index = 0; index < buffers.Count; index++) { @@ -762,8 
+763,8 @@ namespace Ryujinx.Graphics.Gpu.Memory { var isWrite = bounds.Flags.HasFlag(BufferUsageFlags.Write); var range = isStorage - ? bufferCache.GetBufferRangeAligned(bounds.Range, isWrite) - : bufferCache.GetBufferRange(bounds.Range); + ? bufferCache.GetBufferRangeAligned(bounds.Range, bufferStage | BufferStageUtils.FromUsage(bounds.Flags), isWrite) + : bufferCache.GetBufferRange(bounds.Range, bufferStage); ranges[rangesCount++] = new BufferAssignment(bindingInfo.Binding, range); } @@ -799,8 +800,8 @@ namespace Ryujinx.Graphics.Gpu.Memory { var isWrite = bounds.Flags.HasFlag(BufferUsageFlags.Write); var range = isStorage - ? bufferCache.GetBufferRangeAligned(bounds.Range, isWrite) - : bufferCache.GetBufferRange(bounds.Range); + ? bufferCache.GetBufferRangeAligned(bounds.Range, BufferStageUtils.ComputeStorage(bounds.Flags), isWrite) + : bufferCache.GetBufferRange(bounds.Range, BufferStage.Compute); ranges[rangesCount++] = new BufferAssignment(bindingInfo.Binding, range); } @@ -875,7 +876,7 @@ namespace Ryujinx.Graphics.Gpu.Memory Format format, bool isImage) { - _channel.MemoryManager.Physical.BufferCache.CreateBuffer(range); + _channel.MemoryManager.Physical.BufferCache.CreateBuffer(range, BufferStageUtils.TextureBuffer(stage, bindingInfo.Flags)); _bufferTextures.Add(new BufferTextureBinding(stage, texture, range, bindingInfo, format, isImage)); } @@ -883,6 +884,7 @@ namespace Ryujinx.Graphics.Gpu.Memory /// /// Sets the buffer storage of a buffer texture array element. This will be bound when the buffer manager commits bindings. /// + /// Shader stage accessing the texture /// Texture array where the element will be inserted /// Buffer texture /// Physical ranges of memory where the buffer texture data is located @@ -890,6 +892,7 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Index of the binding on the array /// Format of the buffer texture public void SetBufferTextureStorage( + ShaderStage stage, ITextureArray array, ITexture texture, MultiRange range, @@ -897,7 +900,7 @@ namespace Ryujinx.Graphics.Gpu.Memory int index, Format format) { - _channel.MemoryManager.Physical.BufferCache.CreateBuffer(range); + _channel.MemoryManager.Physical.BufferCache.CreateBuffer(range, BufferStageUtils.TextureBuffer(stage, bindingInfo.Flags)); _bufferTextureArrays.Add(new BufferTextureArrayBinding(array, texture, range, bindingInfo, index, format)); } @@ -905,6 +908,7 @@ namespace Ryujinx.Graphics.Gpu.Memory /// /// Sets the buffer storage of a buffer image array element. This will be bound when the buffer manager commits bindings. 
/// + /// Shader stage accessing the texture /// Image array where the element will be inserted /// Buffer texture /// Physical ranges of memory where the buffer texture data is located @@ -912,6 +916,7 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Index of the binding on the array /// Format of the buffer texture public void SetBufferTextureStorage( + ShaderStage stage, IImageArray array, ITexture texture, MultiRange range, @@ -919,7 +924,7 @@ namespace Ryujinx.Graphics.Gpu.Memory int index, Format format) { - _channel.MemoryManager.Physical.BufferCache.CreateBuffer(range); + _channel.MemoryManager.Physical.BufferCache.CreateBuffer(range, BufferStageUtils.TextureBuffer(stage, bindingInfo.Flags)); _bufferImageArrays.Add(new BufferTextureArrayBinding(array, texture, range, bindingInfo, index, format)); } diff --git a/src/Ryujinx.Graphics.Gpu/Memory/BufferMigration.cs b/src/Ryujinx.Graphics.Gpu/Memory/BufferMigration.cs index 0a526803..ce998531 100644 --- a/src/Ryujinx.Graphics.Gpu/Memory/BufferMigration.cs +++ b/src/Ryujinx.Graphics.Gpu/Memory/BufferMigration.cs @@ -1,37 +1,21 @@ using System; +using System.Threading; namespace Ryujinx.Graphics.Gpu.Memory { /// - /// A record of when buffer data was copied from one buffer to another, along with the SyncNumber when the migration will be complete. - /// Keeps the source buffer alive for data flushes until the migration is complete. + /// A record of when buffer data was copied from multiple buffers to one migration target, + /// along with the SyncNumber when the migration will be complete. + /// Keeps the source buffers alive for data flushes until the migration is complete. + /// All spans cover the full range of the "destination" buffer. /// internal class BufferMigration : IDisposable { /// - /// The offset for the migrated region. + /// Ranges from source buffers that were copied as part of this migration. + /// Ordered by increasing base address. /// - private readonly ulong _offset; - - /// - /// The size for the migrated region. - /// - private readonly ulong _size; - - /// - /// The buffer that was migrated from. - /// - private readonly Buffer _buffer; - - /// - /// The source range action, to be called on overlap with an unreached sync number. - /// - private readonly Action _sourceRangeAction; - - /// - /// The source range list. - /// - private readonly BufferModifiedRangeList _source; + public BufferMigrationSpan[] Spans { get; private set; } /// /// The destination range list. This range list must be updated when flushing the source. @@ -43,55 +27,193 @@ namespace Ryujinx.Graphics.Gpu.Memory /// public readonly ulong SyncNumber; + /// + /// Number of active users there are traversing this migration's spans. + /// + private int _refCount; + + /// + /// Create a new buffer migration. + /// + /// Source spans for the migration + /// Destination buffer range list + /// Sync number where this migration will be complete + public BufferMigration(BufferMigrationSpan[] spans, BufferModifiedRangeList destination, ulong syncNumber) + { + Spans = spans; + Destination = destination; + SyncNumber = syncNumber; + } + + /// + /// Add a span to the migration. Allocates a new array with the target size, and replaces it. + /// + /// + /// The base address for the span is assumed to be higher than all other spans in the migration, + /// to keep the span array ordered. 
+ /// + public void AddSpanToEnd(BufferMigrationSpan span) + { + BufferMigrationSpan[] oldSpans = Spans; + + BufferMigrationSpan[] newSpans = new BufferMigrationSpan[oldSpans.Length + 1]; + + oldSpans.CopyTo(newSpans, 0); + + newSpans[oldSpans.Length] = span; + + Spans = newSpans; + } + + /// + /// Performs the given range action, or one from a migration that overlaps and has not synced yet. + /// + /// The offset to pass to the action + /// The size to pass to the action + /// The sync number that has been reached + /// The action to perform + public void RangeActionWithMigration(ulong offset, ulong size, ulong syncNumber, BufferFlushAction rangeAction) + { + long syncDiff = (long)(syncNumber - SyncNumber); + + if (syncDiff >= 0) + { + // The migration has completed. Run the parent action. + rangeAction(offset, size, syncNumber); + } + else + { + Interlocked.Increment(ref _refCount); + + ulong prevAddress = offset; + ulong endAddress = offset + size; + + foreach (BufferMigrationSpan span in Spans) + { + if (!span.Overlaps(offset, size)) + { + continue; + } + + if (span.Address > prevAddress) + { + // There's a gap between this span and the last (or the start address). Flush the range using the parent action. + + rangeAction(prevAddress, span.Address - prevAddress, syncNumber); + } + + span.RangeActionWithMigration(offset, size, syncNumber); + + prevAddress = span.Address + span.Size; + } + + if (endAddress > prevAddress) + { + // There's a gap at the end of the range with no migration. Flush the range using the parent action. + rangeAction(prevAddress, endAddress - prevAddress, syncNumber); + } + + Interlocked.Decrement(ref _refCount); + } + } + + /// + /// Dispose the buffer migration. This removes the reference from the destination range list, + /// and runs all the dispose buffers for the migration spans. (typically disposes the source buffer) + /// + public void Dispose() + { + while (Volatile.Read(ref _refCount) > 0) + { + // Coming into this method, the sync for the migration will be met, so nothing can increment the ref count. + // However, an existing traversal of the spans for data flush could still be in progress. + // Spin if this is ever the case, so they don't get disposed before the operation is complete. + } + + Destination.RemoveMigration(this); + + foreach (BufferMigrationSpan span in Spans) + { + span.Dispose(); + } + } + } + + /// + /// A record of when buffer data was copied from one buffer to another, for a specific range in a source buffer. + /// Keeps the source buffer alive for data flushes until the migration is complete. + /// + internal readonly struct BufferMigrationSpan : IDisposable + { + /// + /// The offset for the migrated region. + /// + public readonly ulong Address; + + /// + /// The size for the migrated region. + /// + public readonly ulong Size; + + /// + /// The action to perform when the migration isn't needed anymore. + /// + private readonly Action _disposeAction; + + /// + /// The source range action, to be called on overlap with an unreached sync number. + /// + private readonly BufferFlushAction _sourceRangeAction; + + /// + /// Optional migration for the source data. Can chain together if many migrations happen in a short time. + /// If this is null, then _sourceRangeAction will always provide up to date data. + /// + private readonly BufferMigration _source; + /// /// Creates a record for a buffer migration. 
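// Editorial aside (worked example with illustrative values, not from the patch): if Spans covers
// [0x0000, 0x1000) and [0x2000, 0x3000) and a flush requests offset = 0x0000, size = 0x3000 before
// SyncNumber is reached, the loop above flushes [0x0000, 0x1000) through the first span's source
// action, flushes the uncovered gap [0x1000, 0x2000) through the parent rangeAction, then flushes
// [0x2000, 0x3000) through the second span. The _refCount increment/decrement pair keeps Dispose
// from freeing the source buffers while such a traversal is still in flight.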
/// /// The source buffer for this migration + /// The action to perform when the migration isn't needed anymore /// The flush action for the source buffer - /// The modified range list for the source buffer - /// The modified range list for the destination buffer - /// The sync number for when the migration is complete - public BufferMigration( + /// Pending migration for the source buffer + public BufferMigrationSpan( Buffer buffer, - Action sourceRangeAction, - BufferModifiedRangeList source, - BufferModifiedRangeList dest, - ulong syncNumber) + Action disposeAction, + BufferFlushAction sourceRangeAction, + BufferMigration source) { - _offset = buffer.Address; - _size = buffer.Size; - _buffer = buffer; + Address = buffer.Address; + Size = buffer.Size; + _disposeAction = disposeAction; _sourceRangeAction = sourceRangeAction; _source = source; - Destination = dest; - SyncNumber = syncNumber; } + /// + /// Creates a record for a buffer migration, using the default buffer dispose action. + /// + /// The source buffer for this migration + /// The flush action for the source buffer + /// Pending migration for the source buffer + public BufferMigrationSpan( + Buffer buffer, + BufferFlushAction sourceRangeAction, + BufferMigration source) : this(buffer, buffer.DecrementReferenceCount, sourceRangeAction, source) { } + /// /// Determine if the given range overlaps this migration, and has not been completed yet. /// /// Start offset /// Range size - /// The sync number that was waited on /// True if overlapping and in progress, false otherwise - public bool Overlaps(ulong offset, ulong size, ulong syncNumber) + public bool Overlaps(ulong offset, ulong size) { ulong end = offset + size; - ulong destEnd = _offset + _size; - long syncDiff = (long)(syncNumber - SyncNumber); // syncNumber is less if the copy has not completed. + ulong destEnd = Address + Size; - return !(end <= _offset || offset >= destEnd) && syncDiff < 0; - } - - /// - /// Determine if the given range matches this migration. - /// - /// Start offset - /// Range size - /// True if the range exactly matches, false otherwise - public bool FullyMatches(ulong offset, ulong size) - { - return _offset == offset && _size == size; + return !(end <= Address || offset >= destEnd); } /// @@ -100,26 +222,30 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Start offset /// Range size /// Current sync number - /// The modified range list that originally owned this range - public void RangeActionWithMigration(ulong offset, ulong size, ulong syncNumber, BufferModifiedRangeList parent) + public void RangeActionWithMigration(ulong offset, ulong size, ulong syncNumber) { ulong end = offset + size; - end = Math.Min(_offset + _size, end); - offset = Math.Max(_offset, offset); + end = Math.Min(Address + Size, end); + offset = Math.Max(Address, offset); size = end - offset; - _source.RangeActionWithMigration(offset, size, syncNumber, parent, _sourceRangeAction); + if (_source != null) + { + _source.RangeActionWithMigration(offset, size, syncNumber, _sourceRangeAction); + } + else + { + _sourceRangeAction(offset, size, syncNumber); + } } /// - /// Removes this reference to the range list, potentially allowing for the source buffer to be disposed. + /// Removes this migration span, potentially allowing for the source buffer to be disposed. 
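// Editorial aside: the BufferFlushAction delegate these spans invoke is not shown in this section.
// Inferred from its call sites (rangeAction(offset, size, syncNumber)), a declaration equivalent to
// the following is assumed:
public delegate void BufferFlushAction(ulong address, ulong size, ulong syncNumber);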
/// public void Dispose() { - Destination.RemoveMigration(this); - - _buffer.DecrementReferenceCount(); + _disposeAction(); } } } diff --git a/src/Ryujinx.Graphics.Gpu/Memory/BufferModifiedRangeList.cs b/src/Ryujinx.Graphics.Gpu/Memory/BufferModifiedRangeList.cs index 6ada8a4b..d330de63 100644 --- a/src/Ryujinx.Graphics.Gpu/Memory/BufferModifiedRangeList.cs +++ b/src/Ryujinx.Graphics.Gpu/Memory/BufferModifiedRangeList.cs @@ -1,7 +1,6 @@ using Ryujinx.Common.Pools; using Ryujinx.Memory.Range; using System; -using System.Collections.Generic; using System.Linq; namespace Ryujinx.Graphics.Gpu.Memory @@ -72,10 +71,10 @@ namespace Ryujinx.Graphics.Gpu.Memory private readonly GpuContext _context; private readonly Buffer _parent; - private readonly Action _flushAction; + private readonly BufferFlushAction _flushAction; - private List _sources; - private BufferMigration _migrationTarget; + private BufferMigration _source; + private BufferModifiedRangeList _migrationTarget; private readonly object _lock = new(); @@ -99,7 +98,7 @@ namespace Ryujinx.Graphics.Gpu.Memory /// GPU context that the buffer range list belongs to /// The parent buffer that owns this range list /// The flush action for the parent buffer - public BufferModifiedRangeList(GpuContext context, Buffer parent, Action flushAction) : base(BackingInitialSize) + public BufferModifiedRangeList(GpuContext context, Buffer parent, BufferFlushAction flushAction) : base(BackingInitialSize) { _context = context; _parent = parent; @@ -199,6 +198,36 @@ namespace Ryujinx.Graphics.Gpu.Memory } } + /// + /// Gets modified ranges within the specified region, and then fires the given action for each range individually. + /// + /// Start address to query + /// Size to query + /// Sync number required for a range to be signalled + /// The action to call for each modified range + public void GetRangesAtSync(ulong address, ulong size, ulong syncNumber, Action rangeAction) + { + int count = 0; + + ref var overlaps = ref ThreadStaticArray.Get(); + + // Range list must be consistent for this operation. + lock (_lock) + { + count = FindOverlapsNonOverlapping(address, size, ref overlaps); + } + + for (int i = 0; i < count; i++) + { + BufferModifiedRange overlap = overlaps[i]; + + if (overlap.SyncNumber == syncNumber) + { + rangeAction(overlap.Address, overlap.Size); + } + } + } + /// /// Gets modified ranges within the specified region, and then fires the given action for each range individually. /// @@ -245,41 +274,16 @@ namespace Ryujinx.Graphics.Gpu.Memory /// The offset to pass to the action /// The size to pass to the action /// The sync number that has been reached - /// The modified range list that originally owned this range /// The action to perform - public void RangeActionWithMigration(ulong offset, ulong size, ulong syncNumber, BufferModifiedRangeList parent, Action rangeAction) + public void RangeActionWithMigration(ulong offset, ulong size, ulong syncNumber, BufferFlushAction rangeAction) { - bool firstSource = true; - - if (parent != this) + if (_source != null) { - lock (_lock) - { - if (_sources != null) - { - foreach (BufferMigration source in _sources) - { - if (source.Overlaps(offset, size, syncNumber)) - { - if (firstSource && !source.FullyMatches(offset, size)) - { - // Perform this buffer's action first. The migrations will run after. 
- rangeAction(offset, size); - } - - source.RangeActionWithMigration(offset, size, syncNumber, parent); - - firstSource = false; - } - } - } - } + _source.RangeActionWithMigration(offset, size, syncNumber, rangeAction); } - - if (firstSource) + else { - // No overlapping migrations, or they are not meant for this range, flush the data using the given action. - rangeAction(offset, size); + rangeAction(offset, size, syncNumber); } } @@ -319,7 +323,7 @@ namespace Ryujinx.Graphics.Gpu.Memory ClearPart(overlap, clampAddress, clampEnd); - RangeActionWithMigration(clampAddress, clampEnd - clampAddress, waitSync, overlap.Parent, _flushAction); + RangeActionWithMigration(clampAddress, clampEnd - clampAddress, waitSync, _flushAction); } } @@ -329,7 +333,7 @@ namespace Ryujinx.Graphics.Gpu.Memory // There is a migration target to call instead. This can't be changed after set so accessing it outside the lock is fine. - _migrationTarget.Destination.RemoveRangesAndFlush(overlaps, rangeCount, highestDiff, currentSync, address, endAddress); + _migrationTarget.RemoveRangesAndFlush(overlaps, rangeCount, highestDiff, currentSync, address, endAddress); } /// @@ -367,7 +371,7 @@ namespace Ryujinx.Graphics.Gpu.Memory if (rangeCount == -1) { - _migrationTarget.Destination.WaitForAndFlushRanges(address, size); + _migrationTarget.WaitForAndFlushRanges(address, size); return; } @@ -407,6 +411,9 @@ namespace Ryujinx.Graphics.Gpu.Memory /// /// Inherit ranges from another modified range list. /// + /// + /// Assumes that ranges will be inherited in address ascending order. + /// /// The range list to inherit from /// The action to call for each modified range public void InheritRanges(BufferModifiedRangeList ranges, Action registerRangeAction) @@ -415,18 +422,31 @@ namespace Ryujinx.Graphics.Gpu.Memory lock (ranges._lock) { - BufferMigration migration = new(ranges._parent, ranges._flushAction, ranges, this, _context.SyncNumber); - - ranges._parent.IncrementReferenceCount(); - ranges._migrationTarget = migration; - - _context.RegisterBufferMigration(migration); - inheritRanges = ranges.ToArray(); lock (_lock) { - (_sources ??= new List()).Add(migration); + // Copy over the migration from the previous range list + + BufferMigration oldMigration = ranges._source; + + BufferMigrationSpan span = new BufferMigrationSpan(ranges._parent, ranges._flushAction, oldMigration); + ranges._parent.IncrementReferenceCount(); + + if (_source == null) + { + // Create a new migration. + _source = new BufferMigration(new BufferMigrationSpan[] { span }, this, _context.SyncNumber); + + _context.RegisterBufferMigration(_source); + } + else + { + // Extend the migration + _source.AddSpanToEnd(span); + } + + ranges._migrationTarget = this; foreach (BufferModifiedRange range in inheritRanges) { @@ -445,6 +465,27 @@ namespace Ryujinx.Graphics.Gpu.Memory } } + /// + /// Register a migration from previous buffer storage. This migration is from a snapshot of the buffer's + /// current handle to its handle in the future, and is assumed to be complete when the sync action completes. + /// When the migration completes, the handle is disposed. + /// + public void SelfMigration() + { + lock (_lock) + { + BufferMigrationSpan span = new(_parent, _parent.GetSnapshotDisposeAction(), _parent.GetSnapshotFlushAction(), _source); + BufferMigration migration = new(new BufferMigrationSpan[] { span }, this, _context.SyncNumber); + + // Migration target is used to redirect flush actions to the latest range list, + // so we don't need to set it here. 
(this range list is still the latest) + + _context.RegisterBufferMigration(migration); + + _source = migration; + } + } + /// /// Removes a source buffer migration, indicating its copy has completed. /// @@ -453,7 +494,10 @@ namespace Ryujinx.Graphics.Gpu.Memory { lock (_lock) { - _sources.Remove(migration); + if (_source == migration) + { + _source = null; + } } } diff --git a/src/Ryujinx.Graphics.Gpu/Memory/BufferPreFlush.cs b/src/Ryujinx.Graphics.Gpu/Memory/BufferPreFlush.cs new file mode 100644 index 00000000..d58b9ea6 --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Memory/BufferPreFlush.cs @@ -0,0 +1,295 @@ +using Ryujinx.Common; +using Ryujinx.Graphics.GAL; +using System; + +namespace Ryujinx.Graphics.Gpu.Memory +{ + /// + /// Manages flushing ranges from buffers in advance for easy access, if they are flushed often. + /// Typically, from device local memory to a host mapped target for cached access. + /// + internal class BufferPreFlush : IDisposable + { + private const ulong PageSize = MemoryManager.PageSize; + + /// + /// Threshold for the number of copies without a flush required to disable preflush on a page. + /// + private const int DeactivateCopyThreshold = 200; + + /// + /// Value that indicates whether a page has been flushed or copied before. + /// + private enum PreFlushState + { + None, + HasFlushed, + HasCopied + } + + /// + /// Flush state for each page of the buffer. + /// Controls whether data should be copied to the flush buffer, what sync is expected + /// and unflushed copy counting for stopping copies that are no longer needed. + /// + private struct PreFlushPage + { + public PreFlushState State; + public ulong FirstActivatedSync; + public ulong LastCopiedSync; + public int CopyCount; + } + + /// + /// True if there are ranges that should copy to the flush buffer, false otherwise. + /// + public bool ShouldCopy { get; private set; } + + private readonly GpuContext _context; + private readonly Buffer _buffer; + private readonly PreFlushPage[] _pages; + private readonly ulong _address; + private readonly ulong _size; + private readonly ulong _misalignment; + private readonly Action _flushAction; + + private BufferHandle _flushBuffer; + + public BufferPreFlush(GpuContext context, Buffer parent, Action flushAction) + { + _context = context; + _buffer = parent; + _address = parent.Address; + _size = parent.Size; + _pages = new PreFlushPage[BitUtils.DivRoundUp(_size, PageSize)]; + _misalignment = _address & (PageSize - 1); + + _flushAction = flushAction; + } + + /// + /// Ensure that the flush buffer exists. + /// + private void EnsureFlushBuffer() + { + if (_flushBuffer == BufferHandle.Null) + { + _flushBuffer = _context.Renderer.CreateBuffer((int)_size, BufferAccess.HostMemory); + } + } + + /// + /// Gets a page range from an address and size byte range. + /// + /// Range address + /// Range size + /// A page index and count + private (int index, int count) GetPageRange(ulong address, ulong size) + { + ulong offset = address - _address; + ulong endOffset = offset + size; + + int basePage = (int)(offset / PageSize); + int endPage = (int)((endOffset - 1) / PageSize); + + return (basePage, 1 + endPage - basePage); + } + + /// + /// Gets an offset and size range in the parent buffer from a page index and count. 
+ /// + /// Range start page + /// Range page count + /// Offset and size range + private (int offset, int size) GetOffset(int startPage, int count) + { + int offset = (int)((ulong)startPage * PageSize - _misalignment); + int endOffset = (int)((ulong)(startPage + count) * PageSize - _misalignment); + + offset = Math.Max(0, offset); + endOffset = Math.Min((int)_size, endOffset); + + return (offset, endOffset - offset); + } + + /// + /// Copy a range of pages from the parent buffer into the flush buffer. + /// + /// Range start page + /// Range page count + private void CopyPageRange(int startPage, int count) + { + (int offset, int size) = GetOffset(startPage, count); + + EnsureFlushBuffer(); + + _context.Renderer.Pipeline.CopyBuffer(_buffer.Handle, _flushBuffer, offset, offset, size); + } + + /// + /// Copy a modified range into the flush buffer if it's marked as flushed. + /// Any pages the range overlaps are copied, and copies aren't repeated in the same sync number. + /// + /// Range address + /// Range size + public void CopyModified(ulong address, ulong size) + { + (int baseIndex, int count) = GetPageRange(address, size); + ulong syncNumber = _context.SyncNumber; + + int startPage = -1; + + for (int i = 0; i < count; i++) + { + int pageIndex = baseIndex + i; + ref PreFlushPage page = ref _pages[pageIndex]; + + if (page.State > PreFlushState.None) + { + // Perform the copy, and update the state of each page. + if (startPage == -1) + { + startPage = pageIndex; + } + + if (page.State != PreFlushState.HasCopied) + { + page.FirstActivatedSync = syncNumber; + page.State = PreFlushState.HasCopied; + } + else if (page.CopyCount++ >= DeactivateCopyThreshold) + { + page.CopyCount = 0; + page.State = PreFlushState.None; + } + + if (page.LastCopiedSync != syncNumber) + { + page.LastCopiedSync = syncNumber; + } + } + else if (startPage != -1) + { + CopyPageRange(startPage, pageIndex - startPage); + + startPage = -1; + } + } + + if (startPage != -1) + { + CopyPageRange(startPage, (baseIndex + count) - startPage); + } + } + + /// + /// Flush the given page range back into guest memory, optionally using data from the flush buffer. + /// The actual flushed range is an intersection of the page range and the address range. + /// + /// Address range start + /// Address range size + /// Page range start + /// Page range count + /// True if the data should come from the flush buffer + private void FlushPageRange(ulong address, ulong size, int startPage, int count, bool preFlush) + { + (int pageOffset, int pageSize) = GetOffset(startPage, count); + + int offset = (int)(address - _address); + int end = offset + (int)size; + + offset = Math.Max(offset, pageOffset); + end = Math.Min(end, pageOffset + pageSize); + + if (end >= offset) + { + BufferHandle handle = preFlush ? _flushBuffer : _buffer.Handle; + _flushAction(handle, _address + (ulong)offset, (ulong)(end - offset)); + } + } + + /// + /// Flush the given address range back into guest memory, optionally using data from the flush buffer. + /// When a copy has been performed on or before the waited sync number, the data can come from the flush buffer. + /// Otherwise, it flushes the parent buffer directly. + /// + /// Range address + /// Range size + /// Sync number that has been waited for + public void FlushWithAction(ulong address, ulong size, ulong syncNumber) + { + // Copy the parts of the range that have pre-flush copies that have been completed. + // Run the flush action for ranges that don't have pre-flush copies. 
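// Editorial aside (worked example, illustrative values only, assuming the usual 4 KiB
// MemoryManager.PageSize): for a parent buffer at guest address 0x10020 with _size = 0x3000,
// _misalignment is 0x20. A modified range at 0x11000 of 0x100 bytes maps through GetPageRange to
// pages 0..1 (byte offsets 0xFE0 .. 0x10E0), and GetOffset(0, 2) clamps the copy window to parent
// offsets [0x0, 0x1FE0), which is the range CopyPageRange hands to CopyBuffer.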
+ + // If a range doesn't have a pre-flush copy, consider adding one. + + (int baseIndex, int count) = GetPageRange(address, size); + + bool rangePreFlushed = false; + int startPage = -1; + + for (int i = 0; i < count; i++) + { + int pageIndex = baseIndex + i; + ref PreFlushPage page = ref _pages[pageIndex]; + + bool flushPage = false; + page.CopyCount = 0; + + if (page.State == PreFlushState.HasCopied) + { + if (syncNumber >= page.FirstActivatedSync) + { + // After the range is first activated, its data will always be copied to the preflush buffer on each sync. + flushPage = true; + } + } + else if (page.State == PreFlushState.None) + { + page.State = PreFlushState.HasFlushed; + ShouldCopy = true; + } + + if (flushPage) + { + if (!rangePreFlushed || startPage == -1) + { + if (startPage != -1) + { + FlushPageRange(address, size, startPage, pageIndex - startPage, false); + } + + rangePreFlushed = true; + startPage = pageIndex; + } + } + else if (rangePreFlushed || startPage == -1) + { + if (startPage != -1) + { + FlushPageRange(address, size, startPage, pageIndex - startPage, true); + } + + rangePreFlushed = false; + startPage = pageIndex; + } + } + + if (startPage != -1) + { + FlushPageRange(address, size, startPage, (baseIndex + count) - startPage, rangePreFlushed); + } + } + + /// + /// Dispose the flush buffer, if present. + /// + public void Dispose() + { + if (_flushBuffer != BufferHandle.Null) + { + _context.Renderer.DeleteBuffer(_flushBuffer); + } + } + } +} diff --git a/src/Ryujinx.Graphics.Gpu/Memory/BufferStage.cs b/src/Ryujinx.Graphics.Gpu/Memory/BufferStage.cs new file mode 100644 index 00000000..d56abda2 --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Memory/BufferStage.cs @@ -0,0 +1,99 @@ +using Ryujinx.Graphics.Shader; +using System.Runtime.CompilerServices; + +namespace Ryujinx.Graphics.Gpu.Memory +{ + /// + /// Pipeline stages that can modify buffer data, as well as flags indicating storage usage. + /// Must match ShaderStage for the shader stages, though anything after that can be in any order. + /// + internal enum BufferStage : byte + { + Compute, + Vertex, + TessellationControl, + TessellationEvaluation, + Geometry, + Fragment, + + Indirect, + VertexBuffer, + IndexBuffer, + Copy, + TransformFeedback, + Internal, + None, + + StageMask = 0x3f, + StorageMask = 0xc0, + + StorageRead = 0x40, + StorageWrite = 0x80, + +#pragma warning disable CA1069 // Enums values should not be duplicated + StorageAtomic = 0xc0 +#pragma warning restore CA1069 // Enums values should not be duplicated + } + + /// + /// Utility methods to convert shader stages and binding flags into buffer stages. 
+ /// + internal static class BufferStageUtils + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static BufferStage FromShaderStage(ShaderStage stage) + { + return (BufferStage)stage; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static BufferStage FromShaderStage(int stageIndex) + { + return (BufferStage)(stageIndex + 1); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static BufferStage FromUsage(BufferUsageFlags flags) + { + if (flags.HasFlag(BufferUsageFlags.Write)) + { + return BufferStage.StorageWrite; + } + else + { + return BufferStage.StorageRead; + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static BufferStage FromUsage(TextureUsageFlags flags) + { + if (flags.HasFlag(TextureUsageFlags.ImageStore)) + { + return BufferStage.StorageWrite; + } + else + { + return BufferStage.StorageRead; + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static BufferStage TextureBuffer(ShaderStage shaderStage, TextureUsageFlags flags) + { + return FromShaderStage(shaderStage) | FromUsage(flags); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static BufferStage GraphicsStorage(int stageIndex, BufferUsageFlags flags) + { + return FromShaderStage(stageIndex) | FromUsage(flags); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static BufferStage ComputeStorage(BufferUsageFlags flags) + { + return BufferStage.Compute | FromUsage(flags); + } + } +} diff --git a/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs b/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs index 2a39ae44..7bcff947 100644 --- a/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs +++ b/src/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs @@ -61,7 +61,9 @@ namespace Ryujinx.Graphics.OpenGL { BufferCount++; - if (access.HasFlag(GAL.BufferAccess.FlushPersistent)) + var memType = access & GAL.BufferAccess.MemoryTypeMask; + + if (memType == GAL.BufferAccess.HostMemory) { BufferHandle handle = Buffer.CreatePersistent(size); @@ -75,11 +77,6 @@ namespace Ryujinx.Graphics.OpenGL } } - public BufferHandle CreateBuffer(int size, GAL.BufferAccess access, BufferHandle storageHint) - { - return CreateBuffer(size, access); - } - public BufferHandle CreateBuffer(nint pointer, int size) { throw new NotSupportedException(); @@ -148,6 +145,7 @@ namespace Ryujinx.Graphics.OpenGL return new Capabilities( api: TargetApi.OpenGL, vendorName: GpuVendor, + memoryType: SystemMemoryType.BackendManaged, hasFrontFacingBug: intelWindows, hasVectorIndexingBug: amdWindows, needsFragmentOutputSpecialization: false, diff --git a/src/Ryujinx.Graphics.Vulkan/BufferHolder.cs b/src/Ryujinx.Graphics.Vulkan/BufferHolder.cs index 3673ee5a..3dcbc313 100644 --- a/src/Ryujinx.Graphics.Vulkan/BufferHolder.cs +++ b/src/Ryujinx.Graphics.Vulkan/BufferHolder.cs @@ -1,4 +1,3 @@ -using Ryujinx.Common.Logging; using Ryujinx.Graphics.GAL; using Silk.NET.Vulkan; using System; @@ -31,40 +30,29 @@ namespace Ryujinx.Graphics.Vulkan private readonly VulkanRenderer _gd; private readonly Device _device; - private MemoryAllocation _allocation; - private Auto _buffer; - private Auto _allocationAuto; + private readonly MemoryAllocation _allocation; + private readonly Auto _buffer; + private readonly Auto _allocationAuto; private readonly bool _allocationImported; - private ulong _bufferHandle; + private readonly ulong _bufferHandle; private CacheByRange _cachedConvertedBuffers; public int Size { get; } - private IntPtr _map; + private readonly IntPtr _map; - private 
MultiFenceHolder _waitable; + private readonly MultiFenceHolder _waitable; private bool _lastAccessIsWrite; - private BufferAllocationType _baseType; - private BufferAllocationType _currentType; - private bool _swapQueued; - - public BufferAllocationType DesiredType { get; private set; } - - private int _setCount; - private int _writeCount; - private int _flushCount; - private int _flushTemp; - private int _lastFlushWrite = -1; + private readonly BufferAllocationType _baseType; + private readonly BufferAllocationType _activeType; private readonly ReaderWriterLockSlim _flushLock; private FenceHolder _flushFence; private int _flushWaiting; - private List _swapActions; - private byte[] _pendingData; private BufferMirrorRangeList _pendingDataRanges; private Dictionary _mirrors; @@ -83,8 +71,7 @@ namespace Ryujinx.Graphics.Vulkan _map = allocation.HostPointer; _baseType = type; - _currentType = currentType; - DesiredType = currentType; + _activeType = currentType; _flushLock = new ReaderWriterLockSlim(); _useMirrors = gd.IsTBDR; @@ -104,8 +91,7 @@ namespace Ryujinx.Graphics.Vulkan _map = _allocation.HostPointer + offset; _baseType = type; - _currentType = currentType; - DesiredType = currentType; + _activeType = currentType; _flushLock = new ReaderWriterLockSlim(); } @@ -120,164 +106,11 @@ namespace Ryujinx.Graphics.Vulkan Size = size; _baseType = BufferAllocationType.Sparse; - _currentType = BufferAllocationType.Sparse; - DesiredType = BufferAllocationType.Sparse; + _activeType = BufferAllocationType.Sparse; _flushLock = new ReaderWriterLockSlim(); } - public bool TryBackingSwap(ref CommandBufferScoped? cbs) - { - if (_swapQueued && DesiredType != _currentType) - { - // Only swap if the buffer is not used in any queued command buffer. - bool isRented = _buffer.HasRentedCommandBufferDependency(_gd.CommandBufferPool); - - if (!isRented && _gd.CommandBufferPool.OwnedByCurrentThread && !_flushLock.IsReadLockHeld && (_pendingData == null || cbs != null)) - { - var currentAllocation = _allocationAuto; - var currentBuffer = _buffer; - IntPtr currentMap = _map; - - (VkBuffer buffer, MemoryAllocation allocation, BufferAllocationType resultType) = _gd.BufferManager.CreateBacking(_gd, Size, DesiredType, false, false, _currentType); - - if (buffer.Handle != 0) - { - if (cbs != null) - { - ClearMirrors(cbs.Value, 0, Size); - } - - _flushLock.EnterWriteLock(); - - ClearFlushFence(); - - _waitable = new MultiFenceHolder(Size); - - _allocation = allocation; - _allocationAuto = new Auto(allocation); - _buffer = new Auto(new DisposableBuffer(_gd.Api, _device, buffer), this, _waitable, _allocationAuto); - _bufferHandle = buffer.Handle; - _map = allocation.HostPointer; - - if (_map != IntPtr.Zero && currentMap != IntPtr.Zero) - { - // Copy data directly. Readbacks don't have to wait if this is done. - - unsafe - { - new Span((void*)currentMap, Size).CopyTo(new Span((void*)_map, Size)); - } - } - else - { - cbs ??= _gd.CommandBufferPool.Rent(); - - CommandBufferScoped cbsV = cbs.Value; - - Copy(_gd, cbsV, currentBuffer, _buffer, 0, 0, Size); - - // Need to wait for the data to reach the new buffer before data can be flushed. 
- - _flushFence = _gd.CommandBufferPool.GetFence(cbsV.CommandBufferIndex); - _flushFence.Get(); - } - - Logger.Debug?.PrintMsg(LogClass.Gpu, $"Converted {Size} buffer {_currentType} to {resultType}"); - - _currentType = resultType; - - if (_swapActions != null) - { - foreach (var action in _swapActions) - { - action(); - } - - _swapActions.Clear(); - } - - currentBuffer.Dispose(); - currentAllocation.Dispose(); - - _gd.PipelineInternal.SwapBuffer(currentBuffer, _buffer); - - _flushLock.ExitWriteLock(); - } - - _swapQueued = false; - - return true; - } - - return false; - } - - _swapQueued = false; - - return true; - } - - private void ConsiderBackingSwap() - { - if (_baseType == BufferAllocationType.Auto) - { - // When flushed, wait for a bit more info to make a decision. - bool wasFlushed = _flushTemp > 0; - int multiplier = wasFlushed ? 2 : 0; - if (_writeCount >= (WriteCountThreshold << multiplier) || _setCount >= (SetCountThreshold << multiplier) || _flushCount >= (FlushCountThreshold << multiplier)) - { - if (_flushCount > 0 || _flushTemp-- > 0) - { - // Buffers that flush should ideally be mapped in host address space for easy copies. - // If the buffer is large it will do better on GPU memory, as there will be more writes than data flushes (typically individual pages). - // If it is small, then it's likely most of the buffer will be flushed so we want it on host memory, as access is cached. - - bool hostMappingSensitive = _gd.Vendor == Vendor.Nvidia; - bool deviceLocalMapped = Size > DeviceLocalSizeThreshold || (wasFlushed && _writeCount > _flushCount * 10 && hostMappingSensitive) || _currentType == BufferAllocationType.DeviceLocalMapped; - - DesiredType = deviceLocalMapped ? BufferAllocationType.DeviceLocalMapped : BufferAllocationType.HostMapped; - - // It's harder for a buffer that is flushed to revert to another type of mapping. - if (_flushCount > 0) - { - _flushTemp = 1000; - } - } - else if (_writeCount >= (WriteCountThreshold << multiplier)) - { - // Buffers that are written often should ideally be in the device local heap. (Storage buffers) - DesiredType = BufferAllocationType.DeviceLocal; - } - else if (_setCount > (SetCountThreshold << multiplier)) - { - // Buffers that have their data set often should ideally be host mapped. 
(Constant buffers) - DesiredType = BufferAllocationType.HostMapped; - } - - _lastFlushWrite = -1; - _flushCount = 0; - _writeCount = 0; - _setCount = 0; - } - - if (!_swapQueued && DesiredType != _currentType) - { - _swapQueued = true; - - _gd.PipelineInternal.AddBackingSwap(this); - } - } - } - - public void Pin() - { - if (_baseType == BufferAllocationType.Auto) - { - _baseType = _currentType; - } - } - public unsafe Auto CreateView(VkFormat format, int offset, int size, Action invalidateView) { var bufferViewCreateInfo = new BufferViewCreateInfo @@ -291,19 +124,9 @@ namespace Ryujinx.Graphics.Vulkan _gd.Api.CreateBufferView(_device, bufferViewCreateInfo, null, out var bufferView).ThrowOnError(); - (_swapActions ??= new List()).Add(invalidateView); - return new Auto(new DisposableBufferView(_gd.Api, _device, bufferView), this, _waitable, _buffer); } - public void InheritMetrics(BufferHolder other) - { - _setCount = other._setCount; - _writeCount = other._writeCount; - _flushCount = other._flushCount; - _flushTemp = other._flushTemp; - } - public unsafe void InsertBarrier(CommandBuffer commandBuffer, bool isWrite) { // If the last access is write, we always need a barrier to be sure we will read or modify @@ -423,18 +246,8 @@ namespace Ryujinx.Graphics.Vulkan { if (isWrite) { - _writeCount++; - SignalWrite(0, Size); } - else if (isSSBO) - { - // Always consider SSBO access for swapping to device local memory. - - _writeCount++; - - ConsiderBackingSwap(); - } return _buffer; } @@ -443,8 +256,6 @@ namespace Ryujinx.Graphics.Vulkan { if (isWrite) { - _writeCount++; - SignalWrite(offset, size); } @@ -543,8 +354,6 @@ namespace Ryujinx.Graphics.Vulkan public void SignalWrite(int offset, int size) { - ConsiderBackingSwap(); - if (offset == 0 && size == Size) { _cachedConvertedBuffers.Clear(); @@ -624,13 +433,6 @@ namespace Ryujinx.Graphics.Vulkan WaitForFlushFence(); - if (_lastFlushWrite != _writeCount) - { - // If it's on the same page as the last flush, ignore it. - _lastFlushWrite = _writeCount; - _flushCount++; - } - Span result; if (_map != IntPtr.Zero) @@ -711,8 +513,7 @@ namespace Ryujinx.Graphics.Vulkan return; } - _setCount++; - bool allowMirror = _useMirrors && allowCbsWait && cbs != null && _currentType <= BufferAllocationType.HostMapped; + bool allowMirror = _useMirrors && allowCbsWait && cbs != null && _activeType <= BufferAllocationType.HostMapped; if (_map != IntPtr.Zero) { @@ -863,8 +664,6 @@ namespace Ryujinx.Graphics.Vulkan var dstBuffer = GetBuffer(cbs.CommandBuffer, dstOffset, data.Length, true).Get(cbs, dstOffset, data.Length, true).Value; - _writeCount--; - InsertBufferBarrier( _gd, cbs.CommandBuffer, @@ -1100,8 +899,6 @@ namespace Ryujinx.Graphics.Vulkan public void Dispose() { - _swapQueued = false; - _gd.PipelineInternal?.FlushCommandsIfWeightExceeding(_buffer, (ulong)Size); _buffer.Dispose(); diff --git a/src/Ryujinx.Graphics.Vulkan/BufferManager.cs b/src/Ryujinx.Graphics.Vulkan/BufferManager.cs index 33289a0e..e73cde83 100644 --- a/src/Ryujinx.Graphics.Vulkan/BufferManager.cs +++ b/src/Ryujinx.Graphics.Vulkan/BufferManager.cs @@ -165,10 +165,6 @@ namespace Ryujinx.Graphics.Vulkan if (TryGetBuffer(range.Handle, out var existingHolder)) { - // Since this buffer now also owns the memory from the referenced buffer, - // we pin it to ensure the memory location will not change. 
- existingHolder.Pin(); - (var memory, var offset) = existingHolder.GetDeviceMemoryAndOffset(); memoryBinds[index] = new SparseMemoryBind() @@ -235,10 +231,9 @@ namespace Ryujinx.Graphics.Vulkan int size, bool sparseCompatible = false, BufferAllocationType baseType = BufferAllocationType.HostMapped, - BufferHandle storageHint = default, bool forceMirrors = false) { - return CreateWithHandle(gd, size, out _, sparseCompatible, baseType, storageHint, forceMirrors); + return CreateWithHandle(gd, size, out _, sparseCompatible, baseType, forceMirrors); } public BufferHandle CreateWithHandle( @@ -247,10 +242,9 @@ namespace Ryujinx.Graphics.Vulkan out BufferHolder holder, bool sparseCompatible = false, BufferAllocationType baseType = BufferAllocationType.HostMapped, - BufferHandle storageHint = default, bool forceMirrors = false) { - holder = Create(gd, size, forConditionalRendering: false, sparseCompatible, baseType, storageHint); + holder = Create(gd, size, forConditionalRendering: false, sparseCompatible, baseType); if (holder == null) { return BufferHandle.Null; @@ -387,31 +381,13 @@ namespace Ryujinx.Graphics.Vulkan int size, bool forConditionalRendering = false, bool sparseCompatible = false, - BufferAllocationType baseType = BufferAllocationType.HostMapped, - BufferHandle storageHint = default) + BufferAllocationType baseType = BufferAllocationType.HostMapped) { BufferAllocationType type = baseType; - BufferHolder storageHintHolder = null; if (baseType == BufferAllocationType.Auto) { - if (gd.IsSharedMemory) - { - baseType = BufferAllocationType.HostMapped; - type = baseType; - } - else - { - type = size >= BufferHolder.DeviceLocalSizeThreshold ? BufferAllocationType.DeviceLocal : BufferAllocationType.HostMapped; - } - - if (storageHint != BufferHandle.Null) - { - if (TryGetBuffer(storageHint, out storageHintHolder)) - { - type = storageHintHolder.DesiredType; - } - } + type = BufferAllocationType.HostMapped; } (VkBuffer buffer, MemoryAllocation allocation, BufferAllocationType resultType) = @@ -421,11 +397,6 @@ namespace Ryujinx.Graphics.Vulkan { var holder = new BufferHolder(gd, _device, buffer, allocation, size, baseType, resultType); - if (storageHintHolder != null) - { - holder.InheritMetrics(storageHintHolder); - } - return holder; } diff --git a/src/Ryujinx.Graphics.Vulkan/EnumConversion.cs b/src/Ryujinx.Graphics.Vulkan/EnumConversion.cs index f9243bf8..9d1fd9ff 100644 --- a/src/Ryujinx.Graphics.Vulkan/EnumConversion.cs +++ b/src/Ryujinx.Graphics.Vulkan/EnumConversion.cs @@ -424,10 +424,20 @@ namespace Ryujinx.Graphics.Vulkan public static BufferAllocationType Convert(this BufferAccess access) { - if (access.HasFlag(BufferAccess.FlushPersistent) || access.HasFlag(BufferAccess.Stream)) + BufferAccess memType = access & BufferAccess.MemoryTypeMask; + + if (memType == BufferAccess.HostMemory || access.HasFlag(BufferAccess.Stream)) { return BufferAllocationType.HostMapped; } + else if (memType == BufferAccess.DeviceMemory) + { + return BufferAllocationType.DeviceLocal; + } + else if (memType == BufferAccess.DeviceMemoryMapped) + { + return BufferAllocationType.DeviceLocalMapped; + } return BufferAllocationType.Auto; } diff --git a/src/Ryujinx.Graphics.Vulkan/PipelineFull.cs b/src/Ryujinx.Graphics.Vulkan/PipelineFull.cs index 4987548c..357d517e 100644 --- a/src/Ryujinx.Graphics.Vulkan/PipelineFull.cs +++ b/src/Ryujinx.Graphics.Vulkan/PipelineFull.cs @@ -222,20 +222,6 @@ namespace Ryujinx.Graphics.Vulkan } } - private void TryBackingSwaps() - { - CommandBufferScoped? 
cbs = null; - - _backingSwaps.RemoveAll(holder => holder.TryBackingSwap(ref cbs)); - - cbs?.Dispose(); - } - - public void AddBackingSwap(BufferHolder holder) - { - _backingSwaps.Add(holder); - } - public void Restore() { if (Pipeline != null) @@ -291,8 +277,6 @@ namespace Ryujinx.Graphics.Vulkan Gd.ResetCounterPool(); - TryBackingSwaps(); - Restore(); } diff --git a/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs b/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs index 8ef05de3..175d5e3e 100644 --- a/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs +++ b/src/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs @@ -486,12 +486,7 @@ namespace Ryujinx.Graphics.Vulkan public BufferHandle CreateBuffer(int size, BufferAccess access) { - return BufferManager.CreateWithHandle(this, size, access.HasFlag(BufferAccess.SparseCompatible), access.Convert(), default, access == BufferAccess.Stream); - } - - public BufferHandle CreateBuffer(int size, BufferAccess access, BufferHandle storageHint) - { - return BufferManager.CreateWithHandle(this, size, access.HasFlag(BufferAccess.SparseCompatible), access.Convert(), storageHint); + return BufferManager.CreateWithHandle(this, size, access.HasFlag(BufferAccess.SparseCompatible), access.Convert(), access.HasFlag(BufferAccess.Stream)); } public BufferHandle CreateBuffer(nint pointer, int size) @@ -675,9 +670,23 @@ namespace Ryujinx.Graphics.Vulkan var limits = _physicalDevice.PhysicalDeviceProperties.Limits; var mainQueueProperties = _physicalDevice.QueueFamilyProperties[QueueFamilyIndex]; + SystemMemoryType memoryType; + + if (IsSharedMemory) + { + memoryType = SystemMemoryType.UnifiedMemory; + } + else + { + memoryType = Vendor == Vendor.Nvidia ? + SystemMemoryType.DedicatedMemorySlowStorage : + SystemMemoryType.DedicatedMemory; + } + return new Capabilities( api: TargetApi.Vulkan, GpuVendor, + memoryType: memoryType, hasFrontFacingBug: IsIntelWindows, hasVectorIndexingBug: Vendor == Vendor.Qualcomm, needsFragmentOutputSpecialization: IsMoltenVk,
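// Editorial aside (not part of the patch): with the storage-hint overload gone, buffer placement in
// the Vulkan backend is driven purely by the BufferAccess flags. Following EnumConversion.Convert
// in this diff: HostMemory (or any Stream request) resolves to BufferAllocationType.HostMapped,
// DeviceMemory to DeviceLocal, DeviceMemoryMapped to DeviceLocalMapped, and anything else to Auto,
// which BufferManager.Create now simply treats as HostMapped.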