From bf77d1cab93467676156ebfbd5cf0ae057266e6f Mon Sep 17 00:00:00 2001
From: riperiperi
Date: Sat, 8 Oct 2022 16:04:47 +0100
Subject: [PATCH] GPU: Pass SpanOrArray for Texture SetData to avoid copy (#3745)

* GPU: Pass SpanOrArray for Texture SetData to avoid copy

Texture data is often converted before upload, meaning that an array is allocated to hold the converted data. However, the backend SetData methods were being passed a Span of that data, and the Multithreaded layer does `ToArray()` on it so that it can be stored for later! This method can't extract the original array, so it creates a copy.

This PR changes the type passed for textures to a new ref struct called SpanOrArray, which is backed by either a ReadOnlySpan or an array. The benefit here is that we can have a ToArray method that doesn't copy if it is originally backed by an array. This will also avoid a copy when running the ASTC decoder.

On NieR this was taking 38% of texture upload time, which it does a _lot_ of when you move between areas, so there should be a 1.6x performance boost when strictly uploading textures. No doubt this will also improve texture streaming performance in UE4 games, and maybe give a small improvement to video playback.

From the numbers, it's probably possible to improve the upload rate by a further 1.6x by performing layout conversion on GPU. I'm not sure if we could improve it further than that - multithreading conversion on CPU would probably result in a memory bottleneck.

This doesn't extend to buffers, since we don't convert their data on the GPU emulator side.

* Remove implicit cast to array.
--- Ryujinx.Common/Memory/SpanOrArray.cs | 89 +++++++++++++++++++ Ryujinx.Graphics.GAL/ITexture.cs | 7 +- .../Resources/ThreadedTexture.cs | 9 +- Ryujinx.Graphics.Gpu/Engine/Dma/DmaClass.cs | 2 +- Ryujinx.Graphics.Gpu/Image/Texture.cs | 39 ++++---- Ryujinx.Graphics.Gpu/Image/TextureGroup.cs | 7 +- .../Image/TextureBuffer.cs | 11 ++- Ryujinx.Graphics.OpenGL/Image/TextureView.cs | 27 +++--- Ryujinx.Graphics.Texture/LayoutConverter.cs | 11 +-- Ryujinx.Graphics.Vulkan/TextureBuffer.cs | 9 +- Ryujinx.Graphics.Vulkan/TextureView.cs | 9 +- 11 files changed, 164 insertions(+), 56 deletions(-) create mode 100644 Ryujinx.Common/Memory/SpanOrArray.cs diff --git a/Ryujinx.Common/Memory/SpanOrArray.cs b/Ryujinx.Common/Memory/SpanOrArray.cs new file mode 100644 index 00000000..c1f06655 --- /dev/null +++ b/Ryujinx.Common/Memory/SpanOrArray.cs @@ -0,0 +1,89 @@ +using System; + +namespace Ryujinx.Common.Memory +{ + /// + /// A struct that can represent both a Span and Array. + /// This is useful to keep the Array representation when possible to avoid copies. + /// + /// Element Type + public ref struct SpanOrArray where T : unmanaged + { + public readonly T[] Array; + public readonly ReadOnlySpan Span; + + /// + /// Create a new SpanOrArray from an array. + /// + /// Array to store + public SpanOrArray(T[] array) + { + Array = array; + Span = ReadOnlySpan.Empty; + } + + /// + /// Create a new SpanOrArray from a readonly span. + /// + /// Span to store + public SpanOrArray(ReadOnlySpan span) + { + Array = null; + Span = span; + } + + /// + /// Return the contained array, or convert the span if necessary. + /// + /// An array containing the data + public T[] ToArray() + { + return Array ?? Span.ToArray(); + } + + /// + /// Return a ReadOnlySpan from either the array or ReadOnlySpan. + /// + /// A ReadOnlySpan containing the data + public ReadOnlySpan AsSpan() + { + return Array ?? Span; + } + + /// + /// Cast an array to a SpanOrArray. 
+ /// + /// Source array + public static implicit operator SpanOrArray(T[] array) + { + return new SpanOrArray(array); + } + + /// + /// Cast a ReadOnlySpan to a SpanOrArray. + /// + /// Source ReadOnlySpan + public static implicit operator SpanOrArray(ReadOnlySpan span) + { + return new SpanOrArray(span); + } + + /// + /// Cast a Span to a SpanOrArray. + /// + /// Source Span + public static implicit operator SpanOrArray(Span span) + { + return new SpanOrArray(span); + } + + /// + /// Cast a SpanOrArray to a ReadOnlySpan + /// + /// Source SpanOrArray + public static implicit operator ReadOnlySpan(SpanOrArray spanOrArray) + { + return spanOrArray.AsSpan(); + } + } +} diff --git a/Ryujinx.Graphics.GAL/ITexture.cs b/Ryujinx.Graphics.GAL/ITexture.cs index 7f46806c..4dc93303 100644 --- a/Ryujinx.Graphics.GAL/ITexture.cs +++ b/Ryujinx.Graphics.GAL/ITexture.cs @@ -1,3 +1,4 @@ +using Ryujinx.Common.Memory; using System; namespace Ryujinx.Graphics.GAL @@ -17,9 +18,9 @@ namespace Ryujinx.Graphics.GAL ReadOnlySpan GetData(); ReadOnlySpan GetData(int layer, int level); - void SetData(ReadOnlySpan data); - void SetData(ReadOnlySpan data, int layer, int level); - void SetData(ReadOnlySpan data, int layer, int level, Rectangle region); + void SetData(SpanOrArray data); + void SetData(SpanOrArray data, int layer, int level); + void SetData(SpanOrArray data, int layer, int level, Rectangle region); void SetStorage(BufferRange buffer); void Release(); } diff --git a/Ryujinx.Graphics.GAL/Multithreading/Resources/ThreadedTexture.cs b/Ryujinx.Graphics.GAL/Multithreading/Resources/ThreadedTexture.cs index 1e7d86ba..1267ab79 100644 --- a/Ryujinx.Graphics.GAL/Multithreading/Resources/ThreadedTexture.cs +++ b/Ryujinx.Graphics.GAL/Multithreading/Resources/ThreadedTexture.cs @@ -1,4 +1,5 @@ -using Ryujinx.Graphics.GAL.Multithreading.Commands.Texture; +using Ryujinx.Common.Memory; +using Ryujinx.Graphics.GAL.Multithreading.Commands.Texture; using Ryujinx.Graphics.GAL.Multithreading.Model; 
using System; @@ -107,19 +108,19 @@ namespace Ryujinx.Graphics.GAL.Multithreading.Resources } } - public void SetData(ReadOnlySpan data) + public void SetData(SpanOrArray data) { _renderer.New().Set(Ref(this), Ref(data.ToArray())); _renderer.QueueCommand(); } - public void SetData(ReadOnlySpan data, int layer, int level) + public void SetData(SpanOrArray data, int layer, int level) { _renderer.New().Set(Ref(this), Ref(data.ToArray()), layer, level); _renderer.QueueCommand(); } - public void SetData(ReadOnlySpan data, int layer, int level, Rectangle region) + public void SetData(SpanOrArray data, int layer, int level, Rectangle region) { _renderer.New().Set(Ref(this), Ref(data.ToArray()), layer, level, region); _renderer.QueueCommand(); diff --git a/Ryujinx.Graphics.Gpu/Engine/Dma/DmaClass.cs b/Ryujinx.Graphics.Gpu/Engine/Dma/DmaClass.cs index da25a89d..aa94f1f8 100644 --- a/Ryujinx.Graphics.Gpu/Engine/Dma/DmaClass.cs +++ b/Ryujinx.Graphics.Gpu/Engine/Dma/DmaClass.cs @@ -229,7 +229,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Dma if (target != null) { - ReadOnlySpan data; + byte[] data; if (srcLinear) { data = LayoutConverter.ConvertLinearStridedToLinear( diff --git a/Ryujinx.Graphics.Gpu/Image/Texture.cs b/Ryujinx.Graphics.Gpu/Image/Texture.cs index 320bc014..c104e860 100644 --- a/Ryujinx.Graphics.Gpu/Image/Texture.cs +++ b/Ryujinx.Graphics.Gpu/Image/Texture.cs @@ -1,5 +1,6 @@ using Ryujinx.Common; using Ryujinx.Common.Logging; +using Ryujinx.Common.Memory; using Ryujinx.Graphics.GAL; using Ryujinx.Graphics.Gpu.Memory; using Ryujinx.Graphics.Texture; @@ -720,9 +721,9 @@ namespace Ryujinx.Graphics.Gpu.Image } } - data = ConvertToHostCompatibleFormat(data); + SpanOrArray result = ConvertToHostCompatibleFormat(data); - HostTexture.SetData(data); + HostTexture.SetData(result); _hasData = true; } @@ -731,7 +732,7 @@ namespace Ryujinx.Graphics.Gpu.Image /// Uploads new texture data to the host GPU. 
/// /// New data - public void SetData(ReadOnlySpan data) + public void SetData(SpanOrArray data) { BlacklistScale(); @@ -750,7 +751,7 @@ namespace Ryujinx.Graphics.Gpu.Image /// New data /// Target layer /// Target level - public void SetData(ReadOnlySpan data, int layer, int level) + public void SetData(SpanOrArray data, int layer, int level) { BlacklistScale(); @@ -786,7 +787,7 @@ namespace Ryujinx.Graphics.Gpu.Image /// Mip level to convert /// True to convert a single slice /// Converted data - public ReadOnlySpan ConvertToHostCompatibleFormat(ReadOnlySpan data, int level = 0, bool single = false) + public SpanOrArray ConvertToHostCompatibleFormat(ReadOnlySpan data, int level = 0, bool single = false) { int width = Info.Width; int height = Info.Height; @@ -799,9 +800,11 @@ namespace Ryujinx.Graphics.Gpu.Image height = Math.Max(height >> level, 1); depth = Math.Max(depth >> level, 1); + SpanOrArray result; + if (Info.IsLinear) { - data = LayoutConverter.ConvertLinearStridedToLinear( + result = LayoutConverter.ConvertLinearStridedToLinear( width, height, Info.FormatInfo.BlockWidth, @@ -813,7 +816,7 @@ namespace Ryujinx.Graphics.Gpu.Image } else { - data = LayoutConverter.ConvertBlockLinearToLinear( + result = LayoutConverter.ConvertBlockLinearToLinear( width, height, depth, @@ -836,7 +839,7 @@ namespace Ryujinx.Graphics.Gpu.Image if (!_context.Capabilities.SupportsAstcCompression && Format.IsAstc()) { if (!AstcDecoder.TryDecodeToRgba8P( - data.ToArray(), + result.ToArray(), Info.FormatInfo.BlockWidth, Info.FormatInfo.BlockHeight, width, @@ -856,11 +859,11 @@ namespace Ryujinx.Graphics.Gpu.Image decoded = BCnEncoder.EncodeBC7(decoded, width, height, depth, levels, layers); } - data = decoded; + result = decoded; } else if (!_context.Capabilities.SupportsR4G4Format && Format == Format.R4G4Unorm) { - data = PixelConverter.ConvertR4G4ToR4G4B4A4(data); + result = PixelConverter.ConvertR4G4ToR4G4B4A4(result); } else if 
(!TextureCompatibility.HostSupportsBcFormat(Format, Target, _context.Capabilities)) { @@ -868,36 +871,36 @@ namespace Ryujinx.Graphics.Gpu.Image { case Format.Bc1RgbaSrgb: case Format.Bc1RgbaUnorm: - data = BCnDecoder.DecodeBC1(data, width, height, depth, levels, layers); + result = BCnDecoder.DecodeBC1(result, width, height, depth, levels, layers); break; case Format.Bc2Srgb: case Format.Bc2Unorm: - data = BCnDecoder.DecodeBC2(data, width, height, depth, levels, layers); + result = BCnDecoder.DecodeBC2(result, width, height, depth, levels, layers); break; case Format.Bc3Srgb: case Format.Bc3Unorm: - data = BCnDecoder.DecodeBC3(data, width, height, depth, levels, layers); + result = BCnDecoder.DecodeBC3(result, width, height, depth, levels, layers); break; case Format.Bc4Snorm: case Format.Bc4Unorm: - data = BCnDecoder.DecodeBC4(data, width, height, depth, levels, layers, Format == Format.Bc4Snorm); + result = BCnDecoder.DecodeBC4(result, width, height, depth, levels, layers, Format == Format.Bc4Snorm); break; case Format.Bc5Snorm: case Format.Bc5Unorm: - data = BCnDecoder.DecodeBC5(data, width, height, depth, levels, layers, Format == Format.Bc5Snorm); + result = BCnDecoder.DecodeBC5(result, width, height, depth, levels, layers, Format == Format.Bc5Snorm); break; case Format.Bc6HSfloat: case Format.Bc6HUfloat: - data = BCnDecoder.DecodeBC6(data, width, height, depth, levels, layers, Format == Format.Bc6HSfloat); + result = BCnDecoder.DecodeBC6(result, width, height, depth, levels, layers, Format == Format.Bc6HSfloat); break; case Format.Bc7Srgb: case Format.Bc7Unorm: - data = BCnDecoder.DecodeBC7(data, width, height, depth, levels, layers); + result = BCnDecoder.DecodeBC7(result, width, height, depth, levels, layers); break; } } - return data; + return result; } /// diff --git a/Ryujinx.Graphics.Gpu/Image/TextureGroup.cs b/Ryujinx.Graphics.Gpu/Image/TextureGroup.cs index 4bdc5078..9efd1802 100644 --- a/Ryujinx.Graphics.Gpu/Image/TextureGroup.cs +++ 
b/Ryujinx.Graphics.Gpu/Image/TextureGroup.cs @@ -1,4 +1,5 @@ -using Ryujinx.Cpu.Tracking; +using Ryujinx.Common.Memory; +using Ryujinx.Cpu.Tracking; using Ryujinx.Graphics.GAL; using Ryujinx.Graphics.Gpu.Memory; using Ryujinx.Graphics.Texture; @@ -348,9 +349,9 @@ namespace Ryujinx.Graphics.Gpu.Image ReadOnlySpan data = _physicalMemory.GetSpan(Storage.Range.GetSlice((ulong)offset, (ulong)size)); - data = Storage.ConvertToHostCompatibleFormat(data, info.BaseLevel, true); + SpanOrArray result = Storage.ConvertToHostCompatibleFormat(data, info.BaseLevel, true); - Storage.SetData(data, info.BaseLayer, info.BaseLevel); + Storage.SetData(result, info.BaseLayer, info.BaseLevel); offsetIndex++; } diff --git a/Ryujinx.Graphics.OpenGL/Image/TextureBuffer.cs b/Ryujinx.Graphics.OpenGL/Image/TextureBuffer.cs index e46d5c48..76d0149b 100644 --- a/Ryujinx.Graphics.OpenGL/Image/TextureBuffer.cs +++ b/Ryujinx.Graphics.OpenGL/Image/TextureBuffer.cs @@ -1,4 +1,5 @@ using OpenTK.Graphics.OpenGL; +using Ryujinx.Common.Memory; using Ryujinx.Graphics.GAL; using System; @@ -48,17 +49,19 @@ namespace Ryujinx.Graphics.OpenGL.Image return GetData(); } - public void SetData(ReadOnlySpan data) + public void SetData(SpanOrArray data) { - Buffer.SetData(_buffer, _bufferOffset, data.Slice(0, Math.Min(data.Length, _bufferSize))); + var dataSpan = data.AsSpan(); + + Buffer.SetData(_buffer, _bufferOffset, dataSpan.Slice(0, Math.Min(dataSpan.Length, _bufferSize))); } - public void SetData(ReadOnlySpan data, int layer, int level) + public void SetData(SpanOrArray data, int layer, int level) { throw new NotSupportedException(); } - public void SetData(ReadOnlySpan data, int layer, int level, Rectangle region) + public void SetData(SpanOrArray data, int layer, int level, Rectangle region) { throw new NotSupportedException(); } diff --git a/Ryujinx.Graphics.OpenGL/Image/TextureView.cs b/Ryujinx.Graphics.OpenGL/Image/TextureView.cs index f17243d2..3e7da6e3 100644 --- 
a/Ryujinx.Graphics.OpenGL/Image/TextureView.cs +++ b/Ryujinx.Graphics.OpenGL/Image/TextureView.cs @@ -1,5 +1,6 @@ using OpenTK.Graphics.OpenGL; using Ryujinx.Common; +using Ryujinx.Common.Memory; using Ryujinx.Graphics.GAL; using System; @@ -317,32 +318,36 @@ namespace Ryujinx.Graphics.OpenGL.Image } } - public void SetData(ReadOnlySpan data) + public void SetData(SpanOrArray data) { + var dataSpan = data.AsSpan(); + if (Format == Format.S8UintD24Unorm) { - data = FormatConverter.ConvertS8D24ToD24S8(data); + dataSpan = FormatConverter.ConvertS8D24ToD24S8(dataSpan); } unsafe { - fixed (byte* ptr = data) + fixed (byte* ptr = dataSpan) { - ReadFrom((IntPtr)ptr, data.Length); + ReadFrom((IntPtr)ptr, dataSpan.Length); } } } - public void SetData(ReadOnlySpan data, int layer, int level) + public void SetData(SpanOrArray data, int layer, int level) { + var dataSpan = data.AsSpan(); + if (Format == Format.S8UintD24Unorm) { - data = FormatConverter.ConvertS8D24ToD24S8(data); + dataSpan = FormatConverter.ConvertS8D24ToD24S8(dataSpan); } unsafe { - fixed (byte* ptr = data) + fixed (byte* ptr = dataSpan) { int width = Math.Max(Info.Width >> level, 1); int height = Math.Max(Info.Height >> level, 1); @@ -352,11 +357,13 @@ namespace Ryujinx.Graphics.OpenGL.Image } } - public void SetData(ReadOnlySpan data, int layer, int level, Rectangle region) + public void SetData(SpanOrArray data, int layer, int level, Rectangle region) { + var dataSpan = data.AsSpan(); + if (Format == Format.S8UintD24Unorm) { - data = FormatConverter.ConvertS8D24ToD24S8(data); + dataSpan = FormatConverter.ConvertS8D24ToD24S8(dataSpan); } int wInBlocks = BitUtils.DivRoundUp(region.Width, Info.BlockWidth); @@ -364,7 +371,7 @@ namespace Ryujinx.Graphics.OpenGL.Image unsafe { - fixed (byte* ptr = data) + fixed (byte* ptr = dataSpan) { ReadFrom2D( (IntPtr)ptr, diff --git a/Ryujinx.Graphics.Texture/LayoutConverter.cs b/Ryujinx.Graphics.Texture/LayoutConverter.cs index 2b327375..188ae0c1 100644 --- 
a/Ryujinx.Graphics.Texture/LayoutConverter.cs +++ b/Ryujinx.Graphics.Texture/LayoutConverter.cs @@ -93,7 +93,7 @@ namespace Ryujinx.Graphics.Texture }; } - public static Span ConvertBlockLinearToLinear( + public static byte[] ConvertBlockLinearToLinear( int width, int height, int depth, @@ -119,7 +119,7 @@ namespace Ryujinx.Graphics.Texture blockHeight, bytesPerPixel); - Span output = new byte[outSize]; + byte[] output = new byte[outSize]; int outOffs = 0; @@ -246,7 +246,7 @@ namespace Ryujinx.Graphics.Texture return output; } - public static Span ConvertLinearStridedToLinear( + public static byte[] ConvertLinearStridedToLinear( int width, int height, int blockWidth, @@ -262,14 +262,15 @@ namespace Ryujinx.Graphics.Texture int outStride = BitUtils.AlignUp(w * bytesPerPixel, HostStrideAlignment); lineSize = Math.Min(lineSize, outStride); - Span output = new byte[h * outStride]; + byte[] output = new byte[h * outStride]; + Span outSpan = output; int outOffs = 0; int inOffs = 0; for (int y = 0; y < h; y++) { - data.Slice(inOffs, lineSize).CopyTo(output.Slice(outOffs, lineSize)); + data.Slice(inOffs, lineSize).CopyTo(outSpan.Slice(outOffs, lineSize)); inOffs += stride; outOffs += outStride; diff --git a/Ryujinx.Graphics.Vulkan/TextureBuffer.cs b/Ryujinx.Graphics.Vulkan/TextureBuffer.cs index fca0598f..bf9a6ead 100644 --- a/Ryujinx.Graphics.Vulkan/TextureBuffer.cs +++ b/Ryujinx.Graphics.Vulkan/TextureBuffer.cs @@ -1,4 +1,5 @@ -using Ryujinx.Graphics.GAL; +using Ryujinx.Common.Memory; +using Ryujinx.Graphics.GAL; using Silk.NET.Vulkan; using System; using System.Collections.Generic; @@ -90,17 +91,17 @@ namespace Ryujinx.Graphics.Vulkan _bufferView = null; } - public void SetData(ReadOnlySpan data) + public void SetData(SpanOrArray data) { _gd.SetBufferData(_bufferHandle, _offset, data); } - public void SetData(ReadOnlySpan data, int layer, int level) + public void SetData(SpanOrArray data, int layer, int level) { throw new NotSupportedException(); } - public void 
SetData(ReadOnlySpan data, int layer, int level, Rectangle region) + public void SetData(SpanOrArray data, int layer, int level, Rectangle region) { throw new NotSupportedException(); } diff --git a/Ryujinx.Graphics.Vulkan/TextureView.cs b/Ryujinx.Graphics.Vulkan/TextureView.cs index fbe32eca..129a77ef 100644 --- a/Ryujinx.Graphics.Vulkan/TextureView.cs +++ b/Ryujinx.Graphics.Vulkan/TextureView.cs @@ -1,4 +1,5 @@ -using Ryujinx.Graphics.GAL; +using Ryujinx.Common.Memory; +using Ryujinx.Graphics.GAL; using Silk.NET.Vulkan; using System; using System.Collections.Generic; @@ -873,17 +874,17 @@ namespace Ryujinx.Graphics.Vulkan return GetDataFromBuffer(result, size, result); } - public void SetData(ReadOnlySpan data) + public void SetData(SpanOrArray data) { SetData(data, 0, 0, Info.GetLayers(), Info.Levels, singleSlice: false); } - public void SetData(ReadOnlySpan data, int layer, int level) + public void SetData(SpanOrArray data, int layer, int level) { SetData(data, layer, level, 1, 1, singleSlice: true); } - public void SetData(ReadOnlySpan data, int layer, int level, Rectangle region) + public void SetData(SpanOrArray data, int layer, int level, Rectangle region) { SetData(data, layer, level, 1, 1, singleSlice: true, region); }