From 470be03c2ff22346a1f0ae53fa25f53c4d1790b5 Mon Sep 17 00:00:00 2001
From: riperiperi
Date: Mon, 26 Dec 2022 18:50:27 +0000
Subject: [PATCH] GPU: Add fallback when 16-bit formats are not supported (#4108)

* Add conversion for 16 bit RGBA formats (not supported in Rosetta)

* Rebase fix

Rebase fix

* Forgot to remove this

* Fix RGBA16 format conversion

* Add RGBA4 -> RGBA8 conversion

* Handle host stride alignment

* Address Feedback Part 1

* Can't count

* Don't zero out rgb when alpha is 0

* Separate RGBA4 and 5-bit component formats

Not sure of a better way to name them...

* Add A1B5G5R5 conversion

* Put this in the right place.

* Make format naming consistent for capabilities

* Change method names
---
Review notes (everything between the "---" line and the first "diff --git"
line is ignored by `git am`):

- Format.Is16BitPacked() now also reports Format.A1B5G5R5Unorm. Without it,
  the A1B5G5R5Unorm case added to Texture.cs could never be reached (the
  switch is guarded by Format.Is16BitPacked()), and TextureCompatibility
  never remapped that format on hosts lacking 5-bit component formats.
- PixelConverter.ConvertA1B5G5R5ToR8G8B8A8 now reads alpha from bit 0. The
  function takes red from bits 11-15, green from bits 6-10 and blue from
  bits 1-5, so bit 15 is the top bit of red, not alpha.
- The Format.cs hunk header and the diffstat were adjusted for the one extra
  added line. The "index" blob ids still refer to the original commit.
- Line structure and indentation were reconstructed from a whitespace-collapsed
  copy; generic type arguments and XML doc tags that had been stripped were
  restored.

 Ryujinx.Graphics.GAL/Capabilities.cs          |   6 +
 Ryujinx.Graphics.GAL/Format.cs                |  22 ++
 Ryujinx.Graphics.Gpu/Image/Texture.cs         |  35 ++-
 .../Image/TextureCompatibility.cs             |  21 +-
 Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs     |   2 +
 Ryujinx.Graphics.Texture/LayoutConverter.cs   |   2 +-
 Ryujinx.Graphics.Texture/PixelConverter.cs    | 211 ++++++++++++++++--
 Ryujinx.Graphics.Vulkan/VulkanRenderer.cs     |  13 ++
 8 files changed, 292 insertions(+), 20 deletions(-)

diff --git a/Ryujinx.Graphics.GAL/Capabilities.cs b/Ryujinx.Graphics.GAL/Capabilities.cs
index bc93908b..abacdcfa 100644
--- a/Ryujinx.Graphics.GAL/Capabilities.cs
+++ b/Ryujinx.Graphics.GAL/Capabilities.cs
@@ -18,7 +18,9 @@ namespace Ryujinx.Graphics.GAL
         public readonly bool Supports3DTextureCompression;
         public readonly bool SupportsBgraFormat;
         public readonly bool SupportsR4G4Format;
+        public readonly bool SupportsR4G4B4A4Format;
         public readonly bool SupportsSnormBufferTextureFormat;
+        public readonly bool Supports5BitComponentFormat;
         public readonly bool SupportsFragmentShaderInterlock;
         public readonly bool SupportsFragmentShaderOrderingIntel;
         public readonly bool SupportsGeometryShaderPassthrough;
@@ -55,7 +57,9 @@ namespace Ryujinx.Graphics.GAL
             bool supports3DTextureCompression,
             bool supportsBgraFormat,
             bool supportsR4G4Format,
+            bool supportsR4G4B4A4Format,
             bool supportsSnormBufferTextureFormat,
+            bool supports5BitComponentFormat,
             bool supportsFragmentShaderInterlock,
             bool supportsFragmentShaderOrderingIntel,
             bool supportsGeometryShaderPassthrough,
@@ -89,7 +93,9 @@ namespace Ryujinx.Graphics.GAL
             Supports3DTextureCompression = supports3DTextureCompression;
             SupportsBgraFormat = supportsBgraFormat;
             SupportsR4G4Format = supportsR4G4Format;
+            SupportsR4G4B4A4Format = supportsR4G4B4A4Format;
             SupportsSnormBufferTextureFormat = supportsSnormBufferTextureFormat;
+            Supports5BitComponentFormat = supports5BitComponentFormat;
             SupportsFragmentShaderInterlock = supportsFragmentShaderInterlock;
             SupportsFragmentShaderOrderingIntel = supportsFragmentShaderOrderingIntel;
             SupportsGeometryShaderPassthrough = supportsGeometryShaderPassthrough;
diff --git a/Ryujinx.Graphics.GAL/Format.cs b/Ryujinx.Graphics.GAL/Format.cs
index 87d08803..5e0274e5 100644
--- a/Ryujinx.Graphics.GAL/Format.cs
+++ b/Ryujinx.Graphics.GAL/Format.cs
@@ -448,6 +448,28 @@ namespace Ryujinx.Graphics.GAL
             return false;
         }
 
+        /// <summary>
+        /// Checks if the texture format is 16 bit packed.
+        /// </summary>
+        /// <param name="format">Texture format</param>
+        /// <returns>True if the texture format is 16 bit packed, false otherwise</returns>
+        public static bool Is16BitPacked(this Format format)
+        {
+            switch (format)
+            {
+                case Format.B5G6R5Unorm:
+                case Format.B5G5R5A1Unorm:
+                case Format.R5G5B5X1Unorm:
+                case Format.R5G5B5A1Unorm:
+                case Format.R5G6B5Unorm:
+                case Format.A1B5G5R5Unorm:
+                case Format.R4G4B4A4Unorm:
+                    return true;
+            }
+
+            return false;
+        }
+
         /// <summary>
         /// Checks if the texture format is an ASTC format.
         /// </summary>
diff --git a/Ryujinx.Graphics.Gpu/Image/Texture.cs b/Ryujinx.Graphics.Gpu/Image/Texture.cs
index 904c908f..0995314d 100644
--- a/Ryujinx.Graphics.Gpu/Image/Texture.cs
+++ b/Ryujinx.Graphics.Gpu/Image/Texture.cs
@@ -911,7 +911,40 @@ namespace Ryujinx.Graphics.Gpu.Image
             }
             else if (!_context.Capabilities.SupportsR4G4Format && Format == Format.R4G4Unorm)
             {
-                result = PixelConverter.ConvertR4G4ToR4G4B4A4(result);
+                result = PixelConverter.ConvertR4G4ToR4G4B4A4(result, width);
+
+                if (!_context.Capabilities.SupportsR4G4B4A4Format)
+                {
+                    result = PixelConverter.ConvertR4G4B4A4ToR8G8B8A8(result, width);
+                }
+            }
+            else if (Format == Format.R4G4B4A4Unorm)
+            {
+                if (!_context.Capabilities.SupportsR4G4B4A4Format)
+                {
+                    result = PixelConverter.ConvertR4G4B4A4ToR8G8B8A8(result, width);
+                }
+            }
+            else if (!_context.Capabilities.Supports5BitComponentFormat && Format.Is16BitPacked())
+            {
+                switch (Format)
+                {
+                    case Format.B5G6R5Unorm:
+                    case Format.R5G6B5Unorm:
+                        result = PixelConverter.ConvertR5G6B5ToR8G8B8A8(result, width);
+                        break;
+                    case Format.B5G5R5A1Unorm:
+                    case Format.R5G5B5X1Unorm:
+                    case Format.R5G5B5A1Unorm:
+                        result = PixelConverter.ConvertR5G5B5ToR8G8B8A8(result, width, Format == Format.R5G5B5X1Unorm);
+                        break;
+                    case Format.A1B5G5R5Unorm:
+                        result = PixelConverter.ConvertA1B5G5R5ToR8G8B8A8(result, width);
+                        break;
+                    case Format.R4G4B4A4Unorm:
+                        result = PixelConverter.ConvertR4G4B4A4ToR8G8B8A8(result, width);
+                        break;
+                }
             }
 
             return result;
diff --git a/Ryujinx.Graphics.Gpu/Image/TextureCompatibility.cs b/Ryujinx.Graphics.Gpu/Image/TextureCompatibility.cs
index 642e03b6..7ec4c7ac 100644
--- a/Ryujinx.Graphics.Gpu/Image/TextureCompatibility.cs
+++ b/Ryujinx.Graphics.Gpu/Image/TextureCompatibility.cs
@@ -132,7 +132,26 @@ namespace Ryujinx.Graphics.Gpu.Image
 
             if (!caps.SupportsR4G4Format && info.FormatInfo.Format == Format.R4G4Unorm)
             {
-                return new FormatInfo(Format.R4G4B4A4Unorm, 1, 1, 2, 4);
+                if (caps.SupportsR4G4B4A4Format)
+                {
+                    return new FormatInfo(Format.R4G4B4A4Unorm, 1, 1, 2, 4);
+                }
+                else
+                {
+                    return new FormatInfo(Format.R8G8B8A8Unorm, 1, 1, 4, 4);
+                }
+            }
+
+            if (info.FormatInfo.Format == Format.R4G4B4A4Unorm)
+            {
+                if (!caps.SupportsR4G4B4A4Format)
+                {
+                    return new FormatInfo(Format.R8G8B8A8Unorm, 1, 1, 4, 4);
+                }
+            }
+            else if (!caps.Supports5BitComponentFormat && info.FormatInfo.Format.Is16BitPacked())
+            {
+                return new FormatInfo(info.FormatInfo.Format.IsBgr() ? Format.B8G8R8A8Unorm : Format.R8G8B8A8Unorm, 1, 1, 4, 4);
             }
 
             return info.FormatInfo;
diff --git a/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs b/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs
index 59ca6afd..de1ce4a3 100644
--- a/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs
+++ b/Ryujinx.Graphics.OpenGL/OpenGLRenderer.cs
@@ -114,7 +114,9 @@ namespace Ryujinx.Graphics.OpenGL
                 supports3DTextureCompression: false,
                 supportsBgraFormat: false,
                 supportsR4G4Format: false,
+                supportsR4G4B4A4Format: true,
                 supportsSnormBufferTextureFormat: false,
+                supports5BitComponentFormat: true,
                 supportsFragmentShaderInterlock: HwCapabilities.SupportsFragmentShaderInterlock,
                 supportsFragmentShaderOrderingIntel: HwCapabilities.SupportsFragmentShaderOrdering,
                 supportsGeometryShaderPassthrough: HwCapabilities.SupportsGeometryShaderPassthrough,
diff --git a/Ryujinx.Graphics.Texture/LayoutConverter.cs b/Ryujinx.Graphics.Texture/LayoutConverter.cs
index 188ae0c1..b8ec9748 100644
--- a/Ryujinx.Graphics.Texture/LayoutConverter.cs
+++ b/Ryujinx.Graphics.Texture/LayoutConverter.cs
@@ -7,7 +7,7 @@ namespace Ryujinx.Graphics.Texture
 {
     public static class LayoutConverter
     {
-        private const int HostStrideAlignment = 4;
+        public const int HostStrideAlignment = 4;
 
         public static void ConvertBlockLinearToLinear(
             Span<byte> dst,
diff --git a/Ryujinx.Graphics.Texture/PixelConverter.cs b/Ryujinx.Graphics.Texture/PixelConverter.cs
index d7e45a69..add25cd3 100644
--- a/Ryujinx.Graphics.Texture/PixelConverter.cs
+++ b/Ryujinx.Graphics.Texture/PixelConverter.cs
@@ -1,3 +1,4 @@
+using Ryujinx.Common;
 using System;
 using System.Runtime.InteropServices;
 using System.Runtime.Intrinsics;
@@ -7,30 +8,206 @@ namespace Ryujinx.Graphics.Texture
 {
     public static class PixelConverter
     {
-        public unsafe static byte[] ConvertR4G4ToR4G4B4A4(ReadOnlySpan<byte> data)
+        private static (int remainder, int outRemainder, int height) GetLineRemainders(int length, int width, int bpp, int outBpp)
+        {
+            int stride = BitUtils.AlignUp(width * bpp, LayoutConverter.HostStrideAlignment);
+            int remainder = stride / bpp - width;
+
+            int outStride = BitUtils.AlignUp(width * outBpp, LayoutConverter.HostStrideAlignment);
+            int outRemainder = outStride / outBpp - width;
+
+            return (remainder, outRemainder, length / stride);
+        }
+
+        public unsafe static byte[] ConvertR4G4ToR4G4B4A4(ReadOnlySpan<byte> data, int width)
         {
             byte[] output = new byte[data.Length * 2];
-            int start = 0;
 
-            if (Sse41.IsSupported)
-            {
-                int sizeTrunc = data.Length & ~7;
-                start = sizeTrunc;
-
-                fixed (byte* inputPtr = data, outputPtr = output)
-                {
-                    for (ulong offset = 0; offset < (ulong)sizeTrunc; offset += 8)
-                    {
-                        Sse2.Store(outputPtr + offset * 2, Sse41.ConvertToVector128Int16(inputPtr + offset).AsByte());
-                    }
-                }
-            }
+            (int remainder, int outRemainder, int height) = GetLineRemainders(data.Length, width, 1, 2);
 
             Span<ushort> outputSpan = MemoryMarshal.Cast<byte, ushort>(output);
 
-            for (int i = start; i < data.Length; i++)
+            if (remainder == 0)
             {
-                outputSpan[i] = (ushort)data[i];
+                int start = 0;
+
+                if (Sse41.IsSupported)
+                {
+                    int sizeTrunc = data.Length & ~7;
+                    start = sizeTrunc;
+
+                    fixed (byte* inputPtr = data, outputPtr = output)
+                    {
+                        for (ulong offset = 0; offset < (ulong)sizeTrunc; offset += 8)
+                        {
+                            Sse2.Store(outputPtr + offset * 2, Sse41.ConvertToVector128Int16(inputPtr + offset).AsByte());
+                        }
+                    }
+                }
+
+                for (int i = start; i < data.Length; i++)
+                {
+                    outputSpan[i] = (ushort)data[i];
+                }
+            }
+            else
+            {
+                int offset = 0;
+                int outOffset = 0;
+
+                for (int y = 0; y < height; y++)
+                {
+                    for (int x = 0; x < width; x++)
+                    {
+                        outputSpan[outOffset++] = data[offset++];
+                    }
+
+                    offset += remainder;
+                    outOffset += outRemainder;
+                }
+            }
+
+            return output;
+        }
+
+        public unsafe static byte[] ConvertR5G6B5ToR8G8B8A8(ReadOnlySpan<byte> data, int width)
+        {
+            byte[] output = new byte[data.Length * 2];
+            int offset = 0;
+            int outOffset = 0;
+
+            (int remainder, int outRemainder, int height) = GetLineRemainders(data.Length, width, 2, 4);
+
+            ReadOnlySpan<ushort> inputSpan = MemoryMarshal.Cast<byte, ushort>(data);
+            Span<uint> outputSpan = MemoryMarshal.Cast<byte, uint>(output);
+
+            for (int y = 0; y < height; y++)
+            {
+                for (int x = 0; x < width; x++)
+                {
+                    uint packed = inputSpan[offset++];
+
+                    uint outputPacked = 0xff000000;
+                    outputPacked |= (packed << 3) & 0x000000f8;
+                    outputPacked |= (packed << 8) & 0x00f80000;
+
+                    // Replicate 5 bit components.
+                    outputPacked |= (outputPacked >> 5) & 0x00070007;
+
+                    // Include and replicate 6 bit component.
+                    outputPacked |= ((packed << 5) & 0x0000fc00) | ((packed >> 1) & 0x00000300);
+
+                    outputSpan[outOffset++] = outputPacked;
+                }
+
+                offset += remainder;
+                outOffset += outRemainder;
+            }
+
+            return output;
+        }
+
+        public unsafe static byte[] ConvertR5G5B5ToR8G8B8A8(ReadOnlySpan<byte> data, int width, bool forceAlpha)
+        {
+            byte[] output = new byte[data.Length * 2];
+            int offset = 0;
+            int outOffset = 0;
+
+            (int remainder, int outRemainder, int height) = GetLineRemainders(data.Length, width, 2, 4);
+
+            ReadOnlySpan<ushort> inputSpan = MemoryMarshal.Cast<byte, ushort>(data);
+            Span<uint> outputSpan = MemoryMarshal.Cast<byte, uint>(output);
+
+            for (int y = 0; y < height; y++)
+            {
+                for (int x = 0; x < width; x++)
+                {
+                    uint packed = inputSpan[offset++];
+
+                    uint a = forceAlpha ? 1 : (packed >> 15);
+
+                    uint outputPacked = a * 0xff000000;
+                    outputPacked |= (packed << 3) & 0x000000f8;
+                    outputPacked |= (packed << 6) & 0x0000f800;
+                    outputPacked |= (packed << 9) & 0x00f80000;
+
+                    // Replicate 5 bit components.
+                    outputPacked |= (outputPacked >> 5) & 0x00070707;
+
+                    outputSpan[outOffset++] = outputPacked;
+                }
+
+                offset += remainder;
+                outOffset += outRemainder;
+            }
+
+            return output;
+        }
+
+        public unsafe static byte[] ConvertA1B5G5R5ToR8G8B8A8(ReadOnlySpan<byte> data, int width)
+        {
+            byte[] output = new byte[data.Length * 2];
+            int offset = 0;
+            int outOffset = 0;
+
+            (int remainder, int outRemainder, int height) = GetLineRemainders(data.Length, width, 2, 4);
+
+            ReadOnlySpan<ushort> inputSpan = MemoryMarshal.Cast<byte, ushort>(data);
+            Span<uint> outputSpan = MemoryMarshal.Cast<byte, uint>(output);
+
+            for (int y = 0; y < height; y++)
+            {
+                for (int x = 0; x < width; x++)
+                {
+                    uint packed = inputSpan[offset++];
+
+                    uint a = packed & 1; // Alpha is bit 0; bits 11-15 (read below) belong to red.
+
+                    uint outputPacked = a * 0xff000000;
+                    outputPacked |= (packed >> 8) & 0x000000f8;
+                    outputPacked |= (packed << 5) & 0x0000f800;
+                    outputPacked |= (packed << 18) & 0x00f80000;
+
+                    // Replicate 5 bit components.
+                    outputPacked |= (outputPacked >> 5) & 0x00070707;
+
+                    outputSpan[outOffset++] = outputPacked;
+                }
+
+                offset += remainder;
+                outOffset += outRemainder;
+            }
+
+            return output;
+        }
+
+        public unsafe static byte[] ConvertR4G4B4A4ToR8G8B8A8(ReadOnlySpan<byte> data, int width)
+        {
+            byte[] output = new byte[data.Length * 2];
+            int offset = 0;
+            int outOffset = 0;
+
+            (int remainder, int outRemainder, int height) = GetLineRemainders(data.Length, width, 2, 4);
+
+            ReadOnlySpan<ushort> inputSpan = MemoryMarshal.Cast<byte, ushort>(data);
+            Span<uint> outputSpan = MemoryMarshal.Cast<byte, uint>(output);
+
+            for (int y = 0; y < height; y++)
+            {
+                for (int x = 0; x < width; x++)
+                {
+                    uint packed = inputSpan[offset++];
+
+                    uint outputPacked = packed & 0x0000000f;
+                    outputPacked |= (packed << 4) & 0x00000f00;
+                    outputPacked |= (packed << 8) & 0x000f0000;
+                    outputPacked |= (packed << 12) & 0x0f000000;
+
+                    outputSpan[outOffset++] = outputPacked * 0x11;
+                }
+
+                offset += remainder;
+                outOffset += outRemainder;
             }
 
             return output;
diff --git a/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs b/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs
index a32400f5..3c446abf 100644
--- a/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs
+++ b/Ryujinx.Graphics.Vulkan/VulkanRenderer.cs
@@ -396,6 +396,17 @@ namespace Ryujinx.Graphics.Vulkan
                 GAL.Format.Etc2RgbSrgb,
                 GAL.Format.Etc2RgbUnorm);
 
+            bool supports5BitComponentFormat = FormatCapabilities.OptimalFormatsSupport(compressedFormatFeatureFlags,
+                GAL.Format.R5G6B5Unorm,
+                GAL.Format.R5G5B5A1Unorm,
+                GAL.Format.R5G5B5X1Unorm,
+                GAL.Format.B5G6R5Unorm,
+                GAL.Format.B5G5R5A1Unorm,
+                GAL.Format.A1B5G5R5Unorm);
+
+            bool supportsR4G4B4A4Format = FormatCapabilities.OptimalFormatsSupport(compressedFormatFeatureFlags,
+                GAL.Format.R4G4B4A4Unorm);
+
             PhysicalDeviceVulkan12Features featuresVk12 = new PhysicalDeviceVulkan12Features()
             {
                 SType = StructureType.PhysicalDeviceVulkan12Features
@@ -425,7 +436,9 @@ namespace Ryujinx.Graphics.Vulkan
                 supports3DTextureCompression: true,
                 supportsBgraFormat: true,
                 supportsR4G4Format: false,
+                supportsR4G4B4A4Format: supportsR4G4B4A4Format,
                 supportsSnormBufferTextureFormat: true,
+                supports5BitComponentFormat: supports5BitComponentFormat,
                 supportsFragmentShaderInterlock: Capabilities.SupportsFragmentShaderInterlock,
                 supportsFragmentShaderOrderingIntel: false,
                 supportsGeometryShaderPassthrough: Capabilities.SupportsGeometryShaderPassthrough,