diff --git a/src/Ryujinx.Graphics.Gpu/Constants.cs b/src/Ryujinx.Graphics.Gpu/Constants.cs index b559edc2..ff90e61b 100644 --- a/src/Ryujinx.Graphics.Gpu/Constants.cs +++ b/src/Ryujinx.Graphics.Gpu/Constants.cs @@ -80,11 +80,6 @@ namespace Ryujinx.Graphics.Gpu /// public const int GobAlignment = 64; - /// - /// Expected byte alignment for storage buffers - /// - public const int StorageAlignment = 16; - /// /// Number of the uniform buffer reserved by the driver to store the storage buffer base addresses. /// diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs b/src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs index 998ece22..8227a7ff 100644 --- a/src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs +++ b/src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs @@ -187,30 +187,6 @@ namespace Ryujinx.Graphics.Gpu.Engine.Compute info = cs.Shaders[0].Info; } - for (int index = 0; index < info.CBuffers.Count; index++) - { - BufferDescriptor cb = info.CBuffers[index]; - - // NVN uses the "hardware" constant buffer for anything that is less than 8, - // and those are already bound above. - // Anything greater than or equal to 8 uses the emulated constant buffers. - // They are emulated using global memory loads. 
- if (cb.Slot < 8) - { - continue; - } - - ulong cbDescAddress = _channel.BufferManager.GetComputeUniformBufferAddress(0); - - int cbDescOffset = 0x260 + (cb.Slot - 8) * 0x10; - - cbDescAddress += (ulong)cbDescOffset; - - SbDescriptor cbDescriptor = _channel.MemoryManager.Physical.Read(cbDescAddress); - - _channel.BufferManager.SetComputeUniformBuffer(cb.Slot, cbDescriptor.PackAddress(), (uint)cbDescriptor.Size); - } - _channel.BufferManager.SetComputeBufferBindings(cs.Bindings); _channel.TextureManager.SetComputeBindings(cs.Bindings); diff --git a/src/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs b/src/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs index e20e1bb6..48cb33b4 100644 --- a/src/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs +++ b/src/Ryujinx.Graphics.Gpu/Memory/BufferManager.cs @@ -222,7 +222,7 @@ namespace Ryujinx.Graphics.Gpu.Memory /// Start GPU virtual address of the buffer private void RecordStorageAlignment(BuffersPerStage buffers, int index, ulong gpuVa) { - bool unaligned = (gpuVa & (Constants.StorageAlignment - 1)) != 0; + bool unaligned = (gpuVa & ((ulong)_context.Capabilities.StorageBufferOffsetAlignment - 1)) != 0; if (unaligned || HasUnalignedStorageBuffers) { diff --git a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs index 7f83f588..4b828080 100644 --- a/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs +++ b/src/Ryujinx.Graphics.Gpu/Shader/DiskCache/DiskCacheHostStorage.cs @@ -22,7 +22,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache private const ushort FileFormatVersionMajor = 1; private const ushort FileFormatVersionMinor = 2; private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor; - private const uint CodeGenVersion = 5027; + private const uint CodeGenVersion = 4992; private const string SharedTocFileName = "shared.toc"; private const string SharedDataFileName = "shared.data"; diff 
--git a/src/Ryujinx.Graphics.Gpu/Shader/ShaderInfoBuilder.cs b/src/Ryujinx.Graphics.Gpu/Shader/ShaderInfoBuilder.cs index 39b31cf6..3fc32d71 100644 --- a/src/Ryujinx.Graphics.Gpu/Shader/ShaderInfoBuilder.cs +++ b/src/Ryujinx.Graphics.Gpu/Shader/ShaderInfoBuilder.cs @@ -92,7 +92,7 @@ namespace Ryujinx.Graphics.Gpu.Shader int imageBinding = stageIndex * imagesPerStage * 2; AddDescriptor(stages, ResourceType.UniformBuffer, UniformSetIndex, uniformBinding, uniformsPerStage); - AddArrayDescriptor(stages, ResourceType.StorageBuffer, StorageSetIndex, storageBinding, storagesPerStage); + AddDescriptor(stages, ResourceType.StorageBuffer, StorageSetIndex, storageBinding, storagesPerStage); AddDualDescriptor(stages, ResourceType.TextureAndSampler, ResourceType.BufferTexture, TextureSetIndex, textureBinding, texturesPerStage); AddDualDescriptor(stages, ResourceType.Image, ResourceType.BufferImage, ImageSetIndex, imageBinding, imagesPerStage); @@ -133,19 +133,6 @@ namespace Ryujinx.Graphics.Gpu.Shader AddDescriptor(stages, type2, setIndex, binding + count, count); } - /// - /// Adds an array resource to the list of descriptors. - /// - /// Shader stages where the resource is used - /// Type of the resource - /// Descriptor set number where the resource will be bound - /// Binding number where the resource will be bound - /// Number of resources bound at the binding location - private void AddArrayDescriptor(ResourceStages stages, ResourceType type, int setIndex, int binding, int count) - { - _resourceDescriptors[setIndex].Add(new ResourceDescriptor(binding, count, type, stages)); - } - /// /// Adds buffer usage information to the list of usages. 
/// diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs index 1bd0182b..958f1cef 100644 --- a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs @@ -104,14 +104,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl } DeclareConstantBuffers(context, context.Config.Properties.ConstantBuffers.Values); - - var sBufferDescriptors = context.Config.GetStorageBufferDescriptors(); - if (sBufferDescriptors.Length != 0) - { - DeclareStorages(context, sBufferDescriptors); - - context.AppendLine(); - } + DeclareStorageBuffers(context, context.Config.Properties.StorageBuffers.Values); var textureDescriptors = context.Config.GetTextureDescriptors(); if (textureDescriptors.Length != 0) @@ -250,11 +243,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Shared.glsl"); } - if ((info.HelperFunctionsMask & HelperFunctionsMask.AtomicMinMaxS32Storage) != 0) - { - AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Storage.glsl"); - } - if ((info.HelperFunctionsMask & HelperFunctionsMask.MultiplyHighS32) != 0) { AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighS32.glsl"); @@ -290,11 +278,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreSharedSmallInt.glsl"); } - if ((info.HelperFunctionsMask & HelperFunctionsMask.StoreStorageSmallInt) != 0) - { - AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreStorageSmallInt.glsl"); - } - if ((info.HelperFunctionsMask & HelperFunctionsMask.SwizzleAdd) != 0) { AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/SwizzleAdd.glsl"); @@ -356,6 +339,16 @@ namespace 
Ryujinx.Graphics.Shader.CodeGen.Glsl } private static void DeclareConstantBuffers(CodeGenContext context, IEnumerable buffers) + { + DeclareBuffers(context, buffers, "uniform"); + } + + private static void DeclareStorageBuffers(CodeGenContext context, IEnumerable buffers) + { + DeclareBuffers(context, buffers, "buffer"); + } + + private static void DeclareBuffers(CodeGenContext context, IEnumerable buffers, string declType) { foreach (BufferDefinition buffer in buffers) { @@ -365,7 +358,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl _ => "std430" }; - context.AppendLine($"layout (binding = {buffer.Binding}, {layout}) uniform _{buffer.Name}"); + context.AppendLine($"layout (binding = {buffer.Binding}, {layout}) {declType} _{buffer.Name}"); context.EnterScope(); foreach (StructureField field in buffer.Type.Fields) @@ -373,9 +366,17 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl if (field.Type.HasFlag(AggregateType.Array)) { string typeName = GetVarTypeName(context, field.Type & ~AggregateType.Array); - string arraySize = field.ArrayLength.ToString(CultureInfo.InvariantCulture); - context.AppendLine($"{typeName} {field.Name}[{arraySize}];"); + if (field.ArrayLength > 0) + { + string arraySize = field.ArrayLength.ToString(CultureInfo.InvariantCulture); + + context.AppendLine($"{typeName} {field.Name}[{arraySize}];"); + } + else + { + context.AppendLine($"{typeName} {field.Name}[];"); + } } else { @@ -390,22 +391,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl } } - private static void DeclareStorages(CodeGenContext context, BufferDescriptor[] descriptors) - { - string sbName = OperandManager.GetShaderStagePrefix(context.Config.Stage); - - sbName += "_" + DefaultNames.StorageNamePrefix; - - string blockName = $"{sbName}_{DefaultNames.BlockSuffix}"; - - string layout = context.Config.Options.TargetApi == TargetApi.Vulkan ? 
", set = 1" : string.Empty; - - context.AppendLine($"layout (binding = {context.Config.FirstStorageBufferBinding}{layout}, std430) buffer {blockName}"); - context.EnterScope(); - context.AppendLine("uint " + DefaultNames.DataName + "[];"); - context.LeaveScope($" {sbName}[{NumberFormatter.FormatInt(descriptors.Max(x => x.Slot) + 1)}];"); - } - private static void DeclareSamplers(CodeGenContext context, TextureDescriptor[] descriptors) { int arraySize = 0; @@ -733,7 +718,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl code = code.Replace("\t", CodeGenContext.Tab); code = code.Replace("$SHARED_MEM$", DefaultNames.SharedMemoryName); - code = code.Replace("$STORAGE_MEM$", OperandManager.GetShaderStagePrefix(context.Config.Stage) + "_" + DefaultNames.StorageNamePrefix); if (context.Config.GpuAccessor.QueryHostSupportsShaderBallot()) { diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/DefaultNames.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/DefaultNames.cs index fc3004a8..5ee8259c 100644 --- a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/DefaultNames.cs +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/DefaultNames.cs @@ -11,12 +11,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl public const string IAttributePrefix = "in_attr"; public const string OAttributePrefix = "out_attr"; - public const string StorageNamePrefix = "s"; - - public const string DataName = "data"; - - public const string BlockSuffix = "block"; - public const string LocalMemoryName = "local_mem"; public const string SharedMemoryName = "shared_mem"; diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Storage.glsl b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Storage.glsl deleted file mode 100644 index 0862a71b..00000000 --- a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Storage.glsl +++ /dev/null @@ -1,21 +0,0 @@ -int Helper_AtomicMaxS32(int index, int offset, int value) -{ - uint oldValue, newValue; - do - { - 
oldValue = $STORAGE_MEM$[index].data[offset]; - newValue = uint(max(int(oldValue), value)); - } while (atomicCompSwap($STORAGE_MEM$[index].data[offset], oldValue, newValue) != oldValue); - return int(oldValue); -} - -int Helper_AtomicMinS32(int index, int offset, int value) -{ - uint oldValue, newValue; - do - { - oldValue = $STORAGE_MEM$[index].data[offset]; - newValue = uint(min(int(oldValue), value)); - } while (atomicCompSwap($STORAGE_MEM$[index].data[offset], oldValue, newValue) != oldValue); - return int(oldValue); -} \ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreStorageSmallInt.glsl b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreStorageSmallInt.glsl deleted file mode 100644 index f2253a79..00000000 --- a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreStorageSmallInt.glsl +++ /dev/null @@ -1,23 +0,0 @@ -void Helper_StoreStorage16(int index, int offset, uint value) -{ - int wordOffset = offset >> 2; - int bitOffset = (offset & 3) * 8; - uint oldValue, newValue; - do - { - oldValue = $STORAGE_MEM$[index].data[wordOffset]; - newValue = bitfieldInsert(oldValue, value, bitOffset, 16); - } while (atomicCompSwap($STORAGE_MEM$[index].data[wordOffset], oldValue, newValue) != oldValue); -} - -void Helper_StoreStorage8(int index, int offset, uint value) -{ - int wordOffset = offset >> 2; - int bitOffset = (offset & 3) * 8; - uint oldValue, newValue; - do - { - oldValue = $STORAGE_MEM$[index].data[wordOffset]; - newValue = bitfieldInsert(oldValue, value, bitOffset, 8); - } while (atomicCompSwap($STORAGE_MEM$[index].data[wordOffset], oldValue, newValue) != oldValue); -} \ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGen.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGen.cs index 24ea66d0..01d8a6e7 100644 --- a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGen.cs +++ 
b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGen.cs @@ -68,33 +68,45 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions string args = string.Empty; - for (int argIndex = 0; argIndex < arity; argIndex++) + if (atomic && operation.StorageKind == StorageKind.StorageBuffer) { + args = GenerateLoadOrStore(context, operation, isStore: false); + + AggregateType dstType = operation.Inst == Instruction.AtomicMaxS32 || operation.Inst == Instruction.AtomicMinS32 + ? AggregateType.S32 + : AggregateType.U32; + + for (int argIndex = operation.SourcesCount - arity + 2; argIndex < operation.SourcesCount; argIndex++) + { + args += ", " + GetSoureExpr(context, operation.GetSource(argIndex), dstType); + } + } + else if (atomic && operation.StorageKind == StorageKind.SharedMemory) + { + args = LoadShared(context, operation); + // For shared memory access, the second argument is unused and should be ignored. // It is there to make both storage and shared access have the same number of arguments. // For storage, both inputs are consumed when the argument index is 0, so we should skip it here. 
- if (argIndex == 1 && (atomic || operation.StorageKind == StorageKind.SharedMemory)) - { - continue; - } - if (argIndex != 0) + for (int argIndex = 2; argIndex < arity; argIndex++) { args += ", "; - } - if (argIndex == 0 && atomic) + AggregateType dstType = GetSrcVarType(inst, argIndex); + + args += GetSoureExpr(context, operation.GetSource(argIndex), dstType); + } + } + else + { + for (int argIndex = 0; argIndex < arity; argIndex++) { - switch (operation.StorageKind) + if (argIndex != 0) { - case StorageKind.SharedMemory: args += LoadShared(context, operation); break; - case StorageKind.StorageBuffer: args += LoadStorage(context, operation); break; - - default: throw new InvalidOperationException($"Invalid storage kind \"{operation.StorageKind}\"."); + args += ", "; } - } - else - { + AggregateType dstType = GetSrcVarType(inst, argIndex); args += GetSoureExpr(context, operation.GetSource(argIndex), dstType); @@ -173,9 +185,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions case Instruction.LoadShared: return LoadShared(context, operation); - case Instruction.LoadStorage: - return LoadStorage(context, operation); - case Instruction.Lod: return Lod(context, operation); @@ -203,15 +212,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions case Instruction.StoreShared8: return StoreShared8(context, operation); - case Instruction.StoreStorage: - return StoreStorage(context, operation); - - case Instruction.StoreStorage16: - return StoreStorage16(context, operation); - - case Instruction.StoreStorage8: - return StoreStorage8(context, operation); - case Instruction.TextureSample: return TextureSample(context, operation); diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs index 6cf36a2a..f42d9898 100644 --- a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs +++ 
b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs @@ -85,7 +85,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions Add(Instruction.Load, InstType.Special); Add(Instruction.LoadLocal, InstType.Special); Add(Instruction.LoadShared, InstType.Special); - Add(Instruction.LoadStorage, InstType.Special); Add(Instruction.Lod, InstType.Special); Add(Instruction.LogarithmB2, InstType.CallUnary, "log2"); Add(Instruction.LogicalAnd, InstType.OpBinaryCom, "&&", 9); @@ -123,9 +122,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions Add(Instruction.StoreShared, InstType.Special); Add(Instruction.StoreShared16, InstType.Special); Add(Instruction.StoreShared8, InstType.Special); - Add(Instruction.StoreStorage, InstType.Special); - Add(Instruction.StoreStorage16, InstType.Special); - Add(Instruction.StoreStorage8, InstType.Special); Add(Instruction.Subtract, InstType.OpBinary, "-", 2); Add(Instruction.SwizzleAdd, InstType.CallTernary, HelperFunctionNames.SwizzleAdd); Add(Instruction.TextureSample, InstType.Special); diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs index dfc8197b..c8084d9d 100644 --- a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs @@ -210,17 +210,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions return $"{arrayName}[{offsetExpr}]"; } - public static string LoadStorage(CodeGenContext context, AstOperation operation) - { - IAstNode src1 = operation.GetSource(0); - IAstNode src2 = operation.GetSource(1); - - string indexExpr = GetSoureExpr(context, src1, GetSrcVarType(operation.Inst, 0)); - string offsetExpr = GetSoureExpr(context, src2, GetSrcVarType(operation.Inst, 1)); - - return GetStorageBufferAccessor(indexExpr, offsetExpr, context.Config.Stage); - } - public static string Lod(CodeGenContext 
context, AstOperation operation) { AstTextureOperation texOp = (AstTextureOperation)operation; @@ -326,60 +315,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions return $"{HelperFunctionNames.StoreShared8}({offsetExpr}, {src})"; } - public static string StoreStorage(CodeGenContext context, AstOperation operation) - { - IAstNode src1 = operation.GetSource(0); - IAstNode src2 = operation.GetSource(1); - IAstNode src3 = operation.GetSource(2); - - string indexExpr = GetSoureExpr(context, src1, GetSrcVarType(operation.Inst, 0)); - string offsetExpr = GetSoureExpr(context, src2, GetSrcVarType(operation.Inst, 1)); - - AggregateType srcType = OperandManager.GetNodeDestType(context, src3); - - string src = TypeConversion.ReinterpretCast(context, src3, srcType, AggregateType.U32); - - string sb = GetStorageBufferAccessor(indexExpr, offsetExpr, context.Config.Stage); - - return $"{sb} = {src}"; - } - - public static string StoreStorage16(CodeGenContext context, AstOperation operation) - { - IAstNode src1 = operation.GetSource(0); - IAstNode src2 = operation.GetSource(1); - IAstNode src3 = operation.GetSource(2); - - string indexExpr = GetSoureExpr(context, src1, GetSrcVarType(operation.Inst, 0)); - string offsetExpr = GetSoureExpr(context, src2, GetSrcVarType(operation.Inst, 1)); - - AggregateType srcType = OperandManager.GetNodeDestType(context, src3); - - string src = TypeConversion.ReinterpretCast(context, src3, srcType, AggregateType.U32); - - string sb = GetStorageBufferAccessor(indexExpr, offsetExpr, context.Config.Stage); - - return $"{HelperFunctionNames.StoreStorage16}({indexExpr}, {offsetExpr}, {src})"; - } - - public static string StoreStorage8(CodeGenContext context, AstOperation operation) - { - IAstNode src1 = operation.GetSource(0); - IAstNode src2 = operation.GetSource(1); - IAstNode src3 = operation.GetSource(2); - - string indexExpr = GetSoureExpr(context, src1, GetSrcVarType(operation.Inst, 0)); - string offsetExpr = GetSoureExpr(context, 
src2, GetSrcVarType(operation.Inst, 1)); - - AggregateType srcType = OperandManager.GetNodeDestType(context, src3); - - string src = TypeConversion.ReinterpretCast(context, src3, srcType, AggregateType.U32); - - string sb = GetStorageBufferAccessor(indexExpr, offsetExpr, context.Config.Stage); - - return $"{HelperFunctionNames.StoreStorage8}({indexExpr}, {offsetExpr}, {src})"; - } - public static string TextureSample(CodeGenContext context, AstOperation operation) { AstTextureOperation texOp = (AstTextureOperation)operation; @@ -701,25 +636,34 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions } } - private static string GenerateLoadOrStore(CodeGenContext context, AstOperation operation, bool isStore) + public static string GenerateLoadOrStore(CodeGenContext context, AstOperation operation, bool isStore) { StorageKind storageKind = operation.StorageKind; string varName; AggregateType varType; int srcIndex = 0; - int inputsCount = isStore ? operation.SourcesCount - 1 : operation.SourcesCount; + bool isStoreOrAtomic = operation.Inst == Instruction.Store || operation.Inst.IsAtomic(); + int inputsCount = isStoreOrAtomic ? operation.SourcesCount - 1 : operation.SourcesCount; + + if (operation.Inst == Instruction.AtomicCompareAndSwap) + { + inputsCount--; + } switch (storageKind) { case StorageKind.ConstantBuffer: + case StorageKind.StorageBuffer: if (!(operation.GetSource(srcIndex++) is AstOperand bindingIndex) || bindingIndex.Type != OperandType.Constant) { throw new InvalidOperationException($"First input of {operation.Inst} with {storageKind} storage must be a constant operand."); } int binding = bindingIndex.Value; - BufferDefinition buffer = context.Config.Properties.ConstantBuffers[binding]; + BufferDefinition buffer = storageKind == StorageKind.ConstantBuffer + ? 
context.Config.Properties.ConstantBuffers[binding] + : context.Config.Properties.StorageBuffers[binding]; if (!(operation.GetSource(srcIndex++) is AstOperand fieldIndex) || fieldIndex.Type != OperandType.Constant) { @@ -825,15 +769,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions return varName; } - private static string GetStorageBufferAccessor(string slotExpr, string offsetExpr, ShaderStage stage) - { - string sbName = OperandManager.GetShaderStagePrefix(stage); - - sbName += "_" + DefaultNames.StorageNamePrefix; - - return $"{sbName}[{slotExpr}].{DefaultNames.DataName}[{offsetExpr}]"; - } - private static string GetMask(int index) { return $".{"rgba".AsSpan(index, 1)}"; diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/OperandManager.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/OperandManager.cs index e34e4e07..4fd1d17c 100644 --- a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/OperandManager.cs +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/OperandManager.cs @@ -118,6 +118,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl switch (operation.StorageKind) { case StorageKind.ConstantBuffer: + case StorageKind.StorageBuffer: if (!(operation.GetSource(0) is AstOperand bindingIndex) || bindingIndex.Type != OperandType.Constant) { throw new InvalidOperationException($"First input of {operation.Inst} with {operation.StorageKind} storage must be a constant operand."); @@ -128,7 +129,9 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl throw new InvalidOperationException($"Second input of {operation.Inst} with {operation.StorageKind} storage must be a constant operand."); } - BufferDefinition buffer = context.Config.Properties.ConstantBuffers[bindingIndex.Value]; + BufferDefinition buffer = operation.StorageKind == StorageKind.ConstantBuffer + ? 
context.Config.Properties.ConstantBuffers[bindingIndex.Value] + : context.Config.Properties.StorageBuffers[bindingIndex.Value]; StructureField field = buffer.Type.Fields[fieldIndex.Value]; return field.Type & AggregateType.ElementTypeMask; diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/CodeGenContext.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/CodeGenContext.cs index 7af6d316..c1bfa088 100644 --- a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/CodeGenContext.cs +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/CodeGenContext.cs @@ -24,7 +24,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv public int InputVertices { get; } public Dictionary ConstantBuffers { get; } = new Dictionary(); - public Instruction StorageBuffersArray { get; set; } + public Dictionary StorageBuffers { get; } = new Dictionary(); public Instruction LocalMemory { get; set; } public Instruction SharedMemory { get; set; } public Dictionary SamplersTypes { get; } = new Dictionary(); @@ -308,7 +308,14 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv { if ((type & AggregateType.Array) != 0) { - return TypeArray(GetType(type & ~AggregateType.Array), Constant(TypeU32(), length)); + if (length > 0) + { + return TypeArray(GetType(type & ~AggregateType.Array), Constant(TypeU32(), length)); + } + else + { + return TypeRuntimeArray(GetType(type & ~AggregateType.Array)); + } } else if ((type & AggregateType.ElementCountMask) != 0) { diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Declarations.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Declarations.cs index 7c242589..eb2db514 100644 --- a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Declarations.cs +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Declarations.cs @@ -5,6 +5,7 @@ using Ryujinx.Graphics.Shader.Translation; using Spv.Generator; using System; using System.Collections.Generic; +using System.Diagnostics; using System.Linq; using System.Numerics; using static Spv.Specification; @@ -99,7 +100,7 @@ namespace 
Ryujinx.Graphics.Shader.CodeGen.Spirv } DeclareConstantBuffers(context, context.Config.Properties.ConstantBuffers.Values); - DeclareStorageBuffers(context, context.Config.GetStorageBufferDescriptors()); + DeclareStorageBuffers(context, context.Config.Properties.StorageBuffers.Values); DeclareSamplers(context, context.Config.GetTextureDescriptors()); DeclareImages(context, context.Config.GetImageDescriptors()); DeclareInputsAndOutputs(context, info); @@ -127,6 +128,16 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv } private static void DeclareConstantBuffers(CodeGenContext context, IEnumerable buffers) + { + DeclareBuffers(context, buffers, isBuffer: false); + } + + private static void DeclareStorageBuffers(CodeGenContext context, IEnumerable buffers) + { + DeclareBuffers(context, buffers, isBuffer: true); + } + + private static void DeclareBuffers(CodeGenContext context, IEnumerable buffers, bool isBuffer) { HashSet decoratedTypes = new HashSet(); @@ -155,6 +166,12 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv context.Decorate(structFieldTypes[fieldIndex], Decoration.ArrayStride, (LiteralInteger)fieldSize); } + // Zero lengths are assumed to be a "runtime array" (which does not have an explicit length + // specified on the shader, and instead assumes the bound buffer length). + // It is only valid as the last struct element. + + Debug.Assert(field.ArrayLength > 0 || fieldIndex == buffer.Type.Fields.Length - 1); + offset += fieldSize * field.ArrayLength; } else @@ -163,56 +180,37 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv } } - var ubStructType = context.TypeStruct(false, structFieldTypes); + var structType = context.TypeStruct(false, structFieldTypes); - if (decoratedTypes.Add(ubStructType)) + if (decoratedTypes.Add(structType)) { - context.Decorate(ubStructType, Decoration.Block); + context.Decorate(structType, isBuffer ? 
Decoration.BufferBlock : Decoration.Block); for (int fieldIndex = 0; fieldIndex < structFieldOffsets.Length; fieldIndex++) { - context.MemberDecorate(ubStructType, fieldIndex, Decoration.Offset, (LiteralInteger)structFieldOffsets[fieldIndex]); + context.MemberDecorate(structType, fieldIndex, Decoration.Offset, (LiteralInteger)structFieldOffsets[fieldIndex]); } } - var ubPointerType = context.TypePointer(StorageClass.Uniform, ubStructType); - var ubVariable = context.Variable(ubPointerType, StorageClass.Uniform); + var pointerType = context.TypePointer(StorageClass.Uniform, structType); + var variable = context.Variable(pointerType, StorageClass.Uniform); - context.Name(ubVariable, buffer.Name); - context.Decorate(ubVariable, Decoration.DescriptorSet, (LiteralInteger)buffer.Set); - context.Decorate(ubVariable, Decoration.Binding, (LiteralInteger)buffer.Binding); - context.AddGlobalVariable(ubVariable); - context.ConstantBuffers.Add(buffer.Binding, ubVariable); + context.Name(variable, buffer.Name); + context.Decorate(variable, Decoration.DescriptorSet, (LiteralInteger)buffer.Set); + context.Decorate(variable, Decoration.Binding, (LiteralInteger)buffer.Binding); + context.AddGlobalVariable(variable); + + if (isBuffer) + { + context.StorageBuffers.Add(buffer.Binding, variable); + } + else + { + context.ConstantBuffers.Add(buffer.Binding, variable); + } } } - private static void DeclareStorageBuffers(CodeGenContext context, BufferDescriptor[] descriptors) - { - if (descriptors.Length == 0) - { - return; - } - - int setIndex = context.Config.Options.TargetApi == TargetApi.Vulkan ? 
1 : 0; - int count = descriptors.Max(x => x.Slot) + 1; - - var sbArrayType = context.TypeRuntimeArray(context.TypeU32()); - context.Decorate(sbArrayType, Decoration.ArrayStride, (LiteralInteger)4); - var sbStructType = context.TypeStruct(true, sbArrayType); - context.Decorate(sbStructType, Decoration.BufferBlock); - context.MemberDecorate(sbStructType, 0, Decoration.Offset, (LiteralInteger)0); - var sbStructArrayType = context.TypeArray(sbStructType, context.Constant(context.TypeU32(), count)); - var sbPointerType = context.TypePointer(StorageClass.Uniform, sbStructArrayType); - var sbVariable = context.Variable(sbPointerType, StorageClass.Uniform); - - context.Name(sbVariable, $"{GetStagePrefix(context.Config.Stage)}_s"); - context.Decorate(sbVariable, Decoration.DescriptorSet, (LiteralInteger)setIndex); - context.Decorate(sbVariable, Decoration.Binding, (LiteralInteger)context.Config.FirstStorageBufferBinding); - context.AddGlobalVariable(sbVariable); - - context.StorageBuffersArray = sbVariable; - } - private static void DeclareSamplers(CodeGenContext context, TextureDescriptor[] descriptors) { foreach (var descriptor in descriptors) diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs index f088a47f..4be0c62b 100644 --- a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs @@ -99,7 +99,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv Add(Instruction.Load, GenerateLoad); Add(Instruction.LoadLocal, GenerateLoadLocal); Add(Instruction.LoadShared, GenerateLoadShared); - Add(Instruction.LoadStorage, GenerateLoadStorage); Add(Instruction.Lod, GenerateLod); Add(Instruction.LogarithmB2, GenerateLogarithmB2); Add(Instruction.LogicalAnd, GenerateLogicalAnd); @@ -137,9 +136,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv Add(Instruction.StoreShared, GenerateStoreShared); Add(Instruction.StoreShared16, 
GenerateStoreShared16); Add(Instruction.StoreShared8, GenerateStoreShared8); - Add(Instruction.StoreStorage, GenerateStoreStorage); - Add(Instruction.StoreStorage16, GenerateStoreStorage16); - Add(Instruction.StoreStorage8, GenerateStoreStorage8); Add(Instruction.Subtract, GenerateSubtract); Add(Instruction.SwizzleAdd, GenerateSwizzleAdd); Add(Instruction.TextureSample, GenerateTextureSample); @@ -889,14 +885,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv return new OperationResult(AggregateType.U32, value); } - private static OperationResult GenerateLoadStorage(CodeGenContext context, AstOperation operation) - { - var elemPointer = GetStorageElemPointer(context, operation); - var value = context.Load(context.TypeU32(), elemPointer); - - return new OperationResult(AggregateType.U32, value); - } - private static OperationResult GenerateLod(CodeGenContext context, AstOperation operation) { AstTextureOperation texOp = (AstTextureOperation)operation; @@ -1307,28 +1295,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv return OperationResult.Invalid; } - private static OperationResult GenerateStoreStorage(CodeGenContext context, AstOperation operation) - { - var elemPointer = GetStorageElemPointer(context, operation); - context.Store(elemPointer, context.Get(AggregateType.U32, operation.GetSource(2))); - - return OperationResult.Invalid; - } - - private static OperationResult GenerateStoreStorage16(CodeGenContext context, AstOperation operation) - { - GenerateStoreStorageSmallInt(context, operation, 16); - - return OperationResult.Invalid; - } - - private static OperationResult GenerateStoreStorage8(CodeGenContext context, AstOperation operation) - { - GenerateStoreStorageSmallInt(context, operation, 8); - - return OperationResult.Invalid; - } - private static OperationResult GenerateSubtract(CodeGenContext context, AstOperation operation) { return GenerateBinary(context, operation, context.Delegates.FSub, context.Delegates.ISub); @@ -1849,13 +1815,13 @@ 
namespace Ryujinx.Graphics.Shader.CodeGen.Spirv AstOperation operation, Func emitU) { - var value = context.GetU32(operation.GetSource(2)); + var value = context.GetU32(operation.GetSource(operation.SourcesCount - 1)); SpvInstruction elemPointer; if (operation.StorageKind == StorageKind.StorageBuffer) { - elemPointer = GetStorageElemPointer(context, operation); + elemPointer = GetStoragePointer(context, operation, out _); } else if (operation.StorageKind == StorageKind.SharedMemory) { @@ -1875,14 +1841,14 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv private static OperationResult GenerateAtomicMemoryCas(CodeGenContext context, AstOperation operation) { - var value0 = context.GetU32(operation.GetSource(2)); - var value1 = context.GetU32(operation.GetSource(3)); + var value0 = context.GetU32(operation.GetSource(operation.SourcesCount - 2)); + var value1 = context.GetU32(operation.GetSource(operation.SourcesCount - 1)); SpvInstruction elemPointer; if (operation.StorageKind == StorageKind.StorageBuffer) { - elemPointer = GetStorageElemPointer(context, operation); + elemPointer = GetStoragePointer(context, operation, out _); } else if (operation.StorageKind == StorageKind.SharedMemory) { @@ -1901,17 +1867,33 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv } private static OperationResult GenerateLoadOrStore(CodeGenContext context, AstOperation operation, bool isStore) + { + SpvInstruction pointer = GetStoragePointer(context, operation, out AggregateType varType); + + if (isStore) + { + context.Store(pointer, context.Get(varType, operation.GetSource(operation.SourcesCount - 1))); + return OperationResult.Invalid; + } + else + { + var result = context.Load(context.GetType(varType), pointer); + return new OperationResult(varType, result); + } + } + + private static SpvInstruction GetStoragePointer(CodeGenContext context, AstOperation operation, out AggregateType varType) { StorageKind storageKind = operation.StorageKind; StorageClass storageClass; SpvInstruction 
baseObj; - AggregateType varType; int srcIndex = 0; switch (storageKind) { case StorageKind.ConstantBuffer: + case StorageKind.StorageBuffer: if (!(operation.GetSource(srcIndex++) is AstOperand bindingIndex) || bindingIndex.Type != OperandType.Constant) { throw new InvalidOperationException($"First input of {operation.Inst} with {storageKind} storage must be a constant operand."); @@ -1922,12 +1904,16 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv throw new InvalidOperationException($"Second input of {operation.Inst} with {storageKind} storage must be a constant operand."); } - BufferDefinition buffer = context.Config.Properties.ConstantBuffers[bindingIndex.Value]; + BufferDefinition buffer = storageKind == StorageKind.ConstantBuffer + ? context.Config.Properties.ConstantBuffers[bindingIndex.Value] + : context.Config.Properties.StorageBuffers[bindingIndex.Value]; StructureField field = buffer.Type.Fields[fieldIndex.Value]; storageClass = StorageClass.Uniform; varType = field.Type & AggregateType.ElementTypeMask; - baseObj = context.ConstantBuffers[bindingIndex.Value]; + baseObj = storageKind == StorageKind.ConstantBuffer + ? context.ConstantBuffers[bindingIndex.Value] + : context.StorageBuffers[bindingIndex.Value]; break; case StorageKind.Input: @@ -1993,7 +1979,14 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv throw new InvalidOperationException($"Invalid storage kind {storageKind}."); } - int inputsCount = (isStore ? operation.SourcesCount - 1 : operation.SourcesCount) - srcIndex; + bool isStoreOrAtomic = operation.Inst == Instruction.Store || operation.Inst.IsAtomic(); + int inputsCount = (isStoreOrAtomic ? 
operation.SourcesCount - 1 : operation.SourcesCount) - srcIndex; + + if (operation.Inst == Instruction.AtomicCompareAndSwap) + { + inputsCount--; + } + SpvInstruction e0, e1, e2; SpvInstruction pointer; @@ -2030,16 +2023,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv break; } - if (isStore) - { - context.Store(pointer, context.Get(varType, operation.GetSource(srcIndex))); - return OperationResult.Invalid; - } - else - { - var result = context.Load(context.GetType(varType), pointer); - return new OperationResult(varType, result); - } + return pointer; } private static SpvInstruction GetScalarInput(CodeGenContext context, IoVariable ioVariable) @@ -2068,25 +2052,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv GenerateStoreSmallInt(context, elemPointer, bitOffset, value, bitSize); } - private static void GenerateStoreStorageSmallInt(CodeGenContext context, AstOperation operation, int bitSize) - { - var i0 = context.Get(AggregateType.S32, operation.GetSource(0)); - var offset = context.Get(AggregateType.U32, operation.GetSource(1)); - var value = context.Get(AggregateType.U32, operation.GetSource(2)); - - var wordOffset = context.ShiftRightLogical(context.TypeU32(), offset, context.Constant(context.TypeU32(), 2)); - var bitOffset = context.BitwiseAnd(context.TypeU32(), offset, context.Constant(context.TypeU32(), 3)); - bitOffset = context.ShiftLeftLogical(context.TypeU32(), bitOffset, context.Constant(context.TypeU32(), 3)); - - var sbVariable = context.StorageBuffersArray; - - var i1 = context.Constant(context.TypeS32(), 0); - - var elemPointer = context.AccessChain(context.TypePointer(StorageClass.Uniform, context.TypeU32()), sbVariable, i0, i1, wordOffset); - - GenerateStoreSmallInt(context, elemPointer, bitOffset, value, bitSize); - } - private static void GenerateStoreSmallInt( CodeGenContext context, SpvInstruction elemPointer, @@ -2173,16 +2138,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv } } - private static SpvInstruction 
GetStorageElemPointer(CodeGenContext context, AstOperation operation) - { - var sbVariable = context.StorageBuffersArray; - var i0 = context.Get(AggregateType.S32, operation.GetSource(0)); - var i1 = context.Constant(context.TypeS32(), 0); - var i2 = context.Get(AggregateType.S32, operation.GetSource(1)); - - return context.AccessChain(context.TypePointer(StorageClass.Uniform, context.TypeU32()), sbVariable, i0, i1, i2); - } - private static OperationResult GenerateUnary( CodeGenContext context, AstOperation operation, diff --git a/src/Ryujinx.Graphics.Shader/Constants.cs b/src/Ryujinx.Graphics.Shader/Constants.cs index c6f9ef49..7f1445ed 100644 --- a/src/Ryujinx.Graphics.Shader/Constants.cs +++ b/src/Ryujinx.Graphics.Shader/Constants.cs @@ -10,7 +10,5 @@ namespace Ryujinx.Graphics.Shader public const int NvnBaseVertexByteOffset = 0x640; public const int NvnBaseInstanceByteOffset = 0x644; public const int NvnDrawIndexByteOffset = 0x648; - - public const int StorageAlignment = 16; } } \ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs b/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs index 3be5088e..473964de 100644 --- a/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs +++ b/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs @@ -79,7 +79,7 @@ namespace Ryujinx.Graphics.Shader /// Binding number int QueryBindingConstantBuffer(int index) { - return index; + return index + 1; } /// diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitFlowControl.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitFlowControl.cs index 91c23230..73696355 100644 --- a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitFlowControl.cs +++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitFlowControl.cs @@ -164,6 +164,7 @@ namespace Ryujinx.Graphics.Shader.Instructions if (op.Ccc == Ccc.T) { + context.PrepareForReturn(); context.Return(); } else @@ -175,6 +176,7 @@ namespace Ryujinx.Graphics.Shader.Instructions { Operand lblSkip = Label(); 
context.BranchIfFalse(lblSkip, cond); + context.PrepareForReturn(); context.Return(); context.MarkLabel(lblSkip); } diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitMemory.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitMemory.cs index 6f5913eb..9aa73820 100644 --- a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitMemory.cs +++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitMemory.cs @@ -336,13 +336,12 @@ namespace Ryujinx.Graphics.Shader.Instructions int offset, bool extended) { - bool isSmallInt = size < LsSize.B32; - int count = GetVectorCount(size); + StorageKind storageKind = GetStorageKind(size); - (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, new Register(ra, RegisterType.Gpr), extended, offset); + (_, Operand addrHigh) = Get40BitsAddress(context, new Register(ra, RegisterType.Gpr), extended, offset); - Operand bitOffset = GetBitOffset(context, addrLow); + Operand srcA = context.Copy(new Operand(new Register(ra, RegisterType.Gpr))); for (int index = 0; index < count; index++) { @@ -353,12 +352,7 @@ namespace Ryujinx.Graphics.Shader.Instructions break; } - Operand value = context.LoadGlobal(context.IAdd(addrLow, Const(index * 4)), addrHigh); - - if (isSmallInt) - { - value = ExtractSmallInt(context, size, bitOffset, value); - } + Operand value = context.Load(storageKind, context.IAdd(srcA, Const(offset + index * 4)), addrHigh); context.Copy(Register(dest), value); } @@ -445,10 +439,11 @@ namespace Ryujinx.Graphics.Shader.Instructions } int count = GetVectorCount((LsSize)size); + StorageKind storageKind = GetStorageKind((LsSize)size); - (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, new Register(ra, RegisterType.Gpr), extended, offset); + (_, Operand addrHigh) = Get40BitsAddress(context, new Register(ra, RegisterType.Gpr), extended, offset); - Operand bitOffset = GetBitOffset(context, addrLow); + Operand srcA = context.Copy(new Operand(new Register(ra, RegisterType.Gpr))); for (int index = 0; index < 
count; index++) { @@ -456,23 +451,24 @@ namespace Ryujinx.Graphics.Shader.Instructions Operand value = Register(isRz ? rd : rd + index, RegisterType.Gpr); - Operand addrLowOffset = context.IAdd(addrLow, Const(index * 4)); + Operand addrLowOffset = context.IAdd(srcA, Const(offset + index * 4)); - if (size == LsSize2.U8 || size == LsSize2.S8) - { - context.StoreGlobal8(addrLowOffset, addrHigh, value); - } - else if (size == LsSize2.U16 || size == LsSize2.S16) - { - context.StoreGlobal16(addrLowOffset, addrHigh, value); - } - else - { - context.StoreGlobal(addrLowOffset, addrHigh, value); - } + context.Store(storageKind, addrLowOffset, addrHigh, value); } } + private static StorageKind GetStorageKind(LsSize size) + { + return size switch + { + LsSize.U8 => StorageKind.GlobalMemoryU8, + LsSize.S8 => StorageKind.GlobalMemoryS8, + LsSize.U16 => StorageKind.GlobalMemoryU16, + LsSize.S16 => StorageKind.GlobalMemoryS16, + _ => StorageKind.GlobalMemory + }; + } + private static int GetVectorCount(LsSize size) { switch (size) diff --git a/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/Instruction.cs b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/Instruction.cs index f7afe507..aecb6724 100644 --- a/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/Instruction.cs +++ b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/Instruction.cs @@ -79,10 +79,8 @@ namespace Ryujinx.Graphics.Shader.IntermediateRepresentation ImageAtomic, IsNan, Load, - LoadGlobal, LoadLocal, LoadShared, - LoadStorage, Lod, LogarithmB2, LogicalAnd, @@ -117,16 +115,10 @@ namespace Ryujinx.Graphics.Shader.IntermediateRepresentation Sine, SquareRoot, Store, - StoreGlobal, - StoreGlobal16, - StoreGlobal8, StoreLocal, StoreShared, StoreShared16, StoreShared8, - StoreStorage, - StoreStorage16, - StoreStorage8, Subtract, SwizzleAdd, TextureSample, diff --git a/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/StorageKind.cs 
b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/StorageKind.cs index 59357443..2b5dd1de 100644 --- a/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/StorageKind.cs +++ b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/StorageKind.cs @@ -11,7 +11,12 @@ namespace Ryujinx.Graphics.Shader.IntermediateRepresentation StorageBuffer, LocalMemory, SharedMemory, - GlobalMemory + GlobalMemory, + // TODO: Remove those and store type as a field on the Operation class itself. + GlobalMemoryS8, + GlobalMemoryS16, + GlobalMemoryU8, + GlobalMemoryU16 } static class StorageKindExtensions diff --git a/src/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj b/src/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj index 2efcbca4..86de2e75 100644 --- a/src/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj +++ b/src/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj @@ -11,7 +11,6 @@ - @@ -19,7 +18,6 @@ - diff --git a/src/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs b/src/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs index d45f8d4e..c348b5d9 100644 --- a/src/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs +++ b/src/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs @@ -6,7 +6,6 @@ namespace Ryujinx.Graphics.Shader.StructuredIr enum HelperFunctionsMask { AtomicMinMaxS32Shared = 1 << 0, - AtomicMinMaxS32Storage = 1 << 1, MultiplyHighS32 = 1 << 2, MultiplyHighU32 = 1 << 3, Shuffle = 1 << 4, @@ -14,7 +13,6 @@ namespace Ryujinx.Graphics.Shader.StructuredIr ShuffleUp = 1 << 6, ShuffleXor = 1 << 7, StoreSharedSmallInt = 1 << 8, - StoreStorageSmallInt = 1 << 9, SwizzleAdd = 1 << 10, FSI = 1 << 11 } diff --git a/src/Ryujinx.Graphics.Shader/StructuredIr/InstructionInfo.cs b/src/Ryujinx.Graphics.Shader/StructuredIr/InstructionInfo.cs index 44f0fad9..6e201350 100644 --- a/src/Ryujinx.Graphics.Shader/StructuredIr/InstructionInfo.cs +++ b/src/Ryujinx.Graphics.Shader/StructuredIr/InstructionInfo.cs @@ -90,10 +90,8 
@@ namespace Ryujinx.Graphics.Shader.StructuredIr Add(Instruction.ImageAtomic, AggregateType.S32); Add(Instruction.IsNan, AggregateType.Bool, AggregateType.Scalar); Add(Instruction.Load, AggregateType.FP32); - Add(Instruction.LoadGlobal, AggregateType.U32, AggregateType.S32, AggregateType.S32); Add(Instruction.LoadLocal, AggregateType.U32, AggregateType.S32); Add(Instruction.LoadShared, AggregateType.U32, AggregateType.S32); - Add(Instruction.LoadStorage, AggregateType.U32, AggregateType.S32, AggregateType.S32); Add(Instruction.Lod, AggregateType.FP32); Add(Instruction.LogarithmB2, AggregateType.Scalar, AggregateType.Scalar); Add(Instruction.LogicalAnd, AggregateType.Bool, AggregateType.Bool, AggregateType.Bool); @@ -123,14 +121,10 @@ namespace Ryujinx.Graphics.Shader.StructuredIr Add(Instruction.Sine, AggregateType.Scalar, AggregateType.Scalar); Add(Instruction.SquareRoot, AggregateType.Scalar, AggregateType.Scalar); Add(Instruction.Store, AggregateType.Void); - Add(Instruction.StoreGlobal, AggregateType.Void, AggregateType.S32, AggregateType.S32, AggregateType.U32); Add(Instruction.StoreLocal, AggregateType.Void, AggregateType.S32, AggregateType.U32); Add(Instruction.StoreShared, AggregateType.Void, AggregateType.S32, AggregateType.U32); Add(Instruction.StoreShared16, AggregateType.Void, AggregateType.S32, AggregateType.U32); Add(Instruction.StoreShared8, AggregateType.Void, AggregateType.S32, AggregateType.U32); - Add(Instruction.StoreStorage, AggregateType.Void, AggregateType.S32, AggregateType.S32, AggregateType.U32); - Add(Instruction.StoreStorage16, AggregateType.Void, AggregateType.S32, AggregateType.S32, AggregateType.U32); - Add(Instruction.StoreStorage8, AggregateType.Void, AggregateType.S32, AggregateType.S32, AggregateType.U32); Add(Instruction.Subtract, AggregateType.Scalar, AggregateType.Scalar, AggregateType.Scalar); Add(Instruction.SwizzleAdd, AggregateType.FP32, AggregateType.FP32, AggregateType.FP32, AggregateType.S32); 
Add(Instruction.TextureSample, AggregateType.FP32); @@ -166,7 +160,7 @@ namespace Ryujinx.Graphics.Shader.StructuredIr { return AggregateType.FP32; } - else if (inst == Instruction.Call || inst == Instruction.Load || inst == Instruction.Store) + else if (inst == Instruction.Call || inst == Instruction.Load || inst == Instruction.Store || inst.IsAtomic()) { return AggregateType.S32; } diff --git a/src/Ryujinx.Graphics.Shader/StructuredIr/ShaderProperties.cs b/src/Ryujinx.Graphics.Shader/StructuredIr/ShaderProperties.cs index 061c89ed..157c5937 100644 --- a/src/Ryujinx.Graphics.Shader/StructuredIr/ShaderProperties.cs +++ b/src/Ryujinx.Graphics.Shader/StructuredIr/ShaderProperties.cs @@ -5,17 +5,25 @@ namespace Ryujinx.Graphics.Shader.StructuredIr class ShaderProperties { private readonly Dictionary _constantBuffers; + private readonly Dictionary _storageBuffers; public IReadOnlyDictionary ConstantBuffers => _constantBuffers; + public IReadOnlyDictionary StorageBuffers => _storageBuffers; public ShaderProperties() { _constantBuffers = new Dictionary(); + _storageBuffers = new Dictionary(); } public void AddConstantBuffer(int binding, BufferDefinition definition) { _constantBuffers[binding] = definition; } + + public void AddStorageBuffer(int binding, BufferDefinition definition) + { + _storageBuffers[binding] = definition; + } } } \ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs b/src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs index 4405c07a..a8f13276 100644 --- a/src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs +++ b/src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs @@ -280,10 +280,6 @@ namespace Ryujinx.Graphics.Shader.StructuredIr { context.Info.HelperFunctionsMask |= HelperFunctionsMask.AtomicMinMaxS32Shared; } - else if (operation.StorageKind == StorageKind.StorageBuffer) - { - context.Info.HelperFunctionsMask |= HelperFunctionsMask.AtomicMinMaxS32Storage; - } break; 
case Instruction.MultiplyHighS32: context.Info.HelperFunctionsMask |= HelperFunctionsMask.MultiplyHighS32; @@ -307,10 +303,6 @@ namespace Ryujinx.Graphics.Shader.StructuredIr case Instruction.StoreShared8: context.Info.HelperFunctionsMask |= HelperFunctionsMask.StoreSharedSmallInt; break; - case Instruction.StoreStorage16: - case Instruction.StoreStorage8: - context.Info.HelperFunctionsMask |= HelperFunctionsMask.StoreStorageSmallInt; - break; case Instruction.SwizzleAdd: context.Info.HelperFunctionsMask |= HelperFunctionsMask.SwizzleAdd; break; diff --git a/src/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs b/src/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs index 6d4104ce..be0cba80 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs @@ -57,6 +57,56 @@ namespace Ryujinx.Graphics.Shader.Translation return context.Add(Instruction.AtomicXor, storageKind, Local(), a, b, c); } + public static Operand AtomicAdd(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value) + { + return context.Add(Instruction.AtomicAdd, storageKind, Local(), Const(binding), e0, e1, value); + } + + public static Operand AtomicAnd(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value) + { + return context.Add(Instruction.AtomicAnd, storageKind, Local(), Const(binding), e0, e1, value); + } + + public static Operand AtomicCompareAndSwap(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand compare, Operand value) + { + return context.Add(Instruction.AtomicCompareAndSwap, storageKind, Local(), Const(binding), e0, e1, compare, value); + } + + public static Operand AtomicMaxS32(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value) + { + return context.Add(Instruction.AtomicMaxS32, 
storageKind, Local(), Const(binding), e0, e1, value); + } + + public static Operand AtomicMaxU32(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value) + { + return context.Add(Instruction.AtomicMaxU32, storageKind, Local(), Const(binding), e0, e1, value); + } + + public static Operand AtomicMinS32(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value) + { + return context.Add(Instruction.AtomicMinS32, storageKind, Local(), Const(binding), e0, e1, value); + } + + public static Operand AtomicMinU32(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value) + { + return context.Add(Instruction.AtomicMinU32, storageKind, Local(), Const(binding), e0, e1, value); + } + + public static Operand AtomicOr(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value) + { + return context.Add(Instruction.AtomicOr, storageKind, Local(), Const(binding), e0, e1, value); + } + + public static Operand AtomicSwap(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value) + { + return context.Add(Instruction.AtomicSwap, storageKind, Local(), Const(binding), e0, e1, value); + } + + public static Operand AtomicXor(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value) + { + return context.Add(Instruction.AtomicXor, storageKind, Local(), Const(binding), e0, e1, value); + } + public static Operand Ballot(this EmitterContext context, Operand a) { return context.Add(Instruction.Ballot, Local(), a); @@ -554,6 +604,11 @@ namespace Ryujinx.Graphics.Shader.Translation return context.Add(fpType | Instruction.IsNan, Local(), a); } + public static Operand Load(this EmitterContext context, StorageKind storageKind, Operand e0, Operand e1) + { + return context.Add(Instruction.Load, storageKind, Local(), e0, e1); 
+ } + public static Operand Load(this EmitterContext context, StorageKind storageKind, int binding) { return context.Add(Instruction.Load, storageKind, Local(), Const(binding)); @@ -606,11 +661,6 @@ namespace Ryujinx.Graphics.Shader.Translation : context.Load(storageKind, (int)ioVariable, arrayIndex, elemIndex); } - public static Operand LoadGlobal(this EmitterContext context, Operand a, Operand b) - { - return context.Add(Instruction.LoadGlobal, Local(), a, b); - } - public static Operand LoadLocal(this EmitterContext context, Operand a) { return context.Add(Instruction.LoadLocal, Local(), a); @@ -655,7 +705,6 @@ namespace Ryujinx.Graphics.Shader.Translation public static void Return(this EmitterContext context) { - context.PrepareForReturn(); context.Add(Instruction.Return); } @@ -699,6 +748,16 @@ namespace Ryujinx.Graphics.Shader.Translation return context.Add(Instruction.ShuffleXor, (Local(), Local()), a, b, c); } + public static Operand Store(this EmitterContext context, StorageKind storageKind, Operand e0, Operand e1, Operand value) + { + return context.Add(Instruction.Store, storageKind, null, e0, e1, value); + } + + public static Operand Store(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value) + { + return context.Add(Instruction.Store, storageKind, null, Const(binding), e0, e1, value); + } + public static Operand Store( this EmitterContext context, StorageKind storageKind, @@ -738,21 +797,6 @@ namespace Ryujinx.Graphics.Shader.Translation : context.Add(Instruction.Store, storageKind, null, Const((int)ioVariable), arrayIndex, elemIndex, value); } - public static Operand StoreGlobal(this EmitterContext context, Operand a, Operand b, Operand c) - { - return context.Add(Instruction.StoreGlobal, null, a, b, c); - } - - public static Operand StoreGlobal16(this EmitterContext context, Operand a, Operand b, Operand c) - { - return context.Add(Instruction.StoreGlobal16, null, a, b, c); - } - - public static 
Operand StoreGlobal8(this EmitterContext context, Operand a, Operand b, Operand c) - { - return context.Add(Instruction.StoreGlobal8, null, a, b, c); - } - public static Operand StoreLocal(this EmitterContext context, Operand a, Operand b) { return context.Add(Instruction.StoreLocal, null, a, b); diff --git a/src/Ryujinx.Graphics.Shader/Translation/GlobalMemory.cs b/src/Ryujinx.Graphics.Shader/Translation/GlobalMemory.cs deleted file mode 100644 index a81d0fc4..00000000 --- a/src/Ryujinx.Graphics.Shader/Translation/GlobalMemory.cs +++ /dev/null @@ -1,54 +0,0 @@ -using Ryujinx.Graphics.Shader.IntermediateRepresentation; - -namespace Ryujinx.Graphics.Shader.Translation -{ - static class GlobalMemory - { - private const int StorageDescsBaseOffset = 0x44; // In words. - - public const int StorageDescSize = 4; // In words. - public const int StorageMaxCount = 16; - - public const int StorageDescsSize = StorageDescSize * StorageMaxCount; - - public const int UbeBaseOffset = 0x98; // In words. 
- public const int UbeMaxCount = 9; - public const int UbeDescsSize = StorageDescSize * UbeMaxCount; - public const int UbeFirstCbuf = 8; - - public const int DriverReservedCb = 0; - - public static bool UsesGlobalMemory(Instruction inst, StorageKind storageKind) - { - return (inst.IsAtomic() && storageKind == StorageKind.GlobalMemory) || - inst == Instruction.LoadGlobal || - inst == Instruction.StoreGlobal || - inst == Instruction.StoreGlobal16 || - inst == Instruction.StoreGlobal8; - } - - public static int GetStorageCbOffset(ShaderStage stage, int slot) - { - return GetStorageBaseCbOffset(stage) + slot * StorageDescSize; - } - - public static int GetStorageBaseCbOffset(ShaderStage stage) - { - return stage switch - { - ShaderStage.Compute => StorageDescsBaseOffset + 2 * StorageDescsSize, - ShaderStage.Vertex => StorageDescsBaseOffset, - ShaderStage.TessellationControl => StorageDescsBaseOffset + 1 * StorageDescsSize, - ShaderStage.TessellationEvaluation => StorageDescsBaseOffset + 2 * StorageDescsSize, - ShaderStage.Geometry => StorageDescsBaseOffset + 3 * StorageDescsSize, - ShaderStage.Fragment => StorageDescsBaseOffset + 4 * StorageDescsSize, - _ => 0 - }; - } - - public static int GetConstantUbeOffset(int slot) - { - return UbeBaseOffset + slot * StorageDescSize; - } - } -} \ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/Translation/HelperFunctionManager.cs b/src/Ryujinx.Graphics.Shader/Translation/HelperFunctionManager.cs index 206facd4..7dd267f3 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/HelperFunctionManager.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/HelperFunctionManager.cs @@ -19,6 +19,14 @@ namespace Ryujinx.Graphics.Shader.Translation _stage = stage; } + public int AddFunction(Function function) + { + int functionId = _functionList.Count; + _functionList.Add(function); + + return functionId; + } + public int GetOrCreateFunctionId(HelperFunctionName functionName) { if (_functionIds.TryGetValue(functionName, out int 
functionId)) @@ -27,8 +35,7 @@ namespace Ryujinx.Graphics.Shader.Translation } Function function = GenerateFunction(functionName); - functionId = _functionList.Count; - _functionList.Add(function); + functionId = AddFunction(function); _functionIds.Add(functionName, functionId); return functionId; diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs index 7758b4c6..14904b26 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs @@ -1,483 +1,1140 @@ using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using System; using System.Collections.Generic; +using System.Linq; using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper; -using static Ryujinx.Graphics.Shader.Translation.GlobalMemory; namespace Ryujinx.Graphics.Shader.Translation.Optimizations { static class GlobalToStorage { + private const int DriverReservedCb = 0; + + enum LsMemoryType + { + Local, + Shared + } + + private class GtsContext + { + private struct Entry + { + public readonly int FunctionId; + public readonly Instruction Inst; + public readonly StorageKind StorageKind; + public readonly bool IsMultiTarget; + public readonly IReadOnlyList TargetCbs; + + public Entry( + int functionId, + Instruction inst, + StorageKind storageKind, + bool isMultiTarget, + IReadOnlyList targetCbs) + { + FunctionId = functionId; + Inst = inst; + StorageKind = storageKind; + IsMultiTarget = isMultiTarget; + TargetCbs = targetCbs; + } + } + + private struct LsKey : IEquatable + { + public readonly Operand BaseOffset; + public readonly int ConstOffset; + public readonly LsMemoryType Type; + + public LsKey(Operand baseOffset, int constOffset, LsMemoryType type) + { + BaseOffset = baseOffset; + ConstOffset = constOffset; + Type = type; + } + + public override int GetHashCode() + { + 
return HashCode.Combine(BaseOffset, ConstOffset, Type); + } + + public override bool Equals(object obj) + { + return obj is LsKey other && Equals(other); + } + + public bool Equals(LsKey other) + { + return other.BaseOffset == BaseOffset && other.ConstOffset == ConstOffset && other.Type == Type; + } + } + + private readonly List _entries; + private readonly Dictionary> _sharedEntries; + private readonly HelperFunctionManager _hfm; + + public GtsContext(HelperFunctionManager hfm) + { + _entries = new List(); + _sharedEntries = new Dictionary>(); + _hfm = hfm; + } + + public int AddFunction(Operation baseOp, bool isMultiTarget, IReadOnlyList targetCbs, Function function) + { + int functionId = _hfm.AddFunction(function); + + _entries.Add(new Entry(functionId, baseOp.Inst, baseOp.StorageKind, isMultiTarget, targetCbs)); + + return functionId; + } + + public bool TryGetFunctionId(Operation baseOp, bool isMultiTarget, IReadOnlyList targetCbs, out int functionId) + { + foreach (Entry entry in _entries) + { + if (entry.Inst != baseOp.Inst || + entry.StorageKind != baseOp.StorageKind || + entry.IsMultiTarget != isMultiTarget || + entry.TargetCbs.Count != targetCbs.Count) + { + continue; + } + + bool allEqual = true; + + for (int index = 0; index < targetCbs.Count; index++) + { + if (targetCbs[index] != entry.TargetCbs[index]) + { + allEqual = false; + break; + } + } + + if (allEqual) + { + functionId = entry.FunctionId; + return true; + } + } + + functionId = -1; + return false; + } + + public void AddMemoryTargetCb(LsMemoryType type, Operand baseOffset, int constOffset, uint targetCb, SearchResult result) + { + LsKey key = new LsKey(baseOffset, constOffset, type); + + if (!_sharedEntries.TryGetValue(key, out Dictionary targetCbs)) + { + // No entry with this base offset, create a new one. 
+ + targetCbs = new Dictionary() { { targetCb, result } }; + + _sharedEntries.Add(key, targetCbs); + } + else if (targetCbs.TryGetValue(targetCb, out SearchResult existingResult)) + { + // If our entry already exists, but does not match the new result, + // we set the offset to null to indicate there are multiple possible offsets. + // This will be used on the multi-target access that does not need to know the offset. + + if (existingResult.Offset != null && + (existingResult.Offset != result.Offset || + existingResult.ConstOffset != result.ConstOffset)) + { + targetCbs[targetCb] = new SearchResult(result.SbCbSlot, result.SbCbOffset); + } + } + else + { + // An entry for this base offset already exists, but not for the specified + // constant buffer region where the storage buffer base address and size + // comes from. + + targetCbs.Add(targetCb, result); + } + } + + public bool TryGetMemoryTargetCb(LsMemoryType type, Operand baseOffset, int constOffset, out SearchResult result) + { + LsKey key = new LsKey(baseOffset, constOffset, type); + + if (_sharedEntries.TryGetValue(key, out Dictionary targetCbs) && targetCbs.Count == 1) + { + SearchResult candidateResult = targetCbs.Values.First(); + + if (candidateResult.Found) + { + result = candidateResult; + + return true; + } + } + + result = default; + + return false; + } + } + private struct SearchResult { public static SearchResult NotFound => new SearchResult(-1, 0); public bool Found => SbCbSlot != -1; public int SbCbSlot { get; } public int SbCbOffset { get; } + public Operand Offset { get; } + public int ConstOffset { get; } public SearchResult(int sbCbSlot, int sbCbOffset) { SbCbSlot = sbCbSlot; SbCbOffset = sbCbOffset; } + + public SearchResult(int sbCbSlot, int sbCbOffset, Operand offset, int constOffset = 0) + { + SbCbSlot = sbCbSlot; + SbCbOffset = sbCbOffset; + Offset = offset; + ConstOffset = constOffset; + } } - public static void RunPass(BasicBlock block, ShaderConfig config, ref int sbUseMask, ref int 
ubeUseMask) + public static void RunPass(HelperFunctionManager hfm, BasicBlock[] blocks, ShaderConfig config) { - int sbStart = GetStorageBaseCbOffset(config.Stage); - int sbEnd = sbStart + StorageDescsSize; + GtsContext gtsContext = new GtsContext(hfm); - int ubeStart = UbeBaseOffset; - int ubeEnd = UbeBaseOffset + UbeDescsSize; - - for (LinkedListNode node = block.Operations.First; node != null; node = node.Next) + foreach (BasicBlock block in blocks) { - for (int index = 0; index < node.Value.SourcesCount; index++) + for (LinkedListNode node = block.Operations.First; node != null; node = node.Next) { - Operand src = node.Value.GetSource(index); - - int storageIndex = GetStorageIndex(src, sbStart, sbEnd); - - if (storageIndex >= 0) - { - sbUseMask |= 1 << storageIndex; - } - - if (config.Stage == ShaderStage.Compute) - { - int constantIndex = GetStorageIndex(src, ubeStart, ubeEnd); - - if (constantIndex >= 0) - { - ubeUseMask |= 1 << constantIndex; - } - } - } - - if (!(node.Value is Operation operation)) - { - continue; - } - - if (UsesGlobalMemory(operation.Inst, operation.StorageKind)) - { - Operand source = operation.GetSource(0); - - var result = SearchForStorageBase(config, block, source); - if (!result.Found) + if (!(node.Value is Operation operation)) { continue; } - if (config.Stage == ShaderStage.Compute && - operation.Inst == Instruction.LoadGlobal && - result.SbCbSlot == DriverReservedCb && - result.SbCbOffset >= UbeBaseOffset && - result.SbCbOffset < UbeBaseOffset + UbeDescsSize) + if (IsGlobalMemory(operation.StorageKind)) { - // Here we effectively try to replace a LDG instruction with LDC. - // The hardware only supports a limited amount of constant buffers - // so NVN "emulates" more constant buffers using global memory access. - // Here we try to replace the global access back to a constant buffer - // load. 
- node = ReplaceLdgWithLdc(node, config, (result.SbCbOffset - UbeBaseOffset) / StorageDescSize); - } - else - { - // Storage buffers are implemented using global memory access. - // If we know from where the base address of the access is loaded, - // we can guess which storage buffer it is accessing. - // We can then replace the global memory access with a storage - // buffer access. - node = ReplaceGlobalWithStorage(block, node, config, config.GetSbSlot((byte)result.SbCbSlot, (ushort)result.SbCbOffset)); - } - } - } + LinkedListNode nextNode = ReplaceGlobalMemoryWithStorage(gtsContext, config, block, node); - config.SetAccessibleBufferMasks(sbUseMask, ubeUseMask); - } - - private static LinkedListNode ReplaceGlobalWithStorage(BasicBlock block, LinkedListNode node, ShaderConfig config, int storageIndex) - { - Operation operation = (Operation)node.Value; - - bool isAtomic = operation.Inst.IsAtomic(); - bool isStg16Or8 = operation.Inst == Instruction.StoreGlobal16 || operation.Inst == Instruction.StoreGlobal8; - bool isWrite = isAtomic || operation.Inst == Instruction.StoreGlobal || isStg16Or8; - - config.SetUsedStorageBuffer(storageIndex, isWrite); - - Operand[] sources = new Operand[operation.SourcesCount]; - - sources[0] = Const(storageIndex); - sources[1] = GetStorageOffset(block, node, config, storageIndex, operation.GetSource(0), isStg16Or8); - - for (int index = 2; index < operation.SourcesCount; index++) - { - sources[index] = operation.GetSource(index); - } - - Operation storageOp; - - if (isAtomic) - { - storageOp = new Operation(operation.Inst, StorageKind.StorageBuffer, operation.Dest, sources); - } - else if (operation.Inst == Instruction.LoadGlobal) - { - storageOp = new Operation(Instruction.LoadStorage, operation.Dest, sources); - } - else - { - Instruction storeInst = operation.Inst switch - { - Instruction.StoreGlobal16 => Instruction.StoreStorage16, - Instruction.StoreGlobal8 => Instruction.StoreStorage8, - _ => Instruction.StoreStorage - }; - - 
storageOp = new Operation(storeInst, null, sources); - } - - for (int index = 0; index < operation.SourcesCount; index++) - { - operation.SetSource(index, null); - } - - LinkedListNode oldNode = node; - - node = node.List.AddBefore(node, storageOp); - - node.List.Remove(oldNode); - - return node; - } - - private static Operand GetStorageOffset( - BasicBlock block, - LinkedListNode node, - ShaderConfig config, - int storageIndex, - Operand addrLow, - bool isStg16Or8) - { - (int sbCbSlot, int sbCbOffset) = config.GetSbCbInfo(storageIndex); - - bool storageAligned = !(config.GpuAccessor.QueryHasUnalignedStorageBuffer() || config.GpuAccessor.QueryHostStorageBufferOffsetAlignment() > Constants.StorageAlignment); - - (Operand byteOffset, int constantOffset) = storageAligned ? - GetStorageOffset(block, Utils.FindLastOperation(addrLow, block), sbCbSlot, sbCbOffset) : - (null, 0); - - if (byteOffset != null) - { - ReplaceAddressAlignment(node.List, addrLow, byteOffset, constantOffset); - } - - if (byteOffset == null) - { - Operand baseAddrLow = Cbuf(sbCbSlot, sbCbOffset); - Operand baseAddrTrunc = Local(); - - Operand alignMask = Const(-config.GpuAccessor.QueryHostStorageBufferOffsetAlignment()); - - Operation andOp = new Operation(Instruction.BitwiseAnd, baseAddrTrunc, baseAddrLow, alignMask); - - node.List.AddBefore(node, andOp); - - Operand offset = Local(); - Operation subOp = new Operation(Instruction.Subtract, offset, addrLow, baseAddrTrunc); - - node.List.AddBefore(node, subOp); - - byteOffset = offset; - } - else if (constantOffset != 0) - { - Operand offset = Local(); - Operation addOp = new Operation(Instruction.Add, offset, byteOffset, Const(constantOffset)); - - node.List.AddBefore(node, addOp); - - byteOffset = offset; - } - - if (isStg16Or8) - { - return byteOffset; - } - - Operand wordOffset = Local(); - Operation shrOp = new Operation(Instruction.ShiftRightU32, wordOffset, byteOffset, Const(2)); - - node.List.AddBefore(node, shrOp); - - return wordOffset; - 
} - - private static bool IsCbOffset(Operand operand, int slot, int offset) - { - return operand.Type == OperandType.ConstantBuffer && operand.GetCbufSlot() == slot && operand.GetCbufOffset() == offset; - } - - private static void ReplaceAddressAlignment(LinkedList list, Operand address, Operand byteOffset, int constantOffset) - { - // When we emit 16/8-bit LDG, we add extra code to determine the address alignment. - // Eliminate the storage buffer base address from this too, leaving only the byte offset. - - foreach (INode useNode in address.UseOps) - { - if (useNode is Operation op && op.Inst == Instruction.BitwiseAnd) - { - Operand src1 = op.GetSource(0); - Operand src2 = op.GetSource(1); - - int addressIndex = -1; - - if (src1 == address && src2.Type == OperandType.Constant && src2.Value == 3) - { - addressIndex = 0; - } - else if (src2 == address && src1.Type == OperandType.Constant && src1.Value == 3) - { - addressIndex = 1; - } - - if (addressIndex != -1) - { - LinkedListNode node = list.Find(op); - - // Add offset calculation before the use. Needs to be on the same block. - if (node != null) + if (nextNode == null) { - Operand offset = Local(); - Operation addOp = new Operation(Instruction.Add, offset, byteOffset, Const(constantOffset)); - list.AddBefore(node, addOp); + // The returned value being null means that the global memory replacement failed, + // so we just make loads read 0 and stores do nothing. 
- op.SetSource(addressIndex, offset); + config.GpuAccessor.Log($"Failed to reserve storage buffer for global memory operation \"{operation.Inst}\"."); + + if (operation.Dest != null) + { + operation.TurnIntoCopy(Const(0)); + } + else + { + Utils.DeleteNode(node, operation); + } + } + else + { + node = nextNode; + } + } + else if (operation.Inst == Instruction.StoreShared || operation.Inst == Instruction.StoreLocal) + { + // The NVIDIA compiler can sometimes use shared or local memory as temporary + // storage to place the base address and size on, so we need + // to be able to find such information stored in memory too. + + if (TryGetMemoryOffsets(operation, out LsMemoryType type, out Operand baseOffset, out int constOffset)) + { + Operand value = operation.GetSource(operation.SourcesCount - 1); + + var result = FindUniqueBaseAddressCb(gtsContext, block, value, needsOffset: false); + if (result.Found) + { + uint targetCb = PackCbSlotAndOffset(result.SbCbSlot, result.SbCbOffset); + gtsContext.AddMemoryTargetCb(type, baseOffset, constOffset, targetCb, result); + } } } } } } - private static (Operand, int) GetStorageOffset(BasicBlock block, Operand address, int cbSlot, int baseAddressCbOffset) + private static bool IsGlobalMemory(StorageKind storageKind) { - if (IsCbOffset(address, cbSlot, baseAddressCbOffset)) - { - // Direct offset: zero. 
- return (Const(0), 0); - } - - (address, int constantOffset) = GetStorageConstantOffset(block, address); - - address = Utils.FindLastOperation(address, block); - - if (IsCbOffset(address, cbSlot, baseAddressCbOffset)) - { - // Only constant offset - return (Const(0), constantOffset); - } - - if (!(address.AsgOp is Operation offsetAdd) || offsetAdd.Inst != Instruction.Add) - { - return (null, 0); - } - - Operand src1 = offsetAdd.GetSource(0); - Operand src2 = Utils.FindLastOperation(offsetAdd.GetSource(1), block); - - if (IsCbOffset(src2, cbSlot, baseAddressCbOffset)) - { - return (src1, constantOffset); - } - else if (IsCbOffset(src1, cbSlot, baseAddressCbOffset)) - { - return (src2, constantOffset); - } - - return (null, 0); + return storageKind == StorageKind.GlobalMemory || + storageKind == StorageKind.GlobalMemoryS8 || + storageKind == StorageKind.GlobalMemoryS16 || + storageKind == StorageKind.GlobalMemoryU8 || + storageKind == StorageKind.GlobalMemoryU16; } - private static (Operand, int) GetStorageConstantOffset(BasicBlock block, Operand address) + private static bool IsSmallInt(StorageKind storageKind) { - if (!(address.AsgOp is Operation offsetAdd) || offsetAdd.Inst != Instruction.Add) - { - return (address, 0); - } - - Operand src1 = offsetAdd.GetSource(0); - Operand src2 = offsetAdd.GetSource(1); - - if (src2.Type != OperandType.Constant) - { - return (address, 0); - } - - return (src1, src2.Value); + return storageKind == StorageKind.GlobalMemoryS8 || + storageKind == StorageKind.GlobalMemoryS16 || + storageKind == StorageKind.GlobalMemoryU8 || + storageKind == StorageKind.GlobalMemoryU16; } - private static LinkedListNode ReplaceLdgWithLdc(LinkedListNode node, ShaderConfig config, int storageIndex) + private static LinkedListNode ReplaceGlobalMemoryWithStorage( + GtsContext gtsContext, + ShaderConfig config, + BasicBlock block, + LinkedListNode node) { - Operation operation = (Operation)node.Value; + Operation operation = node.Value as Operation; + 
Operand globalAddress = operation.GetSource(0); + SearchResult result = FindUniqueBaseAddressCb(gtsContext, block, globalAddress, needsOffset: true); - Operand GetCbufOffset() + if (result.Found) { - Operand addrLow = operation.GetSource(0); + // We found the storage buffer that is being accessed. + // There are two possible paths here, if the operation is simple enough, + // we just generate the storage access code inline. + // Otherwise, we generate a function call (and the function if necessary). - Operand baseAddrLow = Cbuf(0, UbeBaseOffset + storageIndex * StorageDescSize); + Operand offset = result.Offset; - Operand baseAddrTrunc = Local(); + bool storageUnaligned = config.GpuAccessor.QueryHasUnalignedStorageBuffer(); - Operand alignMask = Const(-config.GpuAccessor.QueryHostStorageBufferOffsetAlignment()); + if (storageUnaligned) + { + Operand baseAddress = Cbuf(result.SbCbSlot, result.SbCbOffset); - Operation andOp = new Operation(Instruction.BitwiseAnd, baseAddrTrunc, baseAddrLow, alignMask); + Operand baseAddressMasked = Local(); + Operand hostOffset = Local(); - node.List.AddBefore(node, andOp); + int alignment = config.GpuAccessor.QueryHostStorageBufferOffsetAlignment(); - Operand byteOffset = Local(); - Operand wordOffset = Local(); + Operation maskOp = new Operation(Instruction.BitwiseAnd, baseAddressMasked, new[] { baseAddress, Const(-alignment) }); + Operation subOp = new Operation(Instruction.Subtract, hostOffset, new[] { globalAddress, baseAddressMasked }); - Operation subOp = new Operation(Instruction.Subtract, byteOffset, addrLow, baseAddrTrunc); - Operation shrOp = new Operation(Instruction.ShiftRightU32, wordOffset, byteOffset, Const(2)); + node.List.AddBefore(node, maskOp); + node.List.AddBefore(node, subOp); - node.List.AddBefore(node, subOp); - node.List.AddBefore(node, shrOp); + offset = hostOffset; + } + else if (result.ConstOffset != 0) + { + Operand newOffset = Local(); - return wordOffset; + Operation addOp = new 
Operation(Instruction.Add, newOffset, new[] { offset, Const(result.ConstOffset) }); + + node.List.AddBefore(node, addOp); + + offset = newOffset; + } + + if (CanUseInlineStorageOp(operation, config.Options.TargetLanguage)) + { + return GenerateInlineStorageOp(config, node, operation, offset, result); + } + else + { + if (!TryGenerateSingleTargetStorageOp(gtsContext, config, operation, result, out int functionId)) + { + return null; + } + + return GenerateCallStorageOp(node, operation, offset, functionId); + } } - - Operand cbufOffset = GetCbufOffset(); - Operand vecIndex = Local(); - Operand elemIndex = Local(); - - node.List.AddBefore(node, new Operation(Instruction.ShiftRightU32, 0, vecIndex, cbufOffset, Const(2))); - node.List.AddBefore(node, new Operation(Instruction.BitwiseAnd, 0, elemIndex, cbufOffset, Const(3))); - - Operand[] sources = new Operand[4]; - - int cbSlot = UbeFirstCbuf + storageIndex; - - sources[0] = Const(config.ResourceManager.GetConstantBufferBinding(cbSlot)); - sources[1] = Const(0); - sources[2] = vecIndex; - sources[3] = elemIndex; - - Operation ldcOp = new Operation(Instruction.Load, StorageKind.ConstantBuffer, operation.Dest, sources); - - for (int index = 0; index < operation.SourcesCount; index++) + else { - operation.SetSource(index, null); + // Failed to find the storage buffer directly. + // Try to walk through Phi chains and find all possible constant buffers where + // the base address might be stored. + // Generate a helper function that will check all possible storage buffers and use the right one. 
+ + if (!TryGenerateMultiTargetStorageOp(gtsContext, config, block, operation, out int functionId)) + { + return null; + } + + return GenerateCallStorageOp(node, operation, null, functionId); } - - LinkedListNode oldNode = node; - - node = node.List.AddBefore(node, ldcOp); - - node.List.Remove(oldNode); - - return node; } - private static SearchResult SearchForStorageBase(ShaderConfig config, BasicBlock block, Operand globalAddress) + private static bool CanUseInlineStorageOp(Operation operation, TargetLanguage targetLanguage) + { + if (operation.StorageKind != StorageKind.GlobalMemory) + { + return false; + } + + return (operation.Inst != Instruction.AtomicMaxS32 && + operation.Inst != Instruction.AtomicMinS32) || targetLanguage == TargetLanguage.Spirv; + } + + private static LinkedListNode GenerateInlineStorageOp( + ShaderConfig config, + LinkedListNode node, + Operation operation, + Operand offset, + SearchResult result) + { + bool isStore = operation.Inst == Instruction.Store || operation.Inst.IsAtomic(); + if (!config.ResourceManager.TryGetStorageBufferBinding(result.SbCbSlot, result.SbCbOffset, isStore, out int binding)) + { + return null; + } + + Operand wordOffset = Local(); + + Operand[] sources; + + if (operation.Inst == Instruction.AtomicCompareAndSwap) + { + sources = new Operand[] + { + Const(binding), + Const(0), + wordOffset, + operation.GetSource(operation.SourcesCount - 2), + operation.GetSource(operation.SourcesCount - 1) + }; + } + else if (isStore) + { + sources = new Operand[] { Const(binding), Const(0), wordOffset, operation.GetSource(operation.SourcesCount - 1) }; + } + else + { + sources = new Operand[] { Const(binding), Const(0), wordOffset }; + } + + Operation shiftOp = new Operation(Instruction.ShiftRightU32, wordOffset, new[] { offset, Const(2) }); + Operation storageOp = new Operation(operation.Inst, StorageKind.StorageBuffer, operation.Dest, sources); + + node.List.AddBefore(node, shiftOp); + LinkedListNode newNode = 
node.List.AddBefore(node, storageOp); + + Utils.DeleteNode(node, operation); + + return newNode; + } + + private static LinkedListNode GenerateCallStorageOp(LinkedListNode node, Operation operation, Operand offset, int functionId) + { + // Generate call to a helper function that will perform the storage buffer operation. + + Operand[] sources = new Operand[operation.SourcesCount - 1 + (offset == null ? 2 : 1)]; + + sources[0] = Const(functionId); + + if (offset != null) + { + // If the offset was supplied, we use that and skip the global address. + + sources[1] = offset; + + for (int srcIndex = 2; srcIndex < operation.SourcesCount; srcIndex++) + { + sources[srcIndex] = operation.GetSource(srcIndex); + } + } + else + { + // Use the 64-bit global address which is split in 2 32-bit arguments. + + for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++) + { + sources[srcIndex + 1] = operation.GetSource(srcIndex); + } + } + + bool returnsValue = operation.Dest != null; + Operand returnValue = returnsValue ?
Local() : null; + + Operation callOp = new Operation(Instruction.Call, returnValue, sources); + + LinkedListNode newNode = node.List.AddBefore(node, callOp); + + if (returnsValue) + { + operation.TurnIntoCopy(returnValue); + + return node; + } + else + { + Utils.DeleteNode(node, operation); + + return newNode; + } + } + + private static bool TryGenerateSingleTargetStorageOp( + GtsContext gtsContext, + ShaderConfig config, + Operation operation, + SearchResult result, + out int functionId) + { + List targetCbs = new List() { PackCbSlotAndOffset(result.SbCbSlot, result.SbCbOffset) }; + + if (gtsContext.TryGetFunctionId(operation, isMultiTarget: false, targetCbs, out functionId)) + { + return true; + } + + int inArgumentsCount = 1; + + if (operation.Inst == Instruction.AtomicCompareAndSwap) + { + inArgumentsCount = 3; + } + else if (operation.Inst == Instruction.Store || operation.Inst.IsAtomic()) + { + inArgumentsCount = 2; + } + + EmitterContext context = new EmitterContext(); + + Operand offset = Argument(0); + Operand compare = null; + Operand value = null; + + if (inArgumentsCount == 3) + { + compare = Argument(1); + value = Argument(2); + } + else if (inArgumentsCount == 2) + { + value = Argument(1); + } + + if (!TryGenerateStorageOp( + config, + context, + operation.Inst, + operation.StorageKind, + offset, + compare, + value, + result, + out Operand resultValue)) + { + functionId = 0; + return false; + } + + bool returnsValue = resultValue != null; + + if (returnsValue) + { + context.Return(resultValue); + } + else + { + context.Return(); + } + + string functionName = GetFunctionName(operation, isMultiTarget: false, targetCbs); + + Function function = new Function( + ControlFlowGraph.Create(context.GetOperations()).Blocks, + functionName, + returnsValue, + inArgumentsCount, + 0); + + functionId = gtsContext.AddFunction(operation, isMultiTarget: false, targetCbs, function); + + return true; + } + + private static bool TryGenerateMultiTargetStorageOp( + 
GtsContext gtsContext, + ShaderConfig config, + BasicBlock block, + Operation operation, + out int functionId) + { + Queue phis = new Queue(); + HashSet visited = new HashSet(); + List targetCbs = new List(); + + Operand globalAddress = operation.GetSource(0); + + if (globalAddress.AsgOp is Operation addOp && addOp.Inst == Instruction.Add) + { + Operand src1 = addOp.GetSource(0); + Operand src2 = addOp.GetSource(1); + + if (src1.Type == OperandType.Constant && src2.Type == OperandType.LocalVariable) + { + globalAddress = src2; + } + else if (src1.Type == OperandType.LocalVariable && src2.Type == OperandType.Constant) + { + globalAddress = src1; + } + } + + if (globalAddress.AsgOp is PhiNode phi && visited.Add(phi)) + { + phis.Enqueue(phi); + } + else + { + SearchResult result = FindUniqueBaseAddressCb(gtsContext, block, operation.GetSource(0), needsOffset: false); + + if (result.Found) + { + targetCbs.Add(PackCbSlotAndOffset(result.SbCbSlot, result.SbCbOffset)); + } + } + + while (phis.TryDequeue(out phi)) + { + for (int srcIndex = 0; srcIndex < phi.SourcesCount; srcIndex++) + { + BasicBlock phiBlock = phi.GetBlock(srcIndex); + Operand phiSource = phi.GetSource(srcIndex); + + SearchResult result = FindUniqueBaseAddressCb(gtsContext, phiBlock, phiSource, needsOffset: false); + + if (result.Found) + { + uint targetCb = PackCbSlotAndOffset(result.SbCbSlot, result.SbCbOffset); + + if (!targetCbs.Contains(targetCb)) + { + targetCbs.Add(targetCb); + } + } + else if (phiSource.AsgOp is PhiNode phi2 && visited.Add(phi2)) + { + phis.Enqueue(phi2); + } + } + } + + targetCbs.Sort(); + + if (targetCbs.Count == 0) + { + config.GpuAccessor.Log($"Failed to find storage buffer for global memory operation \"{operation.Inst}\"."); + } + + if (gtsContext.TryGetFunctionId(operation, isMultiTarget: true, targetCbs, out functionId)) + { + return true; + } + + int inArgumentsCount = 2; + + if (operation.Inst == Instruction.AtomicCompareAndSwap) + { + inArgumentsCount = 4; + } + else if 
(operation.Inst == Instruction.Store || operation.Inst.IsAtomic()) + { + inArgumentsCount = 3; + } + + EmitterContext context = new EmitterContext(); + + Operand globalAddressLow = Argument(0); + Operand globalAddressHigh = Argument(1); + + foreach (uint targetCb in targetCbs) + { + (int sbCbSlot, int sbCbOffset) = UnpackCbSlotAndOffset(targetCb); + + Operand baseAddrLow = Cbuf(sbCbSlot, sbCbOffset); + Operand baseAddrHigh = Cbuf(sbCbSlot, sbCbOffset + 1); + Operand size = Cbuf(sbCbSlot, sbCbOffset + 2); + + Operand offset = context.ISubtract(globalAddressLow, baseAddrLow); + Operand borrow = context.ICompareLessUnsigned(globalAddressLow, baseAddrLow); + + Operand inRangeLow = context.ICompareLessUnsigned(offset, size); + + Operand addrHighBorrowed = context.IAdd(globalAddressHigh, borrow); + + Operand inRangeHigh = context.ICompareEqual(addrHighBorrowed, baseAddrHigh); + + Operand inRange = context.BitwiseAnd(inRangeLow, inRangeHigh); + + Operand lblSkip = Label(); + context.BranchIfFalse(lblSkip, inRange); + + Operand compare = null; + Operand value = null; + + if (inArgumentsCount == 4) + { + compare = Argument(2); + value = Argument(3); + } + else if (inArgumentsCount == 3) + { + value = Argument(2); + } + + SearchResult result = new SearchResult(sbCbSlot, sbCbOffset); + + int alignment = config.GpuAccessor.QueryHostStorageBufferOffsetAlignment(); + + Operand baseAddressMasked = context.BitwiseAnd(baseAddrLow, Const(-alignment)); + Operand hostOffset = context.ISubtract(globalAddressLow, baseAddressMasked); + + if (!TryGenerateStorageOp( + config, + context, + operation.Inst, + operation.StorageKind, + hostOffset, + compare, + value, + result, + out Operand resultValue)) + { + functionId = 0; + return false; + } + + if (resultValue != null) + { + context.Return(resultValue); + } + else + { + context.Return(); + } + + context.MarkLabel(lblSkip); + } + + bool returnsValue = operation.Dest != null; + + if (returnsValue) + { + context.Return(Const(0)); + } + else + 
{ + context.Return(); + } + + string functionName = GetFunctionName(operation, isMultiTarget: true, targetCbs); + + Function function = new Function( + ControlFlowGraph.Create(context.GetOperations()).Blocks, + functionName, + returnsValue, + inArgumentsCount, + 0); + + functionId = gtsContext.AddFunction(operation, isMultiTarget: true, targetCbs, function); + + return true; + } + + private static uint PackCbSlotAndOffset(int cbSlot, int cbOffset) + { + return (uint)((ushort)cbSlot | ((ushort)cbOffset << 16)); + } + + private static (int, int) UnpackCbSlotAndOffset(uint packed) + { + return ((ushort)packed, (ushort)(packed >> 16)); + } + + private static string GetFunctionName(Operation baseOp, bool isMultiTarget, IReadOnlyList targetCbs) + { + string name = baseOp.Inst.ToString(); + + name += baseOp.StorageKind switch + { + StorageKind.GlobalMemoryS8 => "S8", + StorageKind.GlobalMemoryS16 => "S16", + StorageKind.GlobalMemoryU8 => "U8", + StorageKind.GlobalMemoryU16 => "U16", + _ => string.Empty + }; + + if (isMultiTarget) + { + name += "Multi"; + } + + foreach (uint targetCb in targetCbs) + { + (int sbCbSlot, int sbCbOffset) = UnpackCbSlotAndOffset(targetCb); + + name += $"_c{sbCbSlot}o{sbCbOffset}"; + } + + return name; + } + + private static bool TryGenerateStorageOp( + ShaderConfig config, + EmitterContext context, + Instruction inst, + StorageKind storageKind, + Operand offset, + Operand compare, + Operand value, + SearchResult result, + out Operand resultValue) + { + resultValue = null; + bool isStore = inst.IsAtomic() || inst == Instruction.Store; + + if (!config.ResourceManager.TryGetStorageBufferBinding(result.SbCbSlot, result.SbCbOffset, isStore, out int binding)) + { + return false; + } + + Operand wordOffset = context.ShiftRightU32(offset, Const(2)); + + if (inst.IsAtomic()) + { + if (IsSmallInt(storageKind)) + { + throw new NotImplementedException(); + } + + switch (inst) + { + case Instruction.AtomicAdd: + resultValue = 
context.AtomicAdd(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value); + break; + case Instruction.AtomicAnd: + resultValue = context.AtomicAnd(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value); + break; + case Instruction.AtomicCompareAndSwap: + resultValue = context.AtomicCompareAndSwap(StorageKind.StorageBuffer, binding, Const(0), wordOffset, compare, value); + break; + case Instruction.AtomicMaxS32: + if (config.Options.TargetLanguage == TargetLanguage.Spirv) + { + resultValue = context.AtomicMaxS32(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value); + } + else + { + resultValue = GenerateAtomicCasLoop(context, wordOffset, binding, (memValue) => + { + return context.IMaximumS32(memValue, value); + }); + } + break; + case Instruction.AtomicMaxU32: + resultValue = context.AtomicMaxU32(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value); + break; + case Instruction.AtomicMinS32: + if (config.Options.TargetLanguage == TargetLanguage.Spirv) + { + resultValue = context.AtomicMinS32(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value); + } + else + { + resultValue = GenerateAtomicCasLoop(context, wordOffset, binding, (memValue) => + { + return context.IMinimumS32(memValue, value); + }); + } + break; + case Instruction.AtomicMinU32: + resultValue = context.AtomicMinU32(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value); + break; + case Instruction.AtomicOr: + resultValue = context.AtomicOr(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value); + break; + case Instruction.AtomicSwap: + resultValue = context.AtomicSwap(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value); + break; + case Instruction.AtomicXor: + resultValue = context.AtomicXor(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value); + break; + } + } + else if (inst == Instruction.Store) + { + int bitSize = storageKind switch + { + StorageKind.GlobalMemoryS8 or + StorageKind.GlobalMemoryU8 => 
8, + StorageKind.GlobalMemoryS16 or + StorageKind.GlobalMemoryU16 => 16, + _ => 32 + }; + + if (bitSize < 32) + { + Operand bitOffset = GetBitOffset(context, offset); + + GenerateAtomicCasLoop(context, wordOffset, binding, (memValue) => + { + return context.BitfieldInsert(memValue, value, bitOffset, Const(bitSize)); + }); + } + else + { + context.Store(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value); + } + } + else + { + value = context.Load(StorageKind.StorageBuffer, binding, Const(0), wordOffset); + + if (IsSmallInt(storageKind)) + { + Operand bitOffset = GetBitOffset(context, offset); + + switch (storageKind) + { + case StorageKind.GlobalMemoryS8: + value = context.ShiftRightS32(value, bitOffset); + value = context.BitfieldExtractS32(value, Const(0), Const(8)); + break; + case StorageKind.GlobalMemoryS16: + value = context.ShiftRightS32(value, bitOffset); + value = context.BitfieldExtractS32(value, Const(0), Const(16)); + break; + case StorageKind.GlobalMemoryU8: + value = context.ShiftRightU32(value, bitOffset); + value = context.BitwiseAnd(value, Const(byte.MaxValue)); + break; + case StorageKind.GlobalMemoryU16: + value = context.ShiftRightU32(value, bitOffset); + value = context.BitwiseAnd(value, Const(ushort.MaxValue)); + break; + } + } + + resultValue = value; + } + + return true; + } + + private static Operand GetBitOffset(EmitterContext context, Operand offset) + { + return context.ShiftLeft(context.BitwiseAnd(offset, Const(3)), Const(3)); + } + + private static Operand GenerateAtomicCasLoop(EmitterContext context, Operand wordOffset, int binding, Func opCallback) + { + Operand lblLoopHead = Label(); + + context.MarkLabel(lblLoopHead); + + Operand oldValue = context.Load(StorageKind.StorageBuffer, binding, Const(0), wordOffset); + Operand newValue = opCallback(oldValue); + + Operand casResult = context.AtomicCompareAndSwap( + StorageKind.StorageBuffer, + binding, + Const(0), + wordOffset, + oldValue, + newValue); + + Operand casFail = 
context.ICompareNotEqual(casResult, oldValue); + + context.BranchIfTrue(lblLoopHead, casFail); + + return oldValue; + } + + private static SearchResult FindUniqueBaseAddressCb(GtsContext gtsContext, BasicBlock block, Operand globalAddress, bool needsOffset) { globalAddress = Utils.FindLastOperation(globalAddress, block); if (globalAddress.Type == OperandType.ConstantBuffer) { - return GetStorageIndex(config, globalAddress); + return GetBaseAddressCbWithOffset(globalAddress, Const(0), 0); } Operation operation = globalAddress.AsgOp as Operation; if (operation == null || operation.Inst != Instruction.Add) { - return SearchResult.NotFound; + return FindBaseAddressCbFromMemory(gtsContext, operation, 0, needsOffset); } Operand src1 = operation.GetSource(0); Operand src2 = operation.GetSource(1); + int constOffset = 0; + if ((src1.Type == OperandType.LocalVariable && src2.Type == OperandType.Constant) || (src2.Type == OperandType.LocalVariable && src1.Type == OperandType.Constant)) { Operand baseAddr; + Operand offset; if (src1.Type == OperandType.LocalVariable) { baseAddr = Utils.FindLastOperation(src1, block); + offset = src2; } else { baseAddr = Utils.FindLastOperation(src2, block); + offset = src1; } - var result = GetStorageIndex(config, baseAddr); + var result = GetBaseAddressCbWithOffset(baseAddr, offset, 0); if (result.Found) { return result; } + constOffset = offset.Value; operation = baseAddr.AsgOp as Operation; if (operation == null || operation.Inst != Instruction.Add) { - return SearchResult.NotFound; + return FindBaseAddressCbFromMemory(gtsContext, operation, constOffset, needsOffset); } } - var selectedResult = SearchResult.NotFound; + src1 = operation.GetSource(0); + src2 = operation.GetSource(1); - for (int index = 0; index < operation.SourcesCount; index++) + // If we have two possible results, we give preference to the ones from + // the driver reserved constant buffer, as those are the ones that + // contains the base address. 
+ + // If both are constant buffers, give preference to the second operand, + // because constant buffers are always encoded as the second operand, + // so the second operand will always be the one from the last instruction. + + if (src1.Type != OperandType.ConstantBuffer || + (src1.Type == OperandType.ConstantBuffer && src2.Type == OperandType.ConstantBuffer) || + (src2.Type == OperandType.ConstantBuffer && src2.GetCbufSlot() == DriverReservedCb)) { - Operand source = operation.GetSource(index); - - var result = GetStorageIndex(config, source); - - // If we already have a result, we give preference to the ones from - // the driver reserved constant buffer, as those are the ones that - // contains the base address. - if (result.Found && (!selectedResult.Found || result.SbCbSlot == GlobalMemory.DriverReservedCb)) - { - selectedResult = result; - } + return GetBaseAddressCbWithOffset(src2, src1, constOffset); } - return selectedResult; + return GetBaseAddressCbWithOffset(src1, src2, constOffset); } - private static SearchResult GetStorageIndex(ShaderConfig config, Operand operand) + private static SearchResult FindBaseAddressCbFromMemory(GtsContext gtsContext, Operation operation, int constOffset, bool needsOffset) { - if (operand.Type == OperandType.ConstantBuffer) + if (operation != null) { - int slot = operand.GetCbufSlot(); - int offset = operand.GetCbufOffset(); - - if ((offset & 3) == 0) + if (TryGetMemoryOffsets(operation, out LsMemoryType type, out Operand bo, out int co) && + gtsContext.TryGetMemoryTargetCb(type, bo, co, out SearchResult result) && + (result.Offset != null || !needsOffset)) { - return new SearchResult(slot, offset); + if (constOffset != 0) + { + return new SearchResult( + result.SbCbSlot, + result.SbCbOffset, + result.Offset, + result.ConstOffset + constOffset); + } + + return result; } } return SearchResult.NotFound; } - private static int GetStorageIndex(Operand operand, int sbStart, int sbEnd) + private static SearchResult
GetBaseAddressCbWithOffset(Operand baseAddress, Operand offset, int constOffset) { - if (operand.Type == OperandType.ConstantBuffer) + if (baseAddress.Type == OperandType.ConstantBuffer) { - int slot = operand.GetCbufSlot(); - int offset = operand.GetCbufOffset(); + int sbCbSlot = baseAddress.GetCbufSlot(); + int sbCbOffset = baseAddress.GetCbufOffset(); - if (slot == 0 && offset >= sbStart && offset < sbEnd) + // We require the offset to be 64-bit aligned (i.e. an even 32-bit word offset), + // since the address size is 64-bit and the GPU only supports aligned memory access. + if ((sbCbOffset & 1) == 0) { - int storageIndex = (offset - sbStart) / StorageDescSize; - - return storageIndex; + return new SearchResult(sbCbSlot, sbCbOffset, offset, constOffset); } } - return -1; + return SearchResult.NotFound; + } + + private static bool TryGetMemoryOffsets(Operation operation, out LsMemoryType type, out Operand baseOffset, out int constOffset) + { + baseOffset = null; + + if (operation.Inst == Instruction.LoadShared || operation.Inst == Instruction.StoreShared) + { + type = LsMemoryType.Shared; + return TryGetSharedMemoryOffsets(operation, out baseOffset, out constOffset); + } + else if (operation.Inst == Instruction.LoadLocal || operation.Inst == Instruction.StoreLocal) + { + type = LsMemoryType.Local; + return TryGetLocalMemoryOffset(operation, out constOffset); + } + + type = default; + constOffset = 0; + return false; + } + + private static bool TryGetSharedMemoryOffsets(Operation operation, out Operand baseOffset, out int constOffset) + { + baseOffset = null; + constOffset = 0; + + // The byte offset is right shifted by 2 to get the 32-bit word offset, + // so we want to get the byte offset back, since each one of those word + // offsets are a new "local variable" which will not match.
+ + if (operation.GetSource(0).AsgOp is Operation shiftRightOp && + shiftRightOp.Inst == Instruction.ShiftRightU32 && + shiftRightOp.GetSource(1).Type == OperandType.Constant && + shiftRightOp.GetSource(1).Value == 2) + { + baseOffset = shiftRightOp.GetSource(0); + } + + // Check if we have a constant offset being added to the base offset. + + if (baseOffset?.AsgOp is Operation addOp && addOp.Inst == Instruction.Add) + { + Operand src1 = addOp.GetSource(0); + Operand src2 = addOp.GetSource(1); + + if (src1.Type == OperandType.Constant && src2.Type == OperandType.LocalVariable) + { + constOffset = src1.Value; + baseOffset = src2; + } + else if (src1.Type == OperandType.LocalVariable && src2.Type == OperandType.Constant) + { + baseOffset = src1; + constOffset = src2.Value; + } + } + + return baseOffset != null && baseOffset.Type == OperandType.LocalVariable; + } + + private static bool TryGetLocalMemoryOffset(Operation operation, out int constOffset) + { + if (operation.GetSource(0).Type == OperandType.Constant) + { + constOffset = operation.GetSource(0).Value; + return true; + } + + constOffset = 0; + return false; } } } \ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs index b126e2c4..bdb3a62e 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs @@ -7,17 +7,15 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations { static class Optimizer { - public static void RunPass(BasicBlock[] blocks, ShaderConfig config) + public static void RunPass(HelperFunctionManager hfm, BasicBlock[] blocks, ShaderConfig config) { RunOptimizationPasses(blocks, config); - int sbUseMask = 0; - int ubeUseMask = 0; + GlobalToStorage.RunPass(hfm, blocks, config); // Those passes are looking for specific patterns and only needs to run once. 
for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++) { - GlobalToStorage.RunPass(blocks[blkIndex], config, ref sbUseMask, ref ubeUseMask); BindlessToIndexed.RunPass(blocks[blkIndex], config); BindlessElimination.RunPass(blocks[blkIndex], config); @@ -28,8 +26,6 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations } } - config.SetAccessibleBufferMasks(sbUseMask, ubeUseMask); - // Run optimizations one last time to remove any code that is now optimizable after above passes. RunOptimizationPasses(blocks, config); } diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Simplification.cs b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Simplification.cs index 8d05f99a..9b78c8aa 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Simplification.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Simplification.cs @@ -13,7 +13,6 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations switch (operation.Inst) { case Instruction.Add: - case Instruction.BitwiseExclusiveOr: TryEliminateBinaryOpCommutative(operation, 0); break; @@ -21,6 +20,13 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations TryEliminateBitwiseAnd(operation); break; + case Instruction.BitwiseExclusiveOr: + if (!TryEliminateXorSwap(operation)) + { + TryEliminateBinaryOpCommutative(operation, 0); + } + break; + case Instruction.BitwiseOr: TryEliminateBitwiseOr(operation); break; @@ -49,8 +55,9 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations private static void TryEliminateBitwiseAnd(Operation operation) { // Try to recognize and optimize those 3 patterns (in order): - // x & 0xFFFFFFFF == x, 0xFFFFFFFF & y == y, - // x & 0x00000000 == 0x00000000, 0x00000000 & y == 0x00000000 + // x & 0xFFFFFFFF == x, 0xFFFFFFFF & y == y, + // x & 0x00000000 == 0x00000000, 0x00000000 & y == 0x00000000 + Operand x = operation.GetSource(0); Operand y = operation.GetSource(1); @@ -68,11 +75,62 @@ namespace 
Ryujinx.Graphics.Shader.Translation.Optimizations } } + private static bool TryEliminateXorSwap(Operation xCopyOp) + { + // Try to recognize XOR swap pattern: + // x = x ^ y + // y = x ^ y + // x = x ^ y + // Or, in SSA: + // x2 = x ^ y + // y2 = x2 ^ y + // x3 = x2 ^ y2 + // Transform it into something more sane: + // temp = y + // y = x + // x = temp + + // Note that because XOR is commutative, there are actually + // multiple possible combinations of this pattern, for + // simplicity this only catches one of them. + + Operand x = xCopyOp.GetSource(0); + Operand y = xCopyOp.GetSource(1); + + if (x.AsgOp is not Operation tCopyOp || tCopyOp.Inst != Instruction.BitwiseExclusiveOr || + y.AsgOp is not Operation yCopyOp || yCopyOp.Inst != Instruction.BitwiseExclusiveOr) + { + return false; + } + + if (tCopyOp == yCopyOp) + { + return false; + } + + if (yCopyOp.GetSource(0) != x || + yCopyOp.GetSource(1) != tCopyOp.GetSource(1) || + x.UseOps.Count != 2) + { + return false; + } + + x = tCopyOp.GetSource(0); + y = tCopyOp.GetSource(1); + + tCopyOp.TurnIntoCopy(y); // Temp = Y + yCopyOp.TurnIntoCopy(x); // Y = X + xCopyOp.TurnIntoCopy(tCopyOp.Dest); // X = Temp + + return true; + } + private static void TryEliminateBitwiseOr(Operation operation) { // Try to recognize and optimize those 3 patterns (in order): - // x | 0x00000000 == x, 0x00000000 | y == y, - // x | 0xFFFFFFFF == 0xFFFFFFFF, 0xFFFFFFFF | y == 0xFFFFFFFF + // x | 0x00000000 == x, 0x00000000 | y == y, + // x | 0xFFFFFFFF == 0xFFFFFFFF, 0xFFFFFFFF | y == 0xFFFFFFFF + Operand x = operation.GetSource(0); Operand y = operation.GetSource(1); diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Utils.cs b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Utils.cs index a0d58d07..ffbd16f8 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Utils.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Utils.cs @@ -1,4 +1,5 @@ using 
Ryujinx.Graphics.Shader.IntermediateRepresentation; +using System.Collections.Generic; namespace Ryujinx.Graphics.Shader.Translation.Optimizations { @@ -93,5 +94,17 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations return source; } + + public static void DeleteNode(LinkedListNode node, Operation operation) + { + node.List.Remove(node); + + for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++) + { + operation.SetSource(srcIndex, null); + } + + operation.Dest = null; + } } } diff --git a/src/Ryujinx.Graphics.Shader/Translation/ResourceManager.cs b/src/Ryujinx.Graphics.Shader/Translation/ResourceManager.cs index a2cfbe22..2d19a5a7 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/ResourceManager.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/ResourceManager.cs @@ -14,6 +14,11 @@ namespace Ryujinx.Graphics.Shader.Translation private readonly string _stagePrefix; private readonly int[] _cbSlotToBindingMap; + private readonly int[] _sbSlotToBindingMap; + private uint _sbSlotWritten; + + private readonly Dictionary _sbSlots; + private readonly Dictionary _sbSlotsReverse; private readonly HashSet _usedConstantBufferBindings; @@ -26,7 +31,12 @@ namespace Ryujinx.Graphics.Shader.Translation _stagePrefix = GetShaderStagePrefix(stage); _cbSlotToBindingMap = new int[18]; + _sbSlotToBindingMap = new int[16]; _cbSlotToBindingMap.AsSpan().Fill(-1); + _sbSlotToBindingMap.AsSpan().Fill(-1); + + _sbSlots = new Dictionary(); + _sbSlotsReverse = new Dictionary(); _usedConstantBufferBindings = new HashSet(); @@ -47,6 +57,52 @@ namespace Ryujinx.Graphics.Shader.Translation return binding; } + public bool TryGetStorageBufferBinding(int sbCbSlot, int sbCbOffset, bool write, out int binding) + { + if (!TryGetSbSlot((byte)sbCbSlot, (ushort)sbCbOffset, out int slot)) + { + binding = 0; + return false; + } + + binding = _sbSlotToBindingMap[slot]; + + if (binding < 0) + { + binding = _gpuAccessor.QueryBindingStorageBuffer(slot); + _sbSlotToBindingMap[slot] = 
binding; + string slotNumber = slot.ToString(CultureInfo.InvariantCulture); + AddNewStorageBuffer(binding, $"{_stagePrefix}_s{slotNumber}"); + } + + if (write) + { + _sbSlotWritten |= 1u << slot; + } + + return true; + } + + private bool TryGetSbSlot(byte sbCbSlot, ushort sbCbOffset, out int slot) + { + int key = PackSbCbInfo(sbCbSlot, sbCbOffset); + + if (!_sbSlots.TryGetValue(key, out slot)) + { + slot = _sbSlots.Count; + + if (slot >= _sbSlotToBindingMap.Length) + { + return false; + } + + _sbSlots.Add(key, slot); + _sbSlotsReverse.Add(slot, key); + } + + return true; + } + public bool TryGetConstantBufferSlot(int binding, out int slot) { for (slot = 0; slot < _cbSlotToBindingMap.Length; slot++) @@ -90,6 +146,34 @@ namespace Ryujinx.Graphics.Shader.Translation return descriptors; } + public BufferDescriptor[] GetStorageBufferDescriptors() + { + var descriptors = new BufferDescriptor[_sbSlots.Count]; + + int descriptorIndex = 0; + + foreach ((int key, int slot) in _sbSlots) + { + int binding = _sbSlotToBindingMap[slot]; + + if (binding >= 0) + { + (int sbCbSlot, int sbCbOffset) = UnpackSbCbInfo(key); + descriptors[descriptorIndex++] = new BufferDescriptor(binding, slot, sbCbSlot, sbCbOffset) + { + Flags = (_sbSlotWritten & (1u << slot)) != 0 ? 
BufferUsageFlags.Write : BufferUsageFlags.None + }; + } + } + + if (descriptors.Length != descriptorIndex) + { + Array.Resize(ref descriptors, descriptorIndex); + } + + return descriptors; + } + private void AddNewConstantBuffer(int binding, string name) { StructureType type = new StructureType(new[] @@ -100,6 +184,16 @@ namespace Ryujinx.Graphics.Shader.Translation _properties.AddConstantBuffer(binding, new BufferDefinition(BufferLayout.Std140, 0, binding, name, type)); } + private void AddNewStorageBuffer(int binding, string name) + { + StructureType type = new StructureType(new[] + { + new StructureField(AggregateType.Array | AggregateType.U32, "data", 0) + }); + + _properties.AddStorageBuffer(binding, new BufferDefinition(BufferLayout.Std430, 1, binding, name, type)); + } + public static string GetShaderStagePrefix(ShaderStage stage) { uint index = (uint)stage; @@ -111,5 +205,15 @@ namespace Ryujinx.Graphics.Shader.Translation return _stagePrefixes[index]; } + + private static int PackSbCbInfo(int sbCbSlot, int sbCbOffset) + { + return sbCbOffset | ((int)sbCbSlot << 16); + } + + private static (int, int) UnpackSbCbInfo(int key) + { + return ((byte)(key >> 16), (ushort)key); + } } } \ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/Translation/Rewriter.cs b/src/Ryujinx.Graphics.Shader/Translation/Rewriter.cs index 866ae522..baa88251 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/Rewriter.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/Rewriter.cs @@ -2,10 +2,8 @@ using Ryujinx.Graphics.Shader.IntermediateRepresentation; using Ryujinx.Graphics.Shader.StructuredIr; using System.Collections.Generic; using System.Linq; -using System.Numerics; using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper; -using static Ryujinx.Graphics.Shader.Translation.GlobalMemory; namespace Ryujinx.Graphics.Shader.Translation { @@ -23,11 +21,10 @@ namespace Ryujinx.Graphics.Shader.Translation { BasicBlock block = blocks[blkIndex]; - for 
(LinkedListNode node = block.Operations.First; node != null;) + for (LinkedListNode node = block.Operations.First; node != null; node = node.Next) { if (node.Value is not Operation operation) { - node = node.Next; continue; } @@ -56,8 +53,6 @@ namespace Ryujinx.Graphics.Shader.Translation InsertVectorComponentSelect(node, config); } - LinkedListNode nextNode = node.Next; - if (operation is TextureOperation texOp) { node = InsertTexelFetchScale(hfm, node, config); @@ -74,15 +69,7 @@ namespace Ryujinx.Graphics.Shader.Translation node = InsertSnormNormalization(node, config); } } - - nextNode = node.Next; } - else if (UsesGlobalMemory(operation.Inst, operation.StorageKind)) - { - nextNode = RewriteGlobalAccess(node, config)?.Next ?? nextNode; - } - - node = nextNode; } } } @@ -184,196 +171,6 @@ namespace Ryujinx.Graphics.Shader.Translation operation.TurnIntoCopy(result); } - private static LinkedListNode RewriteGlobalAccess(LinkedListNode node, ShaderConfig config) - { - Operation operation = (Operation)node.Value; - - bool isAtomic = operation.Inst.IsAtomic(); - bool isStg16Or8 = operation.Inst == Instruction.StoreGlobal16 || operation.Inst == Instruction.StoreGlobal8; - bool isWrite = isAtomic || operation.Inst == Instruction.StoreGlobal || isStg16Or8; - - Operation storageOp = null; - - Operand PrependOperation(Instruction inst, params Operand[] sources) - { - Operand local = Local(); - - node.List.AddBefore(node, new Operation(inst, local, sources)); - - return local; - } - - Operand PrependStorageOperation(Instruction inst, StorageKind storageKind, params Operand[] sources) - { - Operand local = Local(); - - node.List.AddBefore(node, new Operation(inst, storageKind, local, sources)); - - return local; - } - - Operand PrependExistingOperation(Operation operation) - { - Operand local = Local(); - - operation.Dest = local; - node.List.AddBefore(node, operation); - - return local; - } - - Operand addrLow = operation.GetSource(0); - Operand addrHigh = 
operation.GetSource(1); - - Operand sbBaseAddrLow = Const(0); - Operand sbSlot = Const(0); - - Operand alignMask = Const(-config.GpuAccessor.QueryHostStorageBufferOffsetAlignment()); - - Operand BindingRangeCheck(int cbOffset, out Operand baseAddrLow) - { - baseAddrLow = Cbuf(DriverReservedCb, cbOffset); - Operand baseAddrHigh = Cbuf(DriverReservedCb, cbOffset + 1); - Operand size = Cbuf(DriverReservedCb, cbOffset + 2); - - Operand offset = PrependOperation(Instruction.Subtract, addrLow, baseAddrLow); - Operand borrow = PrependOperation(Instruction.CompareLessU32, addrLow, baseAddrLow); - - Operand inRangeLow = PrependOperation(Instruction.CompareLessU32, offset, size); - - Operand addrHighBorrowed = PrependOperation(Instruction.Add, addrHigh, borrow); - - Operand inRangeHigh = PrependOperation(Instruction.CompareEqual, addrHighBorrowed, baseAddrHigh); - - return PrependOperation(Instruction.BitwiseAnd, inRangeLow, inRangeHigh); - } - - int sbUseMask = config.AccessibleStorageBuffersMask; - - while (sbUseMask != 0) - { - int slot = BitOperations.TrailingZeroCount(sbUseMask); - - sbUseMask &= ~(1 << slot); - - int cbOffset = GetStorageCbOffset(config.Stage, slot); - slot = config.GetSbSlot(DriverReservedCb, (ushort)cbOffset); - - config.SetUsedStorageBuffer(slot, isWrite); - - Operand inRange = BindingRangeCheck(cbOffset, out Operand baseAddrLow); - - sbBaseAddrLow = PrependOperation(Instruction.ConditionalSelect, inRange, baseAddrLow, sbBaseAddrLow); - sbSlot = PrependOperation(Instruction.ConditionalSelect, inRange, Const(slot), sbSlot); - } - - if (config.AccessibleStorageBuffersMask != 0) - { - Operand baseAddrTrunc = PrependOperation(Instruction.BitwiseAnd, sbBaseAddrLow, alignMask); - Operand byteOffset = PrependOperation(Instruction.Subtract, addrLow, baseAddrTrunc); - - Operand[] sources = new Operand[operation.SourcesCount]; - - sources[0] = sbSlot; - - if (isStg16Or8) - { - sources[1] = byteOffset; - } - else - { - sources[1] = 
PrependOperation(Instruction.ShiftRightU32, byteOffset, Const(2)); - } - - for (int index = 2; index < operation.SourcesCount; index++) - { - sources[index] = operation.GetSource(index); - } - - if (isAtomic) - { - storageOp = new Operation(operation.Inst, StorageKind.StorageBuffer, operation.Dest, sources); - } - else if (operation.Inst == Instruction.LoadGlobal) - { - storageOp = new Operation(Instruction.LoadStorage, operation.Dest, sources); - } - else - { - Instruction storeInst = operation.Inst switch - { - Instruction.StoreGlobal16 => Instruction.StoreStorage16, - Instruction.StoreGlobal8 => Instruction.StoreStorage8, - _ => Instruction.StoreStorage - }; - - storageOp = new Operation(storeInst, null, sources); - } - } - else if (operation.Dest != null) - { - storageOp = new Operation(Instruction.Copy, operation.Dest, Const(0)); - } - - if (operation.Inst == Instruction.LoadGlobal) - { - int cbeUseMask = config.AccessibleConstantBuffersMask; - - while (cbeUseMask != 0) - { - int slot = BitOperations.TrailingZeroCount(cbeUseMask); - int cbSlot = UbeFirstCbuf + slot; - - cbeUseMask &= ~(1 << slot); - - Operand previousResult = PrependExistingOperation(storageOp); - - int cbOffset = GetConstantUbeOffset(slot); - - Operand inRange = BindingRangeCheck(cbOffset, out Operand baseAddrLow); - - Operand baseAddrTruncConst = PrependOperation(Instruction.BitwiseAnd, baseAddrLow, alignMask); - Operand byteOffsetConst = PrependOperation(Instruction.Subtract, addrLow, baseAddrTruncConst); - - Operand cbIndex = PrependOperation(Instruction.ShiftRightU32, byteOffsetConst, Const(2)); - Operand vecIndex = PrependOperation(Instruction.ShiftRightU32, cbIndex, Const(2)); - Operand elemIndex = PrependOperation(Instruction.BitwiseAnd, cbIndex, Const(3)); - - Operand[] sourcesCb = new Operand[4]; - - sourcesCb[0] = Const(config.ResourceManager.GetConstantBufferBinding(cbSlot)); - sourcesCb[1] = Const(0); - sourcesCb[2] = vecIndex; - sourcesCb[3] = elemIndex; - - Operand ldcResult = 
PrependStorageOperation(Instruction.Load, StorageKind.ConstantBuffer, sourcesCb); - - storageOp = new Operation(Instruction.ConditionalSelect, operation.Dest, inRange, ldcResult, previousResult); - } - } - - for (int index = 0; index < operation.SourcesCount; index++) - { - operation.SetSource(index, null); - } - - LinkedListNode oldNode = node; - LinkedList oldNodeList = oldNode.List; - - if (storageOp != null) - { - node = node.List.AddBefore(node, storageOp); - } - else - { - node = null; - } - - oldNodeList.Remove(oldNode); - - return node; - } - private static LinkedListNode InsertTexelFetchScale(HelperFunctionManager hfm, LinkedListNode node, ShaderConfig config) { TextureOperation texOp = (TextureOperation)node.Value; diff --git a/src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs b/src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs index 40a32e2d..5c0a1fb6 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs @@ -110,12 +110,6 @@ namespace Ryujinx.Graphics.Shader.Translation public UInt128 NextInputAttributesComponents { get; private set; } public UInt128 ThisInputAttributesComponents { get; private set; } - public int AccessibleStorageBuffersMask { get; private set; } - public int AccessibleConstantBuffersMask { get; private set; } - - private int _usedStorageBuffers; - private int _usedStorageBuffersWrite; - private readonly record struct TextureInfo(int CbufSlot, int Handle, bool Indexed, TextureFormat Format); private struct TextureMeta @@ -127,18 +121,9 @@ namespace Ryujinx.Graphics.Shader.Translation private readonly Dictionary _usedTextures; private readonly Dictionary _usedImages; - - private readonly Dictionary _sbSlots; - private readonly Dictionary _sbSlotsReverse; - - private BufferDescriptor[] _cachedStorageBufferDescriptors; private TextureDescriptor[] _cachedTextureDescriptors; private TextureDescriptor[] _cachedImageDescriptors; - private int 
_firstStorageBufferBinding; - - public int FirstStorageBufferBinding => _firstStorageBufferBinding; - public ShaderConfig(ShaderStage stage, IGpuAccessor gpuAccessor, TranslationOptions options) { Stage = stage; @@ -147,18 +132,12 @@ namespace Ryujinx.Graphics.Shader.Translation _transformFeedbackDefinitions = new Dictionary(); - AccessibleStorageBuffersMask = (1 << GlobalMemory.StorageMaxCount) - 1; - AccessibleConstantBuffersMask = (1 << GlobalMemory.UbeMaxCount) - 1; - UsedInputAttributesPerPatch = new HashSet(); UsedOutputAttributesPerPatch = new HashSet(); _usedTextures = new Dictionary(); _usedImages = new Dictionary(); - _sbSlots = new Dictionary(); - _sbSlotsReverse = new Dictionary(); - ResourceManager = new ResourceManager(stage, gpuAccessor, new ShaderProperties()); } @@ -173,11 +152,6 @@ namespace Ryujinx.Graphics.Shader.Translation OutputTopology = outputTopology; MaxOutputVertices = maxOutputVertices; TransformFeedbackEnabled = gpuAccessor.QueryTransformFeedbackEnabled(); - - if (Stage != ShaderStage.Compute) - { - AccessibleConstantBuffersMask = 0; - } } public ShaderConfig(ShaderHeader header, IGpuAccessor gpuAccessor, TranslationOptions options) : this(header.Stage, gpuAccessor, options) @@ -433,8 +407,6 @@ namespace Ryujinx.Graphics.Shader.Translation UsedInputAttributes |= other.UsedInputAttributes; UsedOutputAttributes |= other.UsedOutputAttributes; - _usedStorageBuffers |= other._usedStorageBuffers; - _usedStorageBuffersWrite |= other._usedStorageBuffersWrite; foreach (var kv in other._usedTextures) { @@ -634,23 +606,6 @@ namespace Ryujinx.Graphics.Shader.Translation UsedFeatures |= flags; } - public void SetAccessibleBufferMasks(int sbMask, int ubeMask) - { - AccessibleStorageBuffersMask = sbMask; - AccessibleConstantBuffersMask = ubeMask; - } - - public void SetUsedStorageBuffer(int slot, bool write) - { - int mask = 1 << slot; - _usedStorageBuffers |= mask; - - if (write) - { - _usedStorageBuffersWrite |= mask; - } - } - public void 
SetUsedTexture( Instruction inst, SamplerType type, @@ -756,76 +711,6 @@ namespace Ryujinx.Graphics.Shader.Translation return meta; } - public BufferDescriptor[] GetStorageBufferDescriptors() - { - if (_cachedStorageBufferDescriptors != null) - { - return _cachedStorageBufferDescriptors; - } - - return _cachedStorageBufferDescriptors = GetStorageBufferDescriptors( - _usedStorageBuffers, - _usedStorageBuffersWrite, - true, - out _firstStorageBufferBinding, - GpuAccessor.QueryBindingStorageBuffer); - } - - private BufferDescriptor[] GetStorageBufferDescriptors( - int usedMask, - int writtenMask, - bool isArray, - out int firstBinding, - Func getBindingCallback) - { - firstBinding = 0; - bool hasFirstBinding = false; - var descriptors = new BufferDescriptor[BitOperations.PopCount((uint)usedMask)]; - - int lastSlot = -1; - - for (int i = 0; i < descriptors.Length; i++) - { - int slot = BitOperations.TrailingZeroCount(usedMask); - - if (isArray) - { - // The next array entries also consumes bindings, even if they are unused. 
- for (int j = lastSlot + 1; j < slot; j++) - { - int binding = getBindingCallback(j); - - if (!hasFirstBinding) - { - firstBinding = binding; - hasFirstBinding = true; - } - } - } - - lastSlot = slot; - - (int sbCbSlot, int sbCbOffset) = GetSbCbInfo(slot); - - descriptors[i] = new BufferDescriptor(getBindingCallback(slot), slot, sbCbSlot, sbCbOffset); - - if (!hasFirstBinding) - { - firstBinding = descriptors[i].Binding; - hasFirstBinding = true; - } - - if ((writtenMask & (1 << slot)) != 0) - { - descriptors[i].SetFlag(BufferUsageFlags.Write); - } - - usedMask &= ~(1 << slot); - } - - return descriptors; - } - public TextureDescriptor[] GetTextureDescriptors() { return _cachedTextureDescriptors ??= GetTextureOrImageDescriptors(_usedTextures, GpuAccessor.QueryBindingTexture); @@ -922,45 +807,11 @@ namespace Ryujinx.Graphics.Shader.Translation return FindDescriptorIndex(GetImageDescriptors(), texOp); } - public int GetSbSlot(byte sbCbSlot, ushort sbCbOffset) - { - int key = PackSbCbInfo(sbCbSlot, sbCbOffset); - - if (!_sbSlots.TryGetValue(key, out int slot)) - { - slot = _sbSlots.Count; - _sbSlots.Add(key, slot); - _sbSlotsReverse.Add(slot, key); - } - - return slot; - } - - public (int, int) GetSbCbInfo(int slot) - { - if (_sbSlotsReverse.TryGetValue(slot, out int key)) - { - return UnpackSbCbInfo(key); - } - - throw new ArgumentException($"Invalid slot {slot}.", nameof(slot)); - } - - private static int PackSbCbInfo(int sbCbSlot, int sbCbOffset) - { - return sbCbOffset | ((int)sbCbSlot << 16); - } - - private static (int, int) UnpackSbCbInfo(int key) - { - return ((byte)(key >> 16), (ushort)key); - } - public ShaderProgramInfo CreateProgramInfo(ShaderIdentification identification = ShaderIdentification.None) { return new ShaderProgramInfo( ResourceManager.GetConstantBufferDescriptors(), - GetStorageBufferDescriptors(), + ResourceManager.GetStorageBufferDescriptors(), GetTextureDescriptors(), GetImageDescriptors(), identification, diff --git 
a/src/Ryujinx.Graphics.Shader/Translation/ShaderIdentifier.cs b/src/Ryujinx.Graphics.Shader/Translation/ShaderIdentifier.cs index 867e2437..68400437 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/ShaderIdentifier.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/ShaderIdentifier.cs @@ -48,7 +48,7 @@ namespace Ryujinx.Graphics.Shader.Translation continue; } - if (IsResourceWrite(operation.Inst)) + if (IsResourceWrite(operation.Inst, operation.StorageKind)) { return false; } @@ -154,7 +154,7 @@ namespace Ryujinx.Graphics.Shader.Translation return totalVerticesCount + verticesCount == 3 && writesLayer; } - private static bool IsResourceWrite(Instruction inst) + private static bool IsResourceWrite(Instruction inst, StorageKind storageKind) { switch (inst) { @@ -170,13 +170,11 @@ namespace Ryujinx.Graphics.Shader.Translation case Instruction.AtomicXor: case Instruction.ImageAtomic: case Instruction.ImageStore: - case Instruction.StoreGlobal: - case Instruction.StoreGlobal16: - case Instruction.StoreGlobal8: - case Instruction.StoreStorage: - case Instruction.StoreStorage16: - case Instruction.StoreStorage8: return true; + case Instruction.Store: + return storageKind == StorageKind.StorageBuffer || + storageKind == StorageKind.SharedMemory || + storageKind == StorageKind.LocalMemory; } return false; diff --git a/src/Ryujinx.Graphics.Shader/Translation/Translator.cs b/src/Ryujinx.Graphics.Shader/Translation/Translator.cs index 5bbc0009..c0212a5b 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/Translator.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/Translator.cs @@ -78,7 +78,7 @@ namespace Ryujinx.Graphics.Shader.Translation Ssa.Rename(cfg.Blocks); - Optimizer.RunPass(cfg.Blocks, config); + Optimizer.RunPass(hfm, cfg.Blocks, config); Rewriter.RunPass(hfm, cfg.Blocks, config); } diff --git a/src/Ryujinx.Graphics.Vulkan/BufferManager.cs b/src/Ryujinx.Graphics.Vulkan/BufferManager.cs index e046bf89..521a132a 100644 --- 
a/src/Ryujinx.Graphics.Vulkan/BufferManager.cs +++ b/src/Ryujinx.Graphics.Vulkan/BufferManager.cs @@ -115,8 +115,6 @@ namespace Ryujinx.Graphics.Vulkan holder = Create(gd, size, baseType: baseType, storageHint: storageHint); if (holder == null) { - Logger.Error?.Print(LogClass.Gpu, $"Failed to create buffer with size 0x{size:X} and type \"{baseType}\"."); - return BufferHandle.Null; } @@ -264,6 +262,8 @@ namespace Ryujinx.Graphics.Vulkan return holder; } + Logger.Error?.Print(LogClass.Gpu, $"Failed to create buffer with size 0x{size:X} and type \"{baseType}\"."); + return null; } diff --git a/src/Ryujinx.Graphics.Vulkan/DescriptorSetCollection.cs b/src/Ryujinx.Graphics.Vulkan/DescriptorSetCollection.cs index c57cb1a9..70b3ebfe 100644 --- a/src/Ryujinx.Graphics.Vulkan/DescriptorSetCollection.cs +++ b/src/Ryujinx.Graphics.Vulkan/DescriptorSetCollection.cs @@ -70,30 +70,6 @@ namespace Ryujinx.Graphics.Vulkan } } - public unsafe void UpdateStorageBuffers(int setIndex, int baseBinding, ReadOnlySpan bufferInfo) - { - if (bufferInfo.Length == 0) - { - return; - } - - fixed (DescriptorBufferInfo* pBufferInfo = bufferInfo) - { - var writeDescriptorSet = new WriteDescriptorSet - { - SType = StructureType.WriteDescriptorSet, - DstSet = _descriptorSets[setIndex], - DstBinding = (uint)(baseBinding & ~(Constants.MaxStorageBuffersPerStage - 1)), - DstArrayElement = (uint)(baseBinding & (Constants.MaxStorageBuffersPerStage - 1)), - DescriptorType = DescriptorType.StorageBuffer, - DescriptorCount = (uint)bufferInfo.Length, - PBufferInfo = pBufferInfo - }; - - _holder.Api.UpdateDescriptorSets(_holder.Device, 1, writeDescriptorSet, 0, null); - } - } - public unsafe void UpdateImage(int setIndex, int bindingIndex, DescriptorImageInfo imageInfo, DescriptorType type) { if (imageInfo.ImageView.Handle != 0UL) diff --git a/src/Ryujinx.Graphics.Vulkan/DescriptorSetUpdater.cs b/src/Ryujinx.Graphics.Vulkan/DescriptorSetUpdater.cs index f3ac36e1..cbac1cd4 100644 --- 
a/src/Ryujinx.Graphics.Vulkan/DescriptorSetUpdater.cs +++ b/src/Ryujinx.Graphics.Vulkan/DescriptorSetUpdater.cs @@ -448,14 +448,7 @@ namespace Ryujinx.Graphics.Vulkan } ReadOnlySpan storageBuffers = _storageBuffers; - if (program.HasMinimalLayout) - { - dsc.UpdateBuffers(0, binding, storageBuffers.Slice(binding, count), DescriptorType.StorageBuffer); - } - else - { - dsc.UpdateStorageBuffers(0, binding, storageBuffers.Slice(binding, count)); - } + dsc.UpdateBuffers(0, binding, storageBuffers.Slice(binding, count), DescriptorType.StorageBuffer); } else if (setIndex == PipelineBase.TextureSetIndex) {