ryujinx-mirror/Ryujinx.Graphics.Gpu/Engine/MME/MacroJitCompiler.cs
gdkchan 40b21cc3c4
Separate GPU engines (part 2/2) (#2440)
* 3D engine now uses DeviceState too, plus new state modification tracking

* Remove old methods code

* Remove GpuState and friends

* Optimize DeviceState, force inline some functions

* This change was not supposed to go in

* Proper channel initialization

* Optimize state read/write methods even more

* Fix debug build

* Do not dirty state if the write is redundant

* The YControl register should dirty either the viewport or front face state too, to update the host origin

* Avoid redundant vertex buffer updates

* Move state and get rid of the Ryujinx.Graphics.Gpu.State namespace

* Comments and nits

* Fix rebase

* PR feedback

* Move changed = false to improve codegen

* PR feedback

* Carry RyuJIT a bit more
2021-07-11 17:20:40 -03:00

517 lines
20 KiB
C#

using Ryujinx.Graphics.Device;
using System;
using System.Collections.Generic;
using System.Reflection.Emit;
namespace Ryujinx.Graphics.Gpu.Engine.MME
{
/// <summary>
/// Represents a Macro Just-in-Time compiler.
/// </summary>
class MacroJitCompiler
{
/// <summary>Dynamic method (named "Macro") that receives the IL generated for the macro.</summary>
private readonly DynamicMethod _meth;
/// <summary>IL generator of <see cref="_meth"/>; all the Emit* helpers write to it.</summary>
private readonly ILGenerator _ilGen;
/// <summary>
/// IL locals backing the macro general purpose registers, indexed by register number.
/// Entry 0 is never allocated: reads of register 0 load the constant 0 and writes to it are discarded.
/// </summary>
private readonly LocalBuilder[] _gprs;
/// <summary>IL local that receives the carry value produced by the add and subtract register operations.</summary>
private readonly LocalBuilder _carry;
/// <summary>IL local holding the current method address (low 12 bits of the value given to EmitStoreMethAddr).</summary>
private readonly LocalBuilder _methAddr;
/// <summary>IL local holding the value added to the method address after each send (6 bits starting at bit 12).</summary>
private readonly LocalBuilder _methIncr;
/// <summary>
/// Creates a new instance of the Macro Just-in-Time compiler.
/// </summary>
/// <remarks>
/// Sets up the dynamic method, declares the IL locals used by the generated code,
/// and emits a prologue that copies the integer argument into general purpose register 1.
/// </remarks>
public MacroJitCompiler()
{
    // Host signature: void Macro(MacroJitContext, IDeviceState, int).
    Type[] parameterTypes = { typeof(MacroJitContext), typeof(IDeviceState), typeof(int) };

    _meth = new DynamicMethod("Macro", typeof(void), parameterTypes);
    _ilGen = _meth.GetILGenerator();

    // Register 0 gets no local, so slot 0 of the array is left unassigned.
    _gprs = new LocalBuilder[8];

    for (int reg = 1; reg < _gprs.Length; reg++)
    {
        _gprs[reg] = _ilGen.DeclareLocal(typeof(int));
    }

    _carry = _ilGen.DeclareLocal(typeof(int));
    _methAddr = _ilGen.DeclareLocal(typeof(int));
    _methIncr = _ilGen.DeclareLocal(typeof(int));

    // Prologue: register 1 = third argument of the dynamic method.
    _ilGen.Emit(OpCodes.Ldarg_2);
    _ilGen.Emit(OpCodes.Stloc, _gprs[1]);
}
/// <summary>
/// Signature of the host method produced by <see cref="Compile"/>.
/// </summary>
/// <param name="context">Macro JIT context; the generated code calls <c>FetchParam</c> on it</param>
/// <param name="state">Device state, passed by the generated code to the <c>Read</c> and <c>Send</c> calls</param>
/// <param name="arg0">Value copied into general purpose register 1 when the generated code starts</param>
public delegate void MacroExecute(MacroJitContext context, IDeviceState state, int arg0);
/// <summary>
/// Translates a new piece of GPU Macro code into host executable code.
/// </summary>
/// <remarks>
/// Works in two passes: the first one defines an IL label for every branch target,
/// the second one emits the IL for each instruction. An instruction with bit 7 set is an exit;
/// the instruction that follows it is still emitted before the return.
/// </remarks>
/// <param name="code">Code to be translated</param>
/// <returns>Delegate of the host compiled code</returns>
public MacroExecute Compile(ReadOnlySpan<int> code)
{
    Dictionary<int, Label> branchLabels = new Dictionary<int, Label>();

    // Highest branch destination seen so far; an exit located before it does not end the macro.
    int furthestTarget = 0;

    // First pass: collect all branch targets.
    for (int pc = 0; pc < code.Length; pc++)
    {
        int word = code[pc];

        if ((word & 7) == 7)
        {
            int destination = pc + (word >> 14);

            if (!branchLabels.ContainsKey(destination))
            {
                branchLabels.Add(destination, _ilGen.DefineLabel());
            }

            furthestTarget = Math.Max(furthestTarget, destination);
        }

        bool isExit = (word & 0x80) != 0;

        if (isExit && pc >= furthestTarget)
        {
            break;
        }
    }

    // Second pass: code generation.
    bool ranOffTheEnd = true;

    for (int pc = 0; pc < code.Length; pc++)
    {
        if (branchLabels.TryGetValue(pc, out Label here))
        {
            _ilGen.MarkLabel(here);
        }

        Emit(code, pc, branchLabels);

        if ((code[pc] & 0x80) == 0)
        {
            continue;
        }

        // Exit: emit the following instruction as well, then return.
        Emit(code, pc + 1, branchLabels);
        _ilGen.Emit(OpCodes.Ret);

        if (pc >= furthestTarget)
        {
            ranOffTheEnd = false;

            break;
        }
    }

    // The loop consumed all the code without a final exit, so terminate the method here.
    if (ranOffTheEnd)
    {
        _ilGen.Emit(OpCodes.Ret);
    }

    return (MacroExecute)_meth.CreateDelegate(typeof(MacroExecute));
}
/// <summary>
/// Emits IL equivalent to the Macro instruction at a given offset.
/// </summary>
/// <remarks>
/// Instructions whose low 3 bits are all set are branches; every other instruction
/// computes a value that is then handled according to its assignment operation (bits 4 to 6).
/// </remarks>
/// <param name="code">GPU Macro code</param>
/// <param name="offset">Offset, in words, where the instruction is located</param>
/// <param name="labels">Labels for Macro branch targets, used by branch instructions</param>
private void Emit(ReadOnlySpan<int> code, int offset, Dictionary<int, Label> labels)
{
    int instruction = code[offset];

    if ((instruction & 7) == 7)
    {
        // Branch: bit 4 selects "branch if not zero" over "branch if zero".
        bool branchIfNotZero = ((instruction >> 4) & 1) != 0;

        EmitLoadGprA(instruction);

        Label notTaken = _ilGen.DefineLabel();

        // Jump over the branch sequence when the condition does not hold.
        _ilGen.Emit(branchIfNotZero ? OpCodes.Brfalse : OpCodes.Brtrue, notTaken);

        // Unless bit 5 is set, the next instruction is emitted ahead of the jump.
        if ((instruction & 0x20) == 0)
        {
            Emit(code, offset + 1, labels);
        }

        _ilGen.Emit(OpCodes.Br, labels[offset + (instruction >> 14)]);
        _ilGen.MarkLabel(notTaken);

        return;
    }

    AssignmentOperation assignment = (AssignmentOperation)((instruction >> 4) & 7);

    // Leaves the operation result on the IL evaluation stack.
    EmitAluOp(instruction);

    switch (assignment)
    {
        // Discard the result and store a fetched parameter instead.
        case AssignmentOperation.IgnoreAndFetch:
            _ilGen.Emit(OpCodes.Pop);
            EmitFetchParam();
            EmitStoreDstGpr(instruction);
            break;

        // Store the result.
        case AssignmentOperation.Move:
            EmitStoreDstGpr(instruction);
            break;

        // Store the result and send it.
        case AssignmentOperation.MoveAndSend:
            _ilGen.Emit(OpCodes.Dup);
            EmitStoreDstGpr(instruction);
            EmitSend();
            break;

        // Store a fetched parameter, then either send the result or use it as Method Address.
        case AssignmentOperation.FetchAndSend:
        case AssignmentOperation.FetchAndSetMaddr:
            EmitFetchParam();
            EmitStoreDstGpr(instruction);

            if (assignment == AssignmentOperation.FetchAndSend)
            {
                EmitSend();
            }
            else
            {
                EmitStoreMethAddr();
            }
            break;

        // Store the result and use it as Method Address, optionally fetching and sending a parameter afterwards.
        case AssignmentOperation.MoveAndSetMaddr:
        case AssignmentOperation.MoveAndSetMaddrThenFetchAndSend:
            _ilGen.Emit(OpCodes.Dup);
            EmitStoreDstGpr(instruction);
            EmitStoreMethAddr();

            if (assignment == AssignmentOperation.MoveAndSetMaddrThenFetchAndSend)
            {
                EmitFetchParam();
                EmitSend();
            }
            break;

        // Store the result and use it as Method Address, then send bits 17:12 of the result.
        case AssignmentOperation.MoveAndSetMaddrThenSendHigh:
            // Three copies are needed: destination register, method address, and the value to send.
            _ilGen.Emit(OpCodes.Dup);
            _ilGen.Emit(OpCodes.Dup);
            EmitStoreDstGpr(instruction);
            EmitStoreMethAddr();
            _ilGen.Emit(OpCodes.Ldc_I4, 12);
            _ilGen.Emit(OpCodes.Shr_Un);
            _ilGen.Emit(OpCodes.Ldc_I4, 0x3f);
            _ilGen.Emit(OpCodes.And);
            EmitSend();
            break;
    }
}
/// <summary>
/// Emits IL for a Arithmetic and Logic Unit instruction.
/// The result of the operation is left on the IL evaluation stack.
/// </summary>
/// <param name="opCode">Instruction to be translated</param>
/// <exception cref="InvalidOperationException">Throw when the instruction encoding is invalid</exception>
private void EmitAluOp(int opCode)
{
    ILGenerator il = _ilGen;

    AluOperation operation = (AluOperation)(opCode & 7);

    // Bitfield parameters, only consumed by the three bitfield operations below.
    int srcBit = (opCode >> 17) & 0x1f;
    int width = (opCode >> 22) & 0x1f;
    int dstBit = (opCode >> 27) & 0x1f;
    int mask = (1 << width) - 1;

    switch (operation)
    {
        case AluOperation.AluReg:
            // Register operation, selected by the 5 bits starting at bit 17.
            EmitAluOp((AluRegOperation)((opCode >> 17) & 0x1f), opCode);
            break;

        case AluOperation.AddImmediate:
            EmitLoadGprA(opCode);
            EmitLoadImm(opCode);
            il.Emit(OpCodes.Add);
            break;

        case AluOperation.BitfieldReplace:
            // ((B >> srcBit) & mask) << dstBit, merged into A with the destination field cleared.
            EmitLoadGprB(opCode);
            il.Emit(OpCodes.Ldc_I4, srcBit);
            il.Emit(OpCodes.Shr_Un);
            il.Emit(OpCodes.Ldc_I4, mask);
            il.Emit(OpCodes.And);
            il.Emit(OpCodes.Ldc_I4, dstBit);
            il.Emit(OpCodes.Shl);
            EmitLoadGprA(opCode);
            il.Emit(OpCodes.Ldc_I4, ~(mask << dstBit));
            il.Emit(OpCodes.And);
            il.Emit(OpCodes.Or);
            break;

        case AluOperation.BitfieldExtractLslImm:
            // ((B >> A) & mask) << dstBit.
            EmitLoadGprB(opCode);
            EmitLoadGprA(opCode);
            il.Emit(OpCodes.Shr_Un);
            il.Emit(OpCodes.Ldc_I4, mask);
            il.Emit(OpCodes.And);
            il.Emit(OpCodes.Ldc_I4, dstBit);
            il.Emit(OpCodes.Shl);
            break;

        case AluOperation.BitfieldExtractLslReg:
            // ((B >> srcBit) & mask) << A.
            EmitLoadGprB(opCode);
            il.Emit(OpCodes.Ldc_I4, srcBit);
            il.Emit(OpCodes.Shr_Un);
            il.Emit(OpCodes.Ldc_I4, mask);
            il.Emit(OpCodes.And);
            EmitLoadGprA(opCode);
            il.Emit(OpCodes.Shl);
            break;

        case AluOperation.ReadImmediate:
            // Calls MacroJitContext.Read with the device state argument and (A + immediate).
            il.Emit(OpCodes.Ldarg_1);
            EmitLoadGprA(opCode);
            EmitLoadImm(opCode);
            il.Emit(OpCodes.Add);
            il.Emit(OpCodes.Call, typeof(MacroJitContext).GetMethod(nameof(MacroJitContext.Read)));
            break;

        default:
            throw new InvalidOperationException($"Invalid operation \"{operation}\" on instruction 0x{opCode:X8}.");
    }
}
/// <summary>
/// Emits IL for a binary Arithmetic and Logic Unit instruction.
/// The 32-bit result is left on the IL evaluation stack; the add and subtract
/// variants additionally update the carry local.
/// </summary>
/// <param name="aluOp">Arithmetic and Logic Unit instruction</param>
/// <param name="opCode">Raw instruction</param>
/// <exception cref="InvalidOperationException">Throw when the instruction encoding is invalid</exception>
private void EmitAluOp(AluRegOperation aluOp, int opCode)
{
    ILGenerator il = _ilGen;

    // Pushes registers A and B, each one converted to an unsigned 64-bit value.
    void LoadWideOperands()
    {
        EmitLoadGprA(opCode);
        il.Emit(OpCodes.Conv_U8);
        EmitLoadGprB(opCode);
        il.Emit(OpCodes.Conv_U8);
    }

    // Pushes registers A and B as they are.
    void LoadOperands()
    {
        EmitLoadGprA(opCode);
        EmitLoadGprB(opCode);
    }

    // Compares a copy of the 64-bit result against a bound, stores the outcome
    // of the comparison as carry, then narrows the result to 32 bits.
    void StoreCarryAndNarrow(OpCode comparison, long bound)
    {
        il.Emit(OpCodes.Dup);
        il.Emit(OpCodes.Ldc_I8, bound);
        il.Emit(comparison);
        il.Emit(OpCodes.Stloc, _carry);
        il.Emit(OpCodes.Conv_U4);
    }

    switch (aluOp)
    {
        case AluRegOperation.Add:
            LoadWideOperands();
            il.Emit(OpCodes.Add);

            // Carry is set when the sum does not fit in 32 bits.
            StoreCarryAndNarrow(OpCodes.Cgt_Un, 0xffffffffL);
            break;

        case AluRegOperation.AddWithCarry:
            // A + (B + carry).
            LoadWideOperands();
            il.Emit(OpCodes.Ldloc_S, _carry);
            il.Emit(OpCodes.Conv_U8);
            il.Emit(OpCodes.Add);
            il.Emit(OpCodes.Add);
            StoreCarryAndNarrow(OpCodes.Cgt_Un, 0xffffffffL);
            break;

        case AluRegOperation.Subtract:
            LoadWideOperands();
            il.Emit(OpCodes.Sub);

            // Carry is set when the 64-bit difference stays below 2^32 (unsigned compare).
            StoreCarryAndNarrow(OpCodes.Clt_Un, 0x100000000L);
            break;

        case AluRegOperation.SubtractWithBorrow:
            // A - (B - (1 - carry)).
            LoadWideOperands();
            il.Emit(OpCodes.Ldc_I4_1);
            il.Emit(OpCodes.Ldloc_S, _carry);
            il.Emit(OpCodes.Sub);
            il.Emit(OpCodes.Conv_U8);
            il.Emit(OpCodes.Sub);
            il.Emit(OpCodes.Sub);
            StoreCarryAndNarrow(OpCodes.Clt_Un, 0x100000000L);
            break;

        case AluRegOperation.BitwiseExclusiveOr:
            LoadOperands();
            il.Emit(OpCodes.Xor);
            break;

        case AluRegOperation.BitwiseOr:
            LoadOperands();
            il.Emit(OpCodes.Or);
            break;

        case AluRegOperation.BitwiseAnd:
            LoadOperands();
            il.Emit(OpCodes.And);
            break;

        case AluRegOperation.BitwiseAndNot:
            // A & ~B.
            LoadOperands();
            il.Emit(OpCodes.Not);
            il.Emit(OpCodes.And);
            break;

        case AluRegOperation.BitwiseNotAnd:
            // ~(A & B).
            LoadOperands();
            il.Emit(OpCodes.And);
            il.Emit(OpCodes.Not);
            break;

        default:
            throw new InvalidOperationException($"Invalid operation \"{aluOp}\" on instruction 0x{opCode:X8}.");
    }
}
/// <summary>
/// Pushes the immediate value encoded on the instruction into the IL evaluation stack.
/// </summary>
/// <param name="opCode">Instruction from where the immediate should be extracted</param>
private void EmitLoadImm(int opCode)
{
    // The immediate lives in bits 14 and up. The arithmetic shift keeps the sign on purpose,
    // as the immediate is a signed value.
    int immediate = opCode >> 14;

    _ilGen.Emit(OpCodes.Ldc_I4, immediate);
}
/// <summary>
/// Pushes the value of the General Purpose register used as first operand into the IL evaluation stack.
/// </summary>
/// <param name="opCode">Instruction from where the register number should be extracted</param>
private void EmitLoadGprA(int opCode)
{
    // The first operand register number is the 3 bits field starting at bit 11.
    int registerA = (opCode >> 11) & 7;

    EmitLoadGpr(registerA);
}
/// <summary>
/// Pushes the value of the General Purpose register used as second operand into the IL evaluation stack.
/// </summary>
/// <param name="opCode">Instruction from where the register number should be extracted</param>
private void EmitLoadGprB(int opCode)
{
    // The second operand register number is the 3 bits field starting at bit 14.
    int registerB = (opCode >> 14) & 7;

    EmitLoadGpr(registerB);
}
/// <summary>
/// Pushes the value of a General Purpose register into the IL evaluation stack.
/// </summary>
/// <remarks>
/// Register number 0 always reads as the constant 0.
/// </remarks>
/// <param name="index">Register number</param>
private void EmitLoadGpr(int index)
{
    if (index != 0)
    {
        _ilGen.Emit(OpCodes.Ldloc_S, _gprs[index]);

        return;
    }

    // Register 0 has no backing local, load the hardcoded value instead.
    _ilGen.Emit(OpCodes.Ldc_I4_0);
}
/// <summary>
/// Emits a call to the context method that fetches an argument from the arguments FIFO.
/// The fetched argument is left on the IL evaluation stack.
/// </summary>
private void EmitFetchParam()
{
    var fetchParam = typeof(MacroJitContext).GetMethod(nameof(MacroJitContext.FetchParam));

    // The first argument of the generated method is the context the call is made on.
    _ilGen.Emit(OpCodes.Ldarg_0);
    _ilGen.Emit(OpCodes.Call, fetchParam);
}
/// <summary>
/// Pops the value on the top of the IL evaluation stack and stores it into the destination General Purpose register.
/// </summary>
/// <remarks>
/// Register number 0 does not exist, reads are hardcoded to 0, and writes are simply discarded.
/// </remarks>
/// <param name="opCode">Instruction from where the register number should be extracted</param>
private void EmitStoreDstGpr(int opCode)
{
    // The destination register number is the 3 bits field starting at bit 8.
    int destination = (opCode >> 8) & 7;

    if (destination != 0)
    {
        _ilGen.Emit(OpCodes.Stloc_S, _gprs[destination]);

        return;
    }

    // Writes to register 0 are dropped, but the value still has to leave the stack.
    _ilGen.Emit(OpCodes.Pop);
}
/// <summary>
/// Pops the value on the top of the IL evaluation stack and uses it as method address.
/// The low 12 bits become the destination method address of subsequent send calls,
/// and the 6 bits starting at bit 12 become the increment value,
/// which is added to the method address after each sent value.
/// </summary>
private void EmitStoreMethAddr()
{
    ILGenerator il = _ilGen;

    // Keep a second copy of the value, as both fields are extracted from it.
    il.Emit(OpCodes.Dup);

    // Method address = value & 0xfff.
    il.Emit(OpCodes.Ldc_I4, 0xfff);
    il.Emit(OpCodes.And);
    il.Emit(OpCodes.Stloc_S, _methAddr);

    // Method increment = (value >> 12) & 0x3f.
    il.Emit(OpCodes.Ldc_I4, 12);
    il.Emit(OpCodes.Shr_Un);
    il.Emit(OpCodes.Ldc_I4, 0x3f);
    il.Emit(OpCodes.And);
    il.Emit(OpCodes.Stloc_S, _methIncr);
}
/// <summary>
/// Emits a call that sends the value on the top of the IL evaluation stack to the GPU,
/// using the current method address. The method address is then advanced by the
/// current increment value.
/// </summary>
private void EmitSend()
{
    ILGenerator il = _ilGen;

    var send = typeof(MacroJitContext).GetMethod(nameof(MacroJitContext.Send));

    // The value is already on the stack; push the device state argument and the method address after it.
    il.Emit(OpCodes.Ldarg_1);
    il.Emit(OpCodes.Ldloc_S, _methAddr);
    il.Emit(OpCodes.Call, send);

    // Method address += method increment.
    il.Emit(OpCodes.Ldloc_S, _methAddr);
    il.Emit(OpCodes.Ldloc_S, _methIncr);
    il.Emit(OpCodes.Add);
    il.Emit(OpCodes.Stloc_S, _methAddr);
}
}
}