Merge pull request #1264 from degasus/optimizations
video_core: Optimize the command processor.
This commit is contained in:
commit
ae0c95efcc
9 changed files with 123 additions and 126 deletions
|
@ -8,6 +8,7 @@
|
||||||
#include "core/core.h"
|
#include "core/core.h"
|
||||||
#include "core/hle/service/nvdrv/devices/nvhost_gpu.h"
|
#include "core/hle/service/nvdrv/devices/nvhost_gpu.h"
|
||||||
#include "core/memory.h"
|
#include "core/memory.h"
|
||||||
|
#include "video_core/command_processor.h"
|
||||||
#include "video_core/gpu.h"
|
#include "video_core/gpu.h"
|
||||||
#include "video_core/memory_manager.h"
|
#include "video_core/memory_manager.h"
|
||||||
|
|
||||||
|
@ -134,17 +135,16 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp
|
||||||
LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}",
|
LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}",
|
||||||
params.address, params.num_entries, params.flags);
|
params.address, params.num_entries, params.flags);
|
||||||
|
|
||||||
ASSERT_MSG(input.size() ==
|
ASSERT_MSG(input.size() == sizeof(IoctlSubmitGpfifo) +
|
||||||
sizeof(IoctlSubmitGpfifo) + params.num_entries * sizeof(IoctlGpfifoEntry),
|
params.num_entries * sizeof(Tegra::CommandListHeader),
|
||||||
"Incorrect input size");
|
"Incorrect input size");
|
||||||
|
|
||||||
std::vector<IoctlGpfifoEntry> entries(params.num_entries);
|
std::vector<Tegra::CommandListHeader> entries(params.num_entries);
|
||||||
std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)],
|
std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)],
|
||||||
params.num_entries * sizeof(IoctlGpfifoEntry));
|
params.num_entries * sizeof(Tegra::CommandListHeader));
|
||||||
for (auto entry : entries) {
|
|
||||||
Tegra::GPUVAddr va_addr = entry.Address();
|
Core::System::GetInstance().GPU().ProcessCommandLists(entries);
|
||||||
Core::System::GetInstance().GPU().ProcessCommandList(va_addr, entry.sz);
|
|
||||||
}
|
|
||||||
params.fence_out.id = 0;
|
params.fence_out.id = 0;
|
||||||
params.fence_out.value = 0;
|
params.fence_out.value = 0;
|
||||||
std::memcpy(output.data(), &params, sizeof(IoctlSubmitGpfifo));
|
std::memcpy(output.data(), &params, sizeof(IoctlSubmitGpfifo));
|
||||||
|
@ -160,14 +160,12 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output)
|
||||||
LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}",
|
LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}",
|
||||||
params.address, params.num_entries, params.flags);
|
params.address, params.num_entries, params.flags);
|
||||||
|
|
||||||
std::vector<IoctlGpfifoEntry> entries(params.num_entries);
|
std::vector<Tegra::CommandListHeader> entries(params.num_entries);
|
||||||
Memory::ReadBlock(params.address, entries.data(),
|
Memory::ReadBlock(params.address, entries.data(),
|
||||||
params.num_entries * sizeof(IoctlGpfifoEntry));
|
params.num_entries * sizeof(Tegra::CommandListHeader));
|
||||||
|
|
||||||
|
Core::System::GetInstance().GPU().ProcessCommandLists(entries);
|
||||||
|
|
||||||
for (auto entry : entries) {
|
|
||||||
Tegra::GPUVAddr va_addr = entry.Address();
|
|
||||||
Core::System::GetInstance().GPU().ProcessCommandList(va_addr, entry.sz);
|
|
||||||
}
|
|
||||||
params.fence_out.id = 0;
|
params.fence_out.id = 0;
|
||||||
params.fence_out.value = 0;
|
params.fence_out.value = 0;
|
||||||
std::memcpy(output.data(), &params, output.size());
|
std::memcpy(output.data(), &params, output.size());
|
||||||
|
|
|
@ -10,7 +10,6 @@
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "common/swap.h"
|
#include "common/swap.h"
|
||||||
#include "core/hle/service/nvdrv/devices/nvdevice.h"
|
#include "core/hle/service/nvdrv/devices/nvdevice.h"
|
||||||
#include "video_core/memory_manager.h"
|
|
||||||
|
|
||||||
namespace Service::Nvidia::Devices {
|
namespace Service::Nvidia::Devices {
|
||||||
|
|
||||||
|
@ -151,22 +150,6 @@ private:
|
||||||
};
|
};
|
||||||
static_assert(sizeof(IoctlAllocObjCtx) == 16, "IoctlAllocObjCtx is incorrect size");
|
static_assert(sizeof(IoctlAllocObjCtx) == 16, "IoctlAllocObjCtx is incorrect size");
|
||||||
|
|
||||||
struct IoctlGpfifoEntry {
|
|
||||||
u32_le entry0; // gpu_va_lo
|
|
||||||
union {
|
|
||||||
u32_le entry1; // gpu_va_hi | (unk_0x02 << 0x08) | (size << 0x0A) | (unk_0x01 << 0x1F)
|
|
||||||
BitField<0, 8, u32_le> gpu_va_hi;
|
|
||||||
BitField<8, 2, u32_le> unk1;
|
|
||||||
BitField<10, 21, u32_le> sz;
|
|
||||||
BitField<31, 1, u32_le> unk2;
|
|
||||||
};
|
|
||||||
|
|
||||||
Tegra::GPUVAddr Address() const {
|
|
||||||
return (static_cast<Tegra::GPUVAddr>(gpu_va_hi) << 32) | entry0;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
static_assert(sizeof(IoctlGpfifoEntry) == 8, "IoctlGpfifoEntry is incorrect size");
|
|
||||||
|
|
||||||
struct IoctlSubmitGpfifo {
|
struct IoctlSubmitGpfifo {
|
||||||
u64_le address; // pointer to gpfifo entry structs
|
u64_le address; // pointer to gpfifo entry structs
|
||||||
u32_le num_entries; // number of fence objects being submitted
|
u32_le num_entries; // number of fence objects being submitted
|
||||||
|
|
|
@ -28,98 +28,106 @@ enum class BufferMethods {
|
||||||
CountBufferMethods = 0x40,
|
CountBufferMethods = 0x40,
|
||||||
};
|
};
|
||||||
|
|
||||||
void GPU::WriteReg(u32 method, u32 subchannel, u32 value, u32 remaining_params) {
|
MICROPROFILE_DEFINE(ProcessCommandLists, "GPU", "Execute command buffer", MP_RGB(128, 128, 192));
|
||||||
LOG_TRACE(HW_GPU,
|
|
||||||
"Processing method {:08X} on subchannel {} value "
|
|
||||||
"{:08X} remaining params {}",
|
|
||||||
method, subchannel, value, remaining_params);
|
|
||||||
|
|
||||||
ASSERT(subchannel < bound_engines.size());
|
void GPU::ProcessCommandLists(const std::vector<CommandListHeader>& commands) {
|
||||||
|
MICROPROFILE_SCOPE(ProcessCommandLists);
|
||||||
|
|
||||||
if (method == static_cast<u32>(BufferMethods::BindObject)) {
|
auto WriteReg = [this](u32 method, u32 subchannel, u32 value, u32 remaining_params) {
|
||||||
// Bind the current subchannel to the desired engine id.
|
LOG_TRACE(HW_GPU,
|
||||||
LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", subchannel, value);
|
"Processing method {:08X} on subchannel {} value "
|
||||||
bound_engines[subchannel] = static_cast<EngineID>(value);
|
"{:08X} remaining params {}",
|
||||||
return;
|
method, subchannel, value, remaining_params);
|
||||||
}
|
|
||||||
|
|
||||||
if (method < static_cast<u32>(BufferMethods::CountBufferMethods)) {
|
ASSERT(subchannel < bound_engines.size());
|
||||||
// TODO(Subv): Research and implement these methods.
|
|
||||||
LOG_ERROR(HW_GPU, "Special buffer methods other than Bind are not implemented");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
const EngineID engine = bound_engines[subchannel];
|
if (method == static_cast<u32>(BufferMethods::BindObject)) {
|
||||||
|
// Bind the current subchannel to the desired engine id.
|
||||||
switch (engine) {
|
LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", subchannel, value);
|
||||||
case EngineID::FERMI_TWOD_A:
|
bound_engines[subchannel] = static_cast<EngineID>(value);
|
||||||
fermi_2d->WriteReg(method, value);
|
return;
|
||||||
break;
|
|
||||||
case EngineID::MAXWELL_B:
|
|
||||||
maxwell_3d->WriteReg(method, value, remaining_params);
|
|
||||||
break;
|
|
||||||
case EngineID::MAXWELL_COMPUTE_B:
|
|
||||||
maxwell_compute->WriteReg(method, value);
|
|
||||||
break;
|
|
||||||
case EngineID::MAXWELL_DMA_COPY_A:
|
|
||||||
maxwell_dma->WriteReg(method, value);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
UNIMPLEMENTED_MSG("Unimplemented engine");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void GPU::ProcessCommandList(GPUVAddr address, u32 size) {
|
|
||||||
const boost::optional<VAddr> head_address = memory_manager->GpuToCpuAddress(address);
|
|
||||||
VAddr current_addr = *head_address;
|
|
||||||
while (current_addr < *head_address + size * sizeof(CommandHeader)) {
|
|
||||||
const CommandHeader header = {Memory::Read32(current_addr)};
|
|
||||||
current_addr += sizeof(u32);
|
|
||||||
|
|
||||||
switch (header.mode.Value()) {
|
|
||||||
case SubmissionMode::IncreasingOld:
|
|
||||||
case SubmissionMode::Increasing: {
|
|
||||||
// Increase the method value with each argument.
|
|
||||||
for (unsigned i = 0; i < header.arg_count; ++i) {
|
|
||||||
WriteReg(header.method + i, header.subchannel, Memory::Read32(current_addr),
|
|
||||||
header.arg_count - i - 1);
|
|
||||||
current_addr += sizeof(u32);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
case SubmissionMode::NonIncreasingOld:
|
|
||||||
case SubmissionMode::NonIncreasing: {
|
|
||||||
// Use the same method value for all arguments.
|
|
||||||
for (unsigned i = 0; i < header.arg_count; ++i) {
|
|
||||||
WriteReg(header.method, header.subchannel, Memory::Read32(current_addr),
|
|
||||||
header.arg_count - i - 1);
|
|
||||||
current_addr += sizeof(u32);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case SubmissionMode::IncreaseOnce: {
|
|
||||||
ASSERT(header.arg_count.Value() >= 1);
|
|
||||||
|
|
||||||
// Use the original method for the first argument and then the next method for all other
|
if (method < static_cast<u32>(BufferMethods::CountBufferMethods)) {
|
||||||
// arguments.
|
// TODO(Subv): Research and implement these methods.
|
||||||
WriteReg(header.method, header.subchannel, Memory::Read32(current_addr),
|
LOG_ERROR(HW_GPU, "Special buffer methods other than Bind are not implemented");
|
||||||
header.arg_count - 1);
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const EngineID engine = bound_engines[subchannel];
|
||||||
|
|
||||||
|
switch (engine) {
|
||||||
|
case EngineID::FERMI_TWOD_A:
|
||||||
|
fermi_2d->WriteReg(method, value);
|
||||||
|
break;
|
||||||
|
case EngineID::MAXWELL_B:
|
||||||
|
maxwell_3d->WriteReg(method, value, remaining_params);
|
||||||
|
break;
|
||||||
|
case EngineID::MAXWELL_COMPUTE_B:
|
||||||
|
maxwell_compute->WriteReg(method, value);
|
||||||
|
break;
|
||||||
|
case EngineID::MAXWELL_DMA_COPY_A:
|
||||||
|
maxwell_dma->WriteReg(method, value);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
UNIMPLEMENTED_MSG("Unimplemented engine");
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
for (auto entry : commands) {
|
||||||
|
Tegra::GPUVAddr address = entry.Address();
|
||||||
|
u32 size = entry.sz;
|
||||||
|
const boost::optional<VAddr> head_address = memory_manager->GpuToCpuAddress(address);
|
||||||
|
VAddr current_addr = *head_address;
|
||||||
|
while (current_addr < *head_address + size * sizeof(CommandHeader)) {
|
||||||
|
const CommandHeader header = {Memory::Read32(current_addr)};
|
||||||
current_addr += sizeof(u32);
|
current_addr += sizeof(u32);
|
||||||
|
|
||||||
for (unsigned i = 1; i < header.arg_count; ++i) {
|
switch (header.mode.Value()) {
|
||||||
WriteReg(header.method + 1, header.subchannel, Memory::Read32(current_addr),
|
case SubmissionMode::IncreasingOld:
|
||||||
header.arg_count - i - 1);
|
case SubmissionMode::Increasing: {
|
||||||
current_addr += sizeof(u32);
|
// Increase the method value with each argument.
|
||||||
|
for (unsigned i = 0; i < header.arg_count; ++i) {
|
||||||
|
WriteReg(header.method + i, header.subchannel, Memory::Read32(current_addr),
|
||||||
|
header.arg_count - i - 1);
|
||||||
|
current_addr += sizeof(u32);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case SubmissionMode::NonIncreasingOld:
|
||||||
|
case SubmissionMode::NonIncreasing: {
|
||||||
|
// Use the same method value for all arguments.
|
||||||
|
for (unsigned i = 0; i < header.arg_count; ++i) {
|
||||||
|
WriteReg(header.method, header.subchannel, Memory::Read32(current_addr),
|
||||||
|
header.arg_count - i - 1);
|
||||||
|
current_addr += sizeof(u32);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case SubmissionMode::IncreaseOnce: {
|
||||||
|
ASSERT(header.arg_count.Value() >= 1);
|
||||||
|
|
||||||
|
// Use the original method for the first argument and then the next method for all
|
||||||
|
// other arguments.
|
||||||
|
WriteReg(header.method, header.subchannel, Memory::Read32(current_addr),
|
||||||
|
header.arg_count - 1);
|
||||||
|
current_addr += sizeof(u32);
|
||||||
|
|
||||||
|
for (unsigned i = 1; i < header.arg_count; ++i) {
|
||||||
|
WriteReg(header.method + 1, header.subchannel, Memory::Read32(current_addr),
|
||||||
|
header.arg_count - i - 1);
|
||||||
|
current_addr += sizeof(u32);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case SubmissionMode::Inline: {
|
||||||
|
// The register value is stored in the bits 16-28 as an immediate
|
||||||
|
WriteReg(header.method, header.subchannel, header.inline_data, 0);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
UNIMPLEMENTED();
|
||||||
}
|
}
|
||||||
break;
|
|
||||||
}
|
|
||||||
case SubmissionMode::Inline: {
|
|
||||||
// The register value is stored in the bits 16-28 as an immediate
|
|
||||||
WriteReg(header.method, header.subchannel, header.inline_data, 0);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
UNIMPLEMENTED();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -7,6 +7,7 @@
|
||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
#include "common/bit_field.h"
|
#include "common/bit_field.h"
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
|
#include "video_core/memory_manager.h"
|
||||||
|
|
||||||
namespace Tegra {
|
namespace Tegra {
|
||||||
|
|
||||||
|
@ -19,6 +20,22 @@ enum class SubmissionMode : u32 {
|
||||||
IncreaseOnce = 5
|
IncreaseOnce = 5
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct CommandListHeader {
|
||||||
|
u32 entry0; // gpu_va_lo
|
||||||
|
union {
|
||||||
|
u32 entry1; // gpu_va_hi | (unk_0x02 << 0x08) | (size << 0x0A) | (unk_0x01 << 0x1F)
|
||||||
|
BitField<0, 8, u32> gpu_va_hi;
|
||||||
|
BitField<8, 2, u32> unk1;
|
||||||
|
BitField<10, 21, u32> sz;
|
||||||
|
BitField<31, 1, u32> unk2;
|
||||||
|
};
|
||||||
|
|
||||||
|
GPUVAddr Address() const {
|
||||||
|
return (static_cast<GPUVAddr>(gpu_va_hi) << 32) | entry0;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
static_assert(sizeof(CommandListHeader) == 8, "CommandListHeader is incorrect size");
|
||||||
|
|
||||||
union CommandHeader {
|
union CommandHeader {
|
||||||
u32 hex;
|
u32 hex;
|
||||||
|
|
||||||
|
|
|
@ -135,8 +135,6 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
rasterizer.NotifyMaxwellRegisterChanged(method);
|
|
||||||
|
|
||||||
if (debug_context) {
|
if (debug_context) {
|
||||||
debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandProcessed, nullptr);
|
debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandProcessed, nullptr);
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,6 +6,7 @@
|
||||||
|
|
||||||
#include <array>
|
#include <array>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
#include <vector>
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "core/hle/service/nvflinger/buffer_queue.h"
|
#include "core/hle/service/nvflinger/buffer_queue.h"
|
||||||
#include "video_core/memory_manager.h"
|
#include "video_core/memory_manager.h"
|
||||||
|
@ -67,6 +68,7 @@ u32 RenderTargetBytesPerPixel(RenderTargetFormat format);
|
||||||
/// Returns the number of bytes per pixel of each depth format.
|
/// Returns the number of bytes per pixel of each depth format.
|
||||||
u32 DepthFormatBytesPerPixel(DepthFormat format);
|
u32 DepthFormatBytesPerPixel(DepthFormat format);
|
||||||
|
|
||||||
|
struct CommandListHeader;
|
||||||
class DebugContext;
|
class DebugContext;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -115,7 +117,7 @@ public:
|
||||||
~GPU();
|
~GPU();
|
||||||
|
|
||||||
/// Processes a command list stored at the specified address in GPU memory.
|
/// Processes a command list stored at the specified address in GPU memory.
|
||||||
void ProcessCommandList(GPUVAddr address, u32 size);
|
void ProcessCommandLists(const std::vector<CommandListHeader>& commands);
|
||||||
|
|
||||||
/// Returns a reference to the Maxwell3D GPU engine.
|
/// Returns a reference to the Maxwell3D GPU engine.
|
||||||
Engines::Maxwell3D& Maxwell3D();
|
Engines::Maxwell3D& Maxwell3D();
|
||||||
|
@ -130,9 +132,6 @@ public:
|
||||||
const Tegra::MemoryManager& MemoryManager() const;
|
const Tegra::MemoryManager& MemoryManager() const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
/// Writes a single register in the engine bound to the specified subchannel
|
|
||||||
void WriteReg(u32 method, u32 subchannel, u32 value, u32 remaining_params);
|
|
||||||
|
|
||||||
std::unique_ptr<Tegra::MemoryManager> memory_manager;
|
std::unique_ptr<Tegra::MemoryManager> memory_manager;
|
||||||
|
|
||||||
/// Mapping of command subchannels to their bound engine ids.
|
/// Mapping of command subchannels to their bound engine ids.
|
||||||
|
|
|
@ -20,9 +20,6 @@ public:
|
||||||
/// Clear the current framebuffer
|
/// Clear the current framebuffer
|
||||||
virtual void Clear() = 0;
|
virtual void Clear() = 0;
|
||||||
|
|
||||||
/// Notify rasterizer that the specified Maxwell register has been changed
|
|
||||||
virtual void NotifyMaxwellRegisterChanged(u32 method) = 0;
|
|
||||||
|
|
||||||
/// Notify rasterizer that all caches should be flushed to Switch memory
|
/// Notify rasterizer that all caches should be flushed to Switch memory
|
||||||
virtual void FlushAll() = 0;
|
virtual void FlushAll() = 0;
|
||||||
|
|
||||||
|
|
|
@ -527,8 +527,6 @@ void RasterizerOpenGL::DrawArrays() {
|
||||||
state.Apply();
|
state.Apply();
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) {}
|
|
||||||
|
|
||||||
void RasterizerOpenGL::FlushAll() {}
|
void RasterizerOpenGL::FlushAll() {}
|
||||||
|
|
||||||
void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {}
|
void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {}
|
||||||
|
|
|
@ -45,7 +45,6 @@ public:
|
||||||
|
|
||||||
void DrawArrays() override;
|
void DrawArrays() override;
|
||||||
void Clear() override;
|
void Clear() override;
|
||||||
void NotifyMaxwellRegisterChanged(u32 method) override;
|
|
||||||
void FlushAll() override;
|
void FlushAll() override;
|
||||||
void FlushRegion(VAddr addr, u64 size) override;
|
void FlushRegion(VAddr addr, u64 size) override;
|
||||||
void InvalidateRegion(VAddr addr, u64 size) override;
|
void InvalidateRegion(VAddr addr, u64 size) override;
|
||||||
|
|
Loading…
Reference in a new issue