GPU: Implement Flush Requests for Async mode.
This commit is contained in:
parent
b7bc3c2549
commit
1fb516cd97
6 changed files with 70 additions and 8 deletions
|
@ -125,6 +125,28 @@ bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Queues a flush request for the region described by `addr` and `size`.
///
/// The request is appended to `flush_requests` under `flush_request_mutex` and is
/// serviced later by TickWork().
///
/// @param addr Start address of the region to flush.
/// @param size Size of the region to flush.
/// @return The fence value assigned to this request. Fences are handed out in strictly
///         increasing order; callers can compare the returned value against
///         CurrentFlushRequestFence() to learn when the request has been serviced.
u64 GPU::RequestFlush(CacheAddr addr, std::size_t size) {
    std::lock_guard request_guard{flush_request_mutex};
    // Pre-increment: the counter is bumped first and the new value becomes this ticket.
    const u64 ticket = ++last_flush_fence;
    flush_requests.emplace_back(ticket, addr, size);
    return ticket;
}
|
||||||
|
|
||||||
|
void GPU::TickWork() {
|
||||||
|
std::unique_lock lck{flush_request_mutex};
|
||||||
|
while (!flush_requests.empty()) {
|
||||||
|
auto& request = flush_requests.front();
|
||||||
|
const u64 fence = request.fence;
|
||||||
|
const CacheAddr addr = request.addr;
|
||||||
|
const std::size_t size = request.size;
|
||||||
|
flush_requests.pop_front();
|
||||||
|
flush_request_mutex.unlock();
|
||||||
|
renderer->Rasterizer().FlushRegion(addr, size);
|
||||||
|
current_flush_fence.store(fence);
|
||||||
|
flush_request_mutex.lock();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
u64 GPU::GetTicks() const {
|
u64 GPU::GetTicks() const {
|
||||||
// These values were reverse engineered by fincs from NVN
|
// These values were reverse engineered by fincs from NVN
|
||||||
// The gpu clock is reported in units of 385/625 nanoseconds
|
// The gpu clock is reported in units of 385/625 nanoseconds
|
||||||
|
|
|
@ -159,6 +159,14 @@ public:
|
||||||
void SyncGuestHost();
|
void SyncGuestHost();
|
||||||
virtual void OnCommandListEnd();
|
virtual void OnCommandListEnd();
|
||||||
|
|
||||||
|
u64 RequestFlush(CacheAddr addr, std::size_t size);
|
||||||
|
|
||||||
|
/// Returns the fence of the most recently completed flush request.
///
/// A request returned by RequestFlush() has been serviced once this value is greater
/// than or equal to that request's fence.
u64 CurrentFlushRequestFence() const {
    // Acquire pairs with the store performed after the flush in TickWork(): a caller
    // that observes the new fence is then also guaranteed to observe the memory
    // written by that flush. A relaxed load would not provide that ordering.
    return current_flush_fence.load(std::memory_order_acquire);
}
|
||||||
|
|
||||||
|
void TickWork();
|
||||||
|
|
||||||
/// Returns a reference to the Maxwell3D GPU engine.
|
/// Returns a reference to the Maxwell3D GPU engine.
|
||||||
Engines::Maxwell3D& Maxwell3D();
|
Engines::Maxwell3D& Maxwell3D();
|
||||||
|
|
||||||
|
@ -327,6 +335,19 @@ private:
|
||||||
|
|
||||||
std::condition_variable sync_cv;
|
std::condition_variable sync_cv;
|
||||||
|
|
||||||
|
struct FlushRequest {
|
||||||
|
FlushRequest(u64 fence, CacheAddr addr, std::size_t size)
|
||||||
|
: fence{fence}, addr{addr}, size{size} {}
|
||||||
|
u64 fence;
|
||||||
|
CacheAddr addr;
|
||||||
|
std::size_t size;
|
||||||
|
};
|
||||||
|
|
||||||
|
std::list<FlushRequest> flush_requests;
|
||||||
|
std::atomic<u64> current_flush_fence{};
|
||||||
|
u64 last_flush_fence{};
|
||||||
|
std::mutex flush_request_mutex;
|
||||||
|
|
||||||
const bool is_async;
|
const bool is_async;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -15,8 +15,9 @@
|
||||||
namespace VideoCommon::GPUThread {
|
namespace VideoCommon::GPUThread {
|
||||||
|
|
||||||
/// Runs the GPU thread
|
/// Runs the GPU thread
|
||||||
static void RunThread(VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context,
|
static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
|
||||||
Tegra::DmaPusher& dma_pusher, SynchState& state) {
|
Core::Frontend::GraphicsContext& context, Tegra::DmaPusher& dma_pusher,
|
||||||
|
SynchState& state) {
|
||||||
MicroProfileOnThreadCreate("GpuThread");
|
MicroProfileOnThreadCreate("GpuThread");
|
||||||
|
|
||||||
// Wait for first GPU command before acquiring the window context
|
// Wait for first GPU command before acquiring the window context
|
||||||
|
@ -40,6 +41,8 @@ static void RunThread(VideoCore::RendererBase& renderer, Core::Frontend::Graphic
|
||||||
renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr);
|
renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr);
|
||||||
} else if (const auto data = std::get_if<OnCommandListEndCommand>(&next.data)) {
|
} else if (const auto data = std::get_if<OnCommandListEndCommand>(&next.data)) {
|
||||||
renderer.Rasterizer().ReleaseFences();
|
renderer.Rasterizer().ReleaseFences();
|
||||||
|
} else if (const auto data = std::get_if<GPUTickCommand>(&next.data)) {
|
||||||
|
system.GPU().TickWork();
|
||||||
} else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) {
|
} else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) {
|
||||||
renderer.Rasterizer().FlushRegion(data->addr, data->size);
|
renderer.Rasterizer().FlushRegion(data->addr, data->size);
|
||||||
} else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) {
|
} else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) {
|
||||||
|
@ -68,8 +71,8 @@ ThreadManager::~ThreadManager() {
|
||||||
void ThreadManager::StartThread(VideoCore::RendererBase& renderer,
|
void ThreadManager::StartThread(VideoCore::RendererBase& renderer,
|
||||||
Core::Frontend::GraphicsContext& context,
|
Core::Frontend::GraphicsContext& context,
|
||||||
Tegra::DmaPusher& dma_pusher) {
|
Tegra::DmaPusher& dma_pusher) {
|
||||||
thread = std::thread{RunThread, std::ref(renderer), std::ref(context), std::ref(dma_pusher),
|
thread = std::thread{RunThread, std::ref(system), std::ref(renderer),
|
||||||
std::ref(state)};
|
std::ref(context), std::ref(dma_pusher), std::ref(state)};
|
||||||
}
|
}
|
||||||
|
|
||||||
void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
|
void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
|
||||||
|
@ -85,8 +88,10 @@ void ThreadManager::FlushRegion(VAddr addr, u64 size) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (system.Renderer().Rasterizer().MustFlushRegion(addr, size)) {
|
if (system.Renderer().Rasterizer().MustFlushRegion(addr, size)) {
|
||||||
u64 fence = PushCommand(FlushRegionCommand(addr, size));
|
auto& gpu = system.GPU();
|
||||||
while (fence > state.signaled_fence.load(std::memory_order_relaxed)) {
|
u64 fence = gpu.RequestFlush(addr, size);
|
||||||
|
PushCommand(GPUTickCommand());
|
||||||
|
while (fence > gpu.CurrentFlushRequestFence()) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -70,12 +70,16 @@ struct FlushAndInvalidateRegionCommand final {
|
||||||
u64 size;
|
u64 size;
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Command to signal to the GPU thread that processing has ended
|
/// Command called within the gpu, to schedule actions after a command list end
|
||||||
struct OnCommandListEndCommand final {};
|
struct OnCommandListEndCommand final {};
|
||||||
|
|
||||||
|
/// Command to make the gpu look into pending requests
|
||||||
|
/// Carries no payload; the GPU thread handles it by calling GPU::TickWork().
struct GPUTickCommand final {};
|
||||||
|
|
||||||
using CommandData =
|
using CommandData =
|
||||||
std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
|
std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
|
||||||
InvalidateRegionCommand, FlushAndInvalidateRegionCommand, OnCommandListEndCommand>;
|
InvalidateRegionCommand, FlushAndInvalidateRegionCommand, OnCommandListEndCommand,
|
||||||
|
GPUTickCommand>;
|
||||||
|
|
||||||
struct CommandDataContainer {
|
struct CommandDataContainer {
|
||||||
CommandDataContainer() = default;
|
CommandDataContainer() = default;
|
||||||
|
|
|
@ -601,6 +601,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
|
||||||
EndTransformFeedback();
|
EndTransformFeedback();
|
||||||
|
|
||||||
++num_queued_commands;
|
++num_queued_commands;
|
||||||
|
|
||||||
|
system.GPU().TickWork();
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
|
void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
|
||||||
|
@ -628,6 +630,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
|
||||||
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
|
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
|
||||||
glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
|
glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
|
||||||
++num_queued_commands;
|
++num_queued_commands;
|
||||||
|
system.GPU().TickWork();
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) {
|
void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) {
|
||||||
|
@ -652,6 +655,9 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) {
|
bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) {
|
||||||
|
if (!Settings::IsGPULevelExtreme()) {
|
||||||
|
return buffer_cache.MustFlushRegion(addr, size);
|
||||||
|
}
|
||||||
return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size);
|
return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -365,6 +365,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
|
||||||
});
|
});
|
||||||
|
|
||||||
EndTransformFeedback();
|
EndTransformFeedback();
|
||||||
|
|
||||||
|
system.GPU().TickWork();
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerVulkan::Clear() {
|
void RasterizerVulkan::Clear() {
|
||||||
|
@ -492,6 +494,8 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
|
||||||
descriptor_set, {});
|
descriptor_set, {});
|
||||||
cmdbuf.Dispatch(grid_x, grid_y, grid_z);
|
cmdbuf.Dispatch(grid_x, grid_y, grid_z);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
system.GPU().TickWork();
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) {
|
void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) {
|
||||||
|
|
Loading…
Reference in a new issue