From 0f29a1f4ef47ebc5eff0a8e39d5d955c0f9136c5 Mon Sep 17 00:00:00 2001 From: pineappleEA Date: Sat, 31 Jul 2021 20:37:34 +0200 Subject: [PATCH] early-access version 1951 --- README.md | 2 +- .../service/nvdrv/devices/nvdisp_disp0.cpp | 9 +- .../hle/service/nvdrv/devices/nvdisp_disp0.h | 2 +- .../hle/service/nvflinger/buffer_queue.cpp | 4 - src/core/hle/service/nvflinger/buffer_queue.h | 1 - src/core/hle/service/nvflinger/nvflinger.cpp | 30 ++-- src/core/hle/service/nvflinger/nvflinger.h | 2 - src/core/hle/service/vi/vi.cpp | 1 - src/video_core/gpu.cpp | 139 ++---------------- src/video_core/gpu.h | 83 ++--------- src/video_core/gpu_thread.cpp | 2 +- .../renderer_opengl/gl_rasterizer.cpp | 5 +- .../renderer_vulkan/vk_rasterizer.cpp | 1 - 13 files changed, 52 insertions(+), 229 deletions(-) diff --git a/README.md b/README.md index 16c6cb2ec..7510f52bc 100755 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ yuzu emulator early access ============= -This is the source code for early-access 1950. +This is the source code for early-access 1951. ## Legal Notice diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp index e5eb397c0..ce6065db2 100755 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp @@ -5,8 +5,10 @@ #include "common/assert.h" #include "common/logging/log.h" #include "core/core.h" +#include "core/core_timing.h" #include "core/hle/service/nvdrv/devices/nvdisp_disp0.h" #include "core/hle/service/nvdrv/devices/nvmap.h" +#include "core/perf_stats.h" #include "video_core/gpu.h" #include "video_core/renderer_base.h" @@ -39,7 +41,7 @@ void nvdisp_disp0::OnClose(DeviceFD fd) {} void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride, NVFlinger::BufferQueue::BufferTransformFlags transform, - const Common::Rectangle& crop_rect, const MultiFence& fences) { + const Common::Rectangle& crop_rect) { VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle); LOG_TRACE(Service, "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}", @@ -50,7 +52,10 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3 addr, offset, width, height, stride, static_cast(format), transform, crop_rect}; - system.GPU().QueueFrame(&framebuffer, fences); + system.GetPerfStats().EndSystemFrame(); + system.GPU().SwapBuffers(&framebuffer); + system.SpeedLimiter().DoSpeedLimiting(system.CoreTiming().GetGlobalTimeUs()); + system.GetPerfStats().BeginSystemFrame(); } } // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h index a5bf12a12..de01e1d5f 100755 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.h @@ -33,7 +33,7 @@ public: /// Performs a screen flip, drawing the buffer pointed to by the handle. void flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride, NVFlinger::BufferQueue::BufferTransformFlags transform, - const Common::Rectangle& crop_rect, const MultiFence& fence); + const Common::Rectangle& crop_rect); private: std::shared_ptr nvmap_dev; diff --git a/src/core/hle/service/nvflinger/buffer_queue.cpp b/src/core/hle/service/nvflinger/buffer_queue.cpp index 94f027bd3..59ddf6298 100755 --- a/src/core/hle/service/nvflinger/buffer_queue.cpp +++ b/src/core/hle/service/nvflinger/buffer_queue.cpp @@ -88,10 +88,6 @@ const IGBPBuffer& BufferQueue::RequestBuffer(u32 slot) const { return buffers[slot].igbp_buffer; } -const BufferQueue::Buffer& BufferQueue::AccessBuffer(u32 slot) const { - return buffers[slot]; -} - void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform, const Common::Rectangle& crop_rect, u32 swap_interval, Service::Nvidia::MultiFence& multi_fence) { diff --git a/src/core/hle/service/nvflinger/buffer_queue.h b/src/core/hle/service/nvflinger/buffer_queue.h index 088e13446..61e337ac5 100755 --- a/src/core/hle/service/nvflinger/buffer_queue.h +++ b/src/core/hle/service/nvflinger/buffer_queue.h @@ -107,7 +107,6 @@ public: void Connect(); void Disconnect(); u32 Query(QueryType type); - const Buffer& AccessBuffer(u32 slot) const; u32 GetId() const { return id; diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index 802fc2bd8..941748970 100755 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp @@ -274,6 +274,8 @@ void NVFlinger::Compose() { continue; } + const auto& igbp_buffer = buffer->get().igbp_buffer; + if (!system.IsPoweredOn()) { return; // We are likely shutting down } @@ -287,31 +289,23 @@ void NVFlinger::Compose() { } guard->lock(); - system.GetPerfStats().EndSystemFrame(); MicroProfileFlip(); - system.SpeedLimiter().DoSpeedLimiting(system.CoreTiming().GetGlobalTimeUs()); - system.GetPerfStats().BeginSystemFrame(); + + // Now send the buffer to the GPU for drawing. + // TODO(Subv): Support more than just disp0. The display device selection is probably based + // on which display we're drawing (Default, Internal, External, etc) + auto nvdisp = nvdrv->GetDevice("/dev/nvdisp_disp0"); + ASSERT(nvdisp); + + nvdisp->flip(igbp_buffer.gpu_buffer_id, igbp_buffer.offset, igbp_buffer.format, + igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride, + buffer->get().transform, buffer->get().crop_rect); swap_interval = buffer->get().swap_interval; buffer_queue.ReleaseBuffer(buffer->get().slot); } } -void NVFlinger::PrequeeFrame(u32 buffer_queue_id, u32 slot) { - auto& buffer_queue = *FindBufferQueue(buffer_queue_id); - const auto& buffer = buffer_queue.AccessBuffer(slot); - const auto& igbp_buffer = buffer.igbp_buffer; - - // Now send the buffer to the GPU for drawing. - // TODO(Subv): Support more than just disp0. The display device selection is probably based - // on which display we're drawing (Default, Internal, External, etc) - auto nvdisp = nvdrv->GetDevice("/dev/nvdisp_disp0"); - ASSERT(nvdisp); - nvdisp->flip(igbp_buffer.gpu_buffer_id, igbp_buffer.offset, igbp_buffer.format, - igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride, buffer.transform, - buffer.crop_rect, buffer.multi_fence); -} - s64 NVFlinger::GetNextTicks() const { static constexpr s64 max_hertz = 120LL; diff --git a/src/core/hle/service/nvflinger/nvflinger.h b/src/core/hle/service/nvflinger/nvflinger.h index 1f474f02c..d80fd07ef 100755 --- a/src/core/hle/service/nvflinger/nvflinger.h +++ b/src/core/hle/service/nvflinger/nvflinger.h @@ -77,8 +77,6 @@ public: /// Obtains a buffer queue identified by the ID. [[nodiscard]] BufferQueue* FindBufferQueue(u32 id); - void PrequeeFrame(u32 buffer_queue_id, u32 slot); - /// Performs a composition request to the emulated nvidia GPU and triggers the vsync events when /// finished. void Compose(); diff --git a/src/core/hle/service/vi/vi.cpp b/src/core/hle/service/vi/vi.cpp index 2ae8562eb..3e5949d52 100755 --- a/src/core/hle/service/vi/vi.cpp +++ b/src/core/hle/service/vi/vi.cpp @@ -592,7 +592,6 @@ private: buffer_queue.QueueBuffer(request.data.slot, request.data.transform, request.data.GetCropRect(), request.data.swap_interval, request.data.multi_fence); - nv_flinger.PrequeeFrame(id, request.data.slot); IGBPQueueBufferResponseParcel response{1280, 720}; ctx.WriteBuffer(response.Serialize()); diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 11c8ba9b4..ff024f530 100755 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -114,17 +114,10 @@ void GPU::WaitFence(u32 syncpoint_id, u32 value) { }); } -void GPU::IncrementSyncPointGuest(const u32 syncpoint_id) { - std::lock_guard lock{pre_sync_mutex}; - auto& syncpoint = pre_syncpoints.at(syncpoint_id); - syncpoint++; - ProcessFrameRequests(syncpoint_id, syncpoint); -} - void GPU::IncrementSyncPoint(const u32 syncpoint_id) { - std::lock_guard lock{sync_mutex}; auto& syncpoint = syncpoints.at(syncpoint_id); syncpoint++; + std::lock_guard lock{sync_mutex}; sync_cv.notify_all(); auto& interrupt = syncpt_interrupts.at(syncpoint_id); if (!interrupt.empty()) { @@ -169,127 +162,25 @@ bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) { return true; } -void GPU::WaitOnWorkRequest(u64 fence) { - std::unique_lock lck{work_request_mutex}; - request_cv.wait(lck, - [&] { return fence >= current_request_fence.load(std::memory_order_relaxed); }); -} - u64 GPU::RequestFlush(VAddr addr, std::size_t size) { - std::unique_lock lck{work_request_mutex}; - const u64 fence = ++last_request_fence; - work_requests.emplace_back(fence, addr, size); - return fence; -} - -u64 GPU::RequestQueueFrame(u64 id) { - std::unique_lock lck{work_request_mutex}; - const u64 fence = ++last_request_fence; - work_requests.emplace_back(fence, id); + std::unique_lock lck{flush_request_mutex}; + const u64 fence = ++last_flush_fence; + flush_requests.emplace_back(fence, addr, size); return fence; } void GPU::TickWork() { - std::unique_lock lck{work_request_mutex}; - while (!work_requests.empty()) { - auto request = work_requests.front(); + std::unique_lock lck{flush_request_mutex}; + while (!flush_requests.empty()) { + auto& request = flush_requests.front(); const u64 fence = request.fence; - work_requests.pop_front(); - work_request_mutex.unlock(); - switch (request.type) { - case RequestType::Flush: { - rasterizer->FlushRegion(request.flush.addr, request.flush.size); - break; - } - case RequestType::QueueFrame: { - Tegra::FramebufferConfig frame_info; - { - std::unique_lock lock(frame_requests_mutex); - const u64 searching_id = request.queue_frame.id; - auto it = std::find_if( - frame_queue_items.begin(), frame_queue_items.end(), - [searching_id](const FrameQueue& item) { return item.id == searching_id; }); - ASSERT(it != frame_queue_items.end()); - frame_info = it->frame_info; - frame_queue_items.erase(it); - } - renderer->SwapBuffers(&frame_info); - break; - } - default: { - LOG_ERROR(HW_GPU, "Unknown, work request"); - } - } - current_request_fence.store(fence, std::memory_order_release); - work_request_mutex.lock(); - request_cv.notify_all(); - } -} - -void GPU::QueueFrame(const Tegra::FramebufferConfig* framebuffer, - const Service::Nvidia::MultiFence& fences) { - std::unique_lock lock(frame_requests_mutex); - if (fences.num_fences == 0) { - u64 new_queue_id = frame_queue_ids++; - FrameQueue item{ - .frame_info = *framebuffer, - .id = new_queue_id, - }; - frame_queue_items.push_back(item); - RequestQueueFrame(new_queue_id); - return; - } - u64 new_id = frame_request_ids++; - FrameRequest request{ - .frame_info = *framebuffer, - .count = 0, - .id = new_id, - }; - std::unique_lock lck{pre_sync_mutex}; - for (size_t i = 0; i < fences.num_fences; i++) { - auto& fence = fences.fences[i]; - if (pre_syncpoints[fence.id].load(std::memory_order_relaxed) < fence.value) { - const FrameTrigger trigger{ - .id = new_id, - .sync_point_value = fence.value, - }; - frame_triggers[fence.id].push_back(trigger); - ++request.count; - } - } - if (request.count == 0) { - lck.unlock(); - u64 new_queue_id = frame_request_ids++; - FrameQueue item{ - .frame_info = *framebuffer, - .id = new_queue_id, - }; - frame_queue_items.push_back(item); - u64 fence = RequestQueueFrame(new_queue_id); - WaitOnWorkRequest(fence); - return; - } - frame_requests.emplace(new_id, request); -} - -void GPU::ProcessFrameRequests(u32 syncpoint_id, u32 new_value) { - auto& list = frame_triggers[syncpoint_id]; - if (list.empty()) { - return; - } - auto it = list.begin(); - while (it != list.end()) { - if (it->sync_point_value <= new_value) { - auto obj = frame_requests.find(it->id); - --obj->second.count; - if (obj->second.count == 0) { - renderer->SwapBuffers(&obj->second.frame_info); - frame_requests.erase(obj); - } - it = list.erase(it); - continue; - } - ++it; + const VAddr addr = request.addr; + const std::size_t size = request.size; + flush_requests.pop_front(); + flush_request_mutex.unlock(); + rasterizer->FlushRegion(addr, size); + current_flush_fence.store(fence); + flush_request_mutex.lock(); } } @@ -508,7 +399,7 @@ void GPU::ProcessFenceActionMethod() { WaitFence(regs.fence_action.syncpoint_id, regs.fence_value); break; case FenceOperation::Increment: - rasterizer->SignalSyncPoint(regs.fence_action.syncpoint_id); + IncrementSyncPoint(regs.fence_action.syncpoint_id); break; default: UNIMPLEMENTED_MSG("Unimplemented operation {}", regs.fence_action.op.Value()); diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 8d50758d5..a8e98e51b 100755 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -159,16 +159,11 @@ public: void OnCommandListEnd(); /// Request a host GPU memory flush from the CPU. - u64 RequestFlush(VAddr addr, std::size_t size); - - void WaitOnWorkRequest(u64 fence); - - void QueueFrame(const Tegra::FramebufferConfig* framebuffer, - const Service::Nvidia::MultiFence& fence); + [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size); /// Obtains current flush request fence id. - [[nodiscard]] u64 CurrentWorkRequestFence() const { - return current_request_fence.load(std::memory_order_relaxed); + [[nodiscard]] u64 CurrentFlushRequestFence() const { + return current_flush_fence.load(std::memory_order_relaxed); } /// Tick pending requests within the GPU. @@ -230,7 +225,6 @@ public: /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. void WaitFence(u32 syncpoint_id, u32 value); - void IncrementSyncPointGuest(u32 syncpoint_id); void IncrementSyncPoint(u32 syncpoint_id); [[nodiscard]] u32 GetSyncpointValue(u32 syncpoint_id) const; @@ -371,34 +365,6 @@ private: /// Determines where the method should be executed. [[nodiscard]] bool ExecuteMethodOnEngine(u32 method); - struct FrameRequest { - Tegra::FramebufferConfig frame_info; - size_t count; - u64 id; - }; - - struct FrameTrigger { - u64 id; - u32 sync_point_value; - }; - - struct FrameQueue { - Tegra::FramebufferConfig frame_info; - u64 id; - }; - - /// Request a frame release on the GPU thread - u64 RequestQueueFrame(u64 id); - - void ProcessFrameRequests(u32 syncpoint_id, u32 new_value); - - std::mutex frame_requests_mutex; - std::unordered_map> frame_triggers; - std::unordered_map frame_requests; - std::list frame_queue_items; - u64 frame_queue_ids{}; - u64 frame_request_ids{}; - protected: Core::System& system; std::unique_ptr memory_manager; @@ -426,50 +392,27 @@ private: /// When true, we are about to shut down emulation session, so terminate outstanding tasks std::atomic_bool shutting_down{}; - std::array, Service::Nvidia::MaxSyncPoints> pre_syncpoints{}; std::array, Service::Nvidia::MaxSyncPoints> syncpoints{}; std::array, Service::Nvidia::MaxSyncPoints> syncpt_interrupts; - std::mutex pre_sync_mutex; std::mutex sync_mutex; std::mutex device_mutex; std::condition_variable sync_cv; - enum class RequestType : u32 { - Flush = 0, - QueueFrame = 1, + struct FlushRequest { + explicit FlushRequest(u64 fence_, VAddr addr_, std::size_t size_) + : fence{fence_}, addr{addr_}, size{size_} {} + u64 fence; + VAddr addr; + std::size_t size; }; - struct WorkRequest { - explicit WorkRequest(u64 fence_, VAddr addr_, std::size_t size_) - : fence{fence_}, type{RequestType::Flush} { - flush.addr = addr_; - flush.size = size_; - } - - explicit WorkRequest(u64 fence_, u64 id) : fence{fence_}, type{RequestType::QueueFrame} { - queue_frame.id = id; - } - u64 fence; - union { - struct { - VAddr addr; - std::size_t size; - } flush; - struct { - u64 id; - } queue_frame; - }; - RequestType type; - }; // namespace Tegra - - std::list work_requests; - std::atomic current_request_fence{}; - u64 last_request_fence{}; - std::mutex work_request_mutex; - std::condition_variable request_cv; + std::list flush_requests; + std::atomic current_flush_fence{}; + u64 last_flush_fence{}; + std::mutex flush_request_mutex; const bool is_async; diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 3470064f3..46f642b19 100755 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -105,7 +105,7 @@ void ThreadManager::FlushRegion(VAddr addr, u64 size) { auto& gpu = system.GPU(); u64 fence = gpu.RequestFlush(addr, size); PushCommand(GPUTickCommand(), true); - ASSERT(fence <= gpu.CurrentWorkRequestFence()); + ASSERT(fence <= gpu.CurrentFlushRequestFence()); } void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 28d881e86..d5169e9c4 100755 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -214,8 +214,6 @@ void RasterizerOpenGL::Clear() { void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { MICROPROFILE_SCOPE(OpenGL_Drawing); - SCOPE_EXIT({ gpu.TickWork(); }); - query_cache.UpdateCounters(); SyncState(); @@ -271,6 +269,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { ++num_queued_commands; has_written_global_memory |= pipeline->WritesGlobalMemory(); + + gpu.TickWork(); } void RasterizerOpenGL::DispatchCompute() { @@ -421,7 +421,6 @@ void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) { } void RasterizerOpenGL::SignalSyncPoint(u32 value) { - gpu.IncrementSyncPointGuest(value); if (!gpu.IsAsync()) { gpu.IncrementSyncPoint(value); return; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index e7594796a..c6e093718 100755 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -412,7 +412,6 @@ void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) { } void RasterizerVulkan::SignalSyncPoint(u32 value) { - gpu.IncrementSyncPointGuest(value); if (!gpu.IsAsync()) { gpu.IncrementSyncPoint(value); return;