early-access version 2041

This commit is contained in:
pineappleEA 2021-09-09 00:11:45 +02:00
parent 5e020e5a75
commit d189d8fac4
17 changed files with 72 additions and 243 deletions

View File

@ -1,7 +1,7 @@
yuzu emulator early access
=============
This is the source code for early-access 2040.
This is the source code for early-access 2041.
## Legal Notice

View File

@ -491,7 +491,7 @@ public:
class EnsureTokenIdCacheAsyncInterface final : public IAsyncContext {
public:
explicit EnsureTokenIdCacheAsyncInterface(Core::System& system_) : IAsyncContext(system_) {
explicit EnsureTokenIdCacheAsyncInterface(Core::System& system_) : IAsyncContext{system_} {
MarkComplete();
}
~EnsureTokenIdCacheAsyncInterface() = default;
@ -504,13 +504,13 @@ public:
}
protected:
bool IsComplete() override {
bool IsComplete() const override {
return true;
}
void Cancel() override {}
ResultCode GetResult() override {
ResultCode GetResult() const override {
return ResultSuccess;
}
};
@ -518,7 +518,9 @@ protected:
class IManagerForApplication final : public ServiceFramework<IManagerForApplication> {
public:
explicit IManagerForApplication(Core::System& system_, Common::UUID user_id_)
: ServiceFramework{system_, "IManagerForApplication"}, user_id{user_id_}, system(system_) {
: ServiceFramework{system_, "IManagerForApplication"},
ensure_token_id{std::make_shared<EnsureTokenIdCacheAsyncInterface>(system)},
user_id{user_id_} {
// clang-format off
static const FunctionInfo functions[] = {
{0, &IManagerForApplication::CheckAvailability, "CheckAvailability"},
@ -533,8 +535,6 @@ public:
// clang-format on
RegisterHandlers(functions);
ensure_token_id = std::make_shared<EnsureTokenIdCacheAsyncInterface>(system);
}
private:
@ -591,7 +591,6 @@ private:
std::shared_ptr<EnsureTokenIdCacheAsyncInterface> ensure_token_id{};
Common::UUID user_id{Common::INVALID_UUID};
Core::System& system;
};
// 6.0.0+

View File

@ -14,12 +14,12 @@ IAsyncContext::IAsyncContext(Core::System& system_)
compeletion_event.Initialize("IAsyncContext:CompletionEvent");
// clang-format off
static const FunctionInfo functions[] = {
{0, &IAsyncContext::GetSystemEvent, "GetSystemEvent"},
{1, &IAsyncContext::Cancel, "Cancel"},
{2, &IAsyncContext::HasDone, "HasDone"},
{3, &IAsyncContext::GetResult, "GetResult"},
};
static const FunctionInfo functions[] = {
{0, &IAsyncContext::GetSystemEvent, "GetSystemEvent"},
{1, &IAsyncContext::Cancel, "Cancel"},
{2, &IAsyncContext::HasDone, "HasDone"},
{3, &IAsyncContext::GetResult, "GetResult"},
};
// clang-format on
RegisterHandlers(functions);
@ -46,11 +46,11 @@ void IAsyncContext::Cancel(Kernel::HLERequestContext& ctx) {
void IAsyncContext::HasDone(Kernel::HLERequestContext& ctx) {
LOG_DEBUG(Service_ACC, "called");
is_complete = IsComplete();
is_complete.store(IsComplete());
IPC::ResponseBuilder rb{ctx, 3};
rb.Push(ResultSuccess);
rb.Push(is_complete);
rb.Push(is_complete.load());
}
void IAsyncContext::GetResult(Kernel::HLERequestContext& ctx) {
@ -61,7 +61,7 @@ void IAsyncContext::GetResult(Kernel::HLERequestContext& ctx) {
}
void IAsyncContext::MarkComplete() {
is_complete = true;
is_complete.store(true);
compeletion_event.GetWritableEvent().Signal();
}

View File

@ -4,6 +4,7 @@
#pragma once
#include <atomic>
#include "core/hle/kernel/k_event.h"
#include "core/hle/service/service.h"
@ -23,13 +24,13 @@ public:
void GetResult(Kernel::HLERequestContext& ctx);
protected:
virtual bool IsComplete() = 0;
virtual bool IsComplete() const = 0;
virtual void Cancel() = 0;
virtual ResultCode GetResult() = 0;
virtual ResultCode GetResult() const = 0;
void MarkComplete();
bool is_complete{false};
std::atomic<bool> is_complete{false};
Kernel::KEvent compeletion_event;
};

View File

@ -98,7 +98,7 @@ ResultCode VfsDirectoryServiceWrapper::DeleteFile(const std::string& path_) cons
ResultCode VfsDirectoryServiceWrapper::CreateDirectory(const std::string& path_) const {
std::string path(Common::FS::SanitizePath(path_));
const auto components = Common::FS::SplitPathComponents(path);
std::string relative_path = "";
std::string relative_path;
for (const auto& component : components) {
// Skip empty path components
if (component.empty()) {

View File

@ -5,8 +5,10 @@
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "core/hle/service/nvdrv/devices/nvdisp_disp0.h"
#include "core/hle/service/nvdrv/devices/nvmap.h"
#include "core/perf_stats.h"
#include "video_core/gpu.h"
#include "video_core/renderer_base.h"
@ -39,7 +41,7 @@ void nvdisp_disp0::OnClose(DeviceFD fd) {}
void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height,
u32 stride, NVFlinger::BufferQueue::BufferTransformFlags transform,
const Common::Rectangle<int>& crop_rect, const MultiFence& fences) {
const Common::Rectangle<int>& crop_rect) {
VAddr addr = nvmap_dev->GetObjectAddress(buffer_handle);
LOG_TRACE(Service,
"Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}",
@ -50,7 +52,10 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3
addr, offset, width, height, stride, static_cast<PixelFormat>(format),
transform, crop_rect};
system.GPU().QueueFrame(&framebuffer, fences);
system.GetPerfStats().EndSystemFrame();
system.GPU().SwapBuffers(&framebuffer);
system.SpeedLimiter().DoSpeedLimiting(system.CoreTiming().GetGlobalTimeUs());
system.GetPerfStats().BeginSystemFrame();
}
} // namespace Service::Nvidia::Devices

View File

@ -33,7 +33,7 @@ public:
/// Performs a screen flip, drawing the buffer pointed to by the handle.
void flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u32 height, u32 stride,
NVFlinger::BufferQueue::BufferTransformFlags transform,
const Common::Rectangle<int>& crop_rect, const MultiFence& fence);
const Common::Rectangle<int>& crop_rect);
private:
std::shared_ptr<nvmap> nvmap_dev;

View File

@ -91,10 +91,6 @@ const IGBPBuffer& BufferQueue::RequestBuffer(u32 slot) const {
return buffers[slot].igbp_buffer;
}
const BufferQueue::Buffer& BufferQueue::AccessBuffer(u32 slot) const {
return buffers[slot];
}
void BufferQueue::QueueBuffer(u32 slot, BufferTransformFlags transform,
const Common::Rectangle<int>& crop_rect, u32 swap_interval,
Service::Nvidia::MultiFence& multi_fence) {

View File

@ -112,7 +112,6 @@ public:
void Connect();
void Disconnect();
u32 Query(QueryType type);
const Buffer& AccessBuffer(u32 slot) const;
u32 GetId() const {
return id;

View File

@ -275,6 +275,8 @@ void NVFlinger::Compose() {
continue;
}
const auto& igbp_buffer = buffer->get().igbp_buffer;
if (!system.IsPoweredOn()) {
return; // We are likely shutting down
}
@ -288,31 +290,23 @@ void NVFlinger::Compose() {
}
guard->lock();
system.GetPerfStats().EndSystemFrame();
MicroProfileFlip();
system.SpeedLimiter().DoSpeedLimiting(system.CoreTiming().GetGlobalTimeUs());
system.GetPerfStats().BeginSystemFrame();
// Now send the buffer to the GPU for drawing.
// TODO(Subv): Support more than just disp0. The display device selection is probably based
// on which display we're drawing (Default, Internal, External, etc)
auto nvdisp = nvdrv->GetDevice<Nvidia::Devices::nvdisp_disp0>("/dev/nvdisp_disp0");
ASSERT(nvdisp);
nvdisp->flip(igbp_buffer.gpu_buffer_id, igbp_buffer.offset, igbp_buffer.format,
igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride,
buffer->get().transform, buffer->get().crop_rect);
swap_interval = buffer->get().swap_interval;
buffer_queue.ReleaseBuffer(buffer->get().slot);
}
}
void NVFlinger::PrequeueFrame(u32 buffer_queue_id, u32 slot) {
auto& buffer_queue = *FindBufferQueue(buffer_queue_id);
const auto& buffer = buffer_queue.AccessBuffer(slot);
const auto& igbp_buffer = buffer.igbp_buffer;
// Now send the buffer to the GPU for drawing.
// TODO(Subv): Support more than just disp0. The display device selection is probably based
// on which display we're drawing (Default, Internal, External, etc)
auto nvdisp = nvdrv->GetDevice<Nvidia::Devices::nvdisp_disp0>("/dev/nvdisp_disp0");
ASSERT(nvdisp);
nvdisp->flip(igbp_buffer.gpu_buffer_id, igbp_buffer.offset, igbp_buffer.format,
igbp_buffer.width, igbp_buffer.height, igbp_buffer.stride, buffer.transform,
buffer.crop_rect, buffer.multi_fence);
}
s64 NVFlinger::GetNextTicks() const {
static constexpr s64 max_hertz = 120LL;

View File

@ -78,8 +78,6 @@ public:
/// Obtains a buffer queue identified by the ID.
[[nodiscard]] BufferQueue* FindBufferQueue(u32 id);
void PrequeueFrame(u32 buffer_queue_id, u32 slot);
/// Performs a composition request to the emulated nvidia GPU and triggers the vsync events when
/// finished.
void Compose();

View File

@ -592,7 +592,6 @@ private:
buffer_queue.QueueBuffer(request.data.slot, request.data.transform,
request.data.GetCropRect(), request.data.swap_interval,
request.data.multi_fence);
nv_flinger.PrequeueFrame(id, request.data.slot);
IGBPQueueBufferResponseParcel response{1280, 720};
ctx.WriteBuffer(response.Serialize());

View File

@ -114,17 +114,10 @@ void GPU::WaitFence(u32 syncpoint_id, u32 value) {
});
}
void GPU::IncrementSyncPointGuest(const u32 syncpoint_id) {
std::lock_guard lock{pre_sync_mutex};
auto& syncpoint = pre_syncpoints.at(syncpoint_id);
syncpoint++;
ProcessFrameRequests(syncpoint_id, syncpoint);
}
void GPU::IncrementSyncPoint(const u32 syncpoint_id) {
std::lock_guard lock{sync_mutex};
auto& syncpoint = syncpoints.at(syncpoint_id);
syncpoint++;
std::lock_guard lock{sync_mutex};
sync_cv.notify_all();
auto& interrupt = syncpt_interrupts.at(syncpoint_id);
if (!interrupt.empty()) {
@ -169,121 +162,25 @@ bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) {
return true;
}
void GPU::WaitOnWorkRequest(u64 fence) {
std::unique_lock lck{work_request_mutex};
request_cv.wait(lck,
[&] { return fence >= current_request_fence.load(std::memory_order_relaxed); });
}
u64 GPU::RequestFlush(VAddr addr, std::size_t size) {
std::unique_lock lck{work_request_mutex};
const u64 fence = ++last_request_fence;
work_requests.emplace_back(fence, addr, size);
return fence;
}
u64 GPU::RequestQueueFrame(u64 id) {
std::unique_lock lck{work_request_mutex};
const u64 fence = ++last_request_fence;
work_requests.emplace_back(fence, id);
std::unique_lock lck{flush_request_mutex};
const u64 fence = ++last_flush_fence;
flush_requests.emplace_back(fence, addr, size);
return fence;
}
void GPU::TickWork() {
std::unique_lock lck{work_request_mutex};
while (!work_requests.empty()) {
auto request = work_requests.front();
std::unique_lock lck{flush_request_mutex};
while (!flush_requests.empty()) {
auto& request = flush_requests.front();
const u64 fence = request.fence;
work_requests.pop_front();
work_request_mutex.unlock();
switch (request.type) {
case RequestType::Flush: {
rasterizer->FlushRegion(request.flush.addr, request.flush.size);
break;
}
case RequestType::QueueFrame: {
Tegra::FramebufferConfig frame_info;
{
std::unique_lock<std::mutex> lock(frame_requests_mutex);
const u64 searching_id = request.queue_frame.id;
auto it = std::find_if(
frame_queue_items.begin(), frame_queue_items.end(),
[searching_id](const FrameQueue& item) { return item.id == searching_id; });
ASSERT(it != frame_queue_items.end());
frame_info = it->frame_info;
frame_queue_items.erase(it);
}
renderer->SwapBuffers(&frame_info);
break;
}
default: {
LOG_ERROR(HW_GPU, "Unknown work request type={}", request.type);
}
}
current_request_fence.store(fence, std::memory_order_release);
work_request_mutex.lock();
request_cv.notify_all();
}
}
void GPU::QueueFrame(const Tegra::FramebufferConfig* framebuffer,
const Service::Nvidia::MultiFence& fences) {
std::unique_lock<std::mutex> lock(frame_requests_mutex);
if (fences.num_fences == 0) {
u64 new_queue_id = frame_queue_ids++;
FrameQueue item{
.frame_info = *framebuffer,
.id = new_queue_id,
};
frame_queue_items.push_back(item);
RequestQueueFrame(new_queue_id);
return;
}
u64 new_id = frame_request_ids++;
FrameRequest request{
.frame_info = *framebuffer,
.count = 0,
.id = new_id,
};
std::unique_lock lck{pre_sync_mutex};
for (size_t i = 0; i < fences.num_fences; i++) {
auto& fence = fences.fences[i];
if (pre_syncpoints[fence.id].load(std::memory_order_relaxed) < fence.value) {
const FrameTrigger trigger{
.id = new_id,
.sync_point_value = fence.value,
};
frame_triggers[fence.id].push_back(trigger);
++request.count;
}
}
if (request.count == 0) {
lck.unlock();
gpu_thread.SwapBuffers(framebuffer);
return;
}
frame_requests.emplace(new_id, request);
}
void GPU::ProcessFrameRequests(u32 syncpoint_id, u32 new_value) {
auto& list = frame_triggers[syncpoint_id];
if (list.empty()) {
return;
}
auto it = list.begin();
while (it != list.end()) {
if (it->sync_point_value <= new_value) {
auto obj = frame_requests.find(it->id);
--obj->second.count;
if (obj->second.count == 0) {
rasterizer->FlushCommands();
renderer->SwapBuffers(&obj->second.frame_info);
frame_requests.erase(obj);
}
it = list.erase(it);
continue;
}
++it;
const VAddr addr = request.addr;
const std::size_t size = request.size;
flush_requests.pop_front();
flush_request_mutex.unlock();
rasterizer->FlushRegion(addr, size);
current_flush_fence.store(fence);
flush_request_mutex.lock();
}
}
@ -502,7 +399,7 @@ void GPU::ProcessFenceActionMethod() {
WaitFence(regs.fence_action.syncpoint_id, regs.fence_value);
break;
case FenceOperation::Increment:
rasterizer->SignalSyncPoint(regs.fence_action.syncpoint_id);
IncrementSyncPoint(regs.fence_action.syncpoint_id);
break;
default:
UNIMPLEMENTED_MSG("Unimplemented operation {}", regs.fence_action.op.Value());

View File

@ -159,16 +159,11 @@ public:
void OnCommandListEnd();
/// Request a host GPU memory flush from the CPU.
u64 RequestFlush(VAddr addr, std::size_t size);
void WaitOnWorkRequest(u64 fence);
void QueueFrame(const Tegra::FramebufferConfig* framebuffer,
const Service::Nvidia::MultiFence& fence);
[[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size);
/// Obtains current flush request fence id.
[[nodiscard]] u64 CurrentWorkRequestFence() const {
return current_request_fence.load(std::memory_order_relaxed);
[[nodiscard]] u64 CurrentFlushRequestFence() const {
return current_flush_fence.load(std::memory_order_relaxed);
}
/// Tick pending requests within the GPU.
@ -230,7 +225,6 @@ public:
/// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame.
void WaitFence(u32 syncpoint_id, u32 value);
void IncrementSyncPointGuest(u32 syncpoint_id);
void IncrementSyncPoint(u32 syncpoint_id);
[[nodiscard]] u32 GetSyncpointValue(u32 syncpoint_id) const;
@ -371,34 +365,6 @@ private:
/// Determines where the method should be executed.
[[nodiscard]] bool ExecuteMethodOnEngine(u32 method);
struct FrameRequest {
Tegra::FramebufferConfig frame_info;
size_t count;
u64 id;
};
struct FrameTrigger {
u64 id;
u32 sync_point_value;
};
struct FrameQueue {
Tegra::FramebufferConfig frame_info;
u64 id;
};
/// Request a frame release on the GPU thread
u64 RequestQueueFrame(u64 id);
void ProcessFrameRequests(u32 syncpoint_id, u32 new_value);
std::mutex frame_requests_mutex;
std::unordered_map<u32, std::list<FrameTrigger>> frame_triggers;
std::unordered_map<u64, FrameRequest> frame_requests;
std::list<FrameQueue> frame_queue_items;
u64 frame_queue_ids{};
u64 frame_request_ids{};
protected:
Core::System& system;
std::unique_ptr<Tegra::MemoryManager> memory_manager;
@ -426,50 +392,27 @@ private:
/// When true, we are about to shut down emulation session, so terminate outstanding tasks
std::atomic_bool shutting_down{};
std::array<std::atomic<u32>, Service::Nvidia::MaxSyncPoints> pre_syncpoints{};
std::array<std::atomic<u32>, Service::Nvidia::MaxSyncPoints> syncpoints{};
std::array<std::list<u32>, Service::Nvidia::MaxSyncPoints> syncpt_interrupts;
std::mutex pre_sync_mutex;
std::mutex sync_mutex;
std::mutex device_mutex;
std::condition_variable sync_cv;
enum class RequestType : u32 {
Flush = 0,
QueueFrame = 1,
struct FlushRequest {
explicit FlushRequest(u64 fence_, VAddr addr_, std::size_t size_)
: fence{fence_}, addr{addr_}, size{size_} {}
u64 fence;
VAddr addr;
std::size_t size;
};
struct WorkRequest {
explicit WorkRequest(u64 fence_, VAddr addr_, std::size_t size_)
: fence{fence_}, type{RequestType::Flush} {
flush.addr = addr_;
flush.size = size_;
}
explicit WorkRequest(u64 fence_, u64 id) : fence{fence_}, type{RequestType::QueueFrame} {
queue_frame.id = id;
}
u64 fence;
union {
struct {
VAddr addr;
std::size_t size;
} flush;
struct {
u64 id;
} queue_frame;
};
RequestType type;
}; // namespace Tegra
std::list<WorkRequest> work_requests;
std::atomic<u64> current_request_fence{};
u64 last_request_fence{};
std::mutex work_request_mutex;
std::condition_variable request_cv;
std::list<FlushRequest> flush_requests;
std::atomic<u64> current_flush_fence{};
u64 last_flush_fence{};
std::mutex flush_request_mutex;
const bool is_async;

View File

@ -105,7 +105,7 @@ void ThreadManager::FlushRegion(VAddr addr, u64 size) {
auto& gpu = system.GPU();
u64 fence = gpu.RequestFlush(addr, size);
PushCommand(GPUTickCommand(), true);
ASSERT(fence <= gpu.CurrentWorkRequestFence());
ASSERT(fence <= gpu.CurrentFlushRequestFence());
}
void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {

View File

@ -214,8 +214,6 @@ void RasterizerOpenGL::Clear() {
void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
MICROPROFILE_SCOPE(OpenGL_Drawing);
SCOPE_EXIT({ gpu.TickWork(); });
query_cache.UpdateCounters();
SyncState();
@ -271,6 +269,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
++num_queued_commands;
has_written_global_memory |= pipeline->WritesGlobalMemory();
gpu.TickWork();
}
void RasterizerOpenGL::DispatchCompute() {
@ -401,7 +401,6 @@ void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
}
void RasterizerOpenGL::SignalSyncPoint(u32 value) {
gpu.IncrementSyncPointGuest(value);
if (!gpu.IsAsync()) {
gpu.IncrementSyncPoint(value);
return;

View File

@ -393,7 +393,6 @@ void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) {
}
void RasterizerVulkan::SignalSyncPoint(u32 value) {
gpu.IncrementSyncPointGuest(value);
if (!gpu.IsAsync()) {
gpu.IncrementSyncPoint(value);
return;