nvdec syncpt incorporation
Lays the groundwork for async GPU, although this does not fully implement async nvdec operations.
This commit is contained in:
parent
bcb702fa3e
commit
2c27127d04
11 changed files with 59 additions and 37 deletions
|
@ -11,8 +11,9 @@
|
||||||
|
|
||||||
namespace Service::Nvidia::Devices {
|
namespace Service::Nvidia::Devices {
|
||||||
|
|
||||||
nvhost_nvdec::nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_dev)
|
nvhost_nvdec::nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
|
||||||
: nvhost_nvdec_common(system, std::move(nvmap_dev)) {}
|
SyncpointManager& syncpoint_manager)
|
||||||
|
: nvhost_nvdec_common(system, std::move(nvmap_dev), syncpoint_manager) {}
|
||||||
nvhost_nvdec::~nvhost_nvdec() = default;
|
nvhost_nvdec::~nvhost_nvdec() = default;
|
||||||
|
|
||||||
NvResult nvhost_nvdec::Ioctl1(Ioctl command, const std::vector<u8>& input,
|
NvResult nvhost_nvdec::Ioctl1(Ioctl command, const std::vector<u8>& input,
|
||||||
|
|
|
@ -11,7 +11,8 @@ namespace Service::Nvidia::Devices {
|
||||||
|
|
||||||
class nvhost_nvdec final : public nvhost_nvdec_common {
|
class nvhost_nvdec final : public nvhost_nvdec_common {
|
||||||
public:
|
public:
|
||||||
explicit nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_dev);
|
explicit nvhost_nvdec(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
|
||||||
|
SyncpointManager& syncpoint_manager);
|
||||||
~nvhost_nvdec() override;
|
~nvhost_nvdec() override;
|
||||||
|
|
||||||
NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
|
NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
|
||||||
|
|
|
@ -11,6 +11,7 @@
|
||||||
#include "core/core.h"
|
#include "core/core.h"
|
||||||
#include "core/hle/service/nvdrv/devices/nvhost_nvdec_common.h"
|
#include "core/hle/service/nvdrv/devices/nvhost_nvdec_common.h"
|
||||||
#include "core/hle/service/nvdrv/devices/nvmap.h"
|
#include "core/hle/service/nvdrv/devices/nvmap.h"
|
||||||
|
#include "core/hle/service/nvdrv/syncpoint_manager.h"
|
||||||
#include "core/memory.h"
|
#include "core/memory.h"
|
||||||
#include "video_core/memory_manager.h"
|
#include "video_core/memory_manager.h"
|
||||||
#include "video_core/renderer_base.h"
|
#include "video_core/renderer_base.h"
|
||||||
|
@ -36,8 +37,9 @@ std::size_t WriteVectors(std::vector<u8>& dst, const std::vector<T>& src, std::s
|
||||||
}
|
}
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
||||||
nvhost_nvdec_common::nvhost_nvdec_common(Core::System& system, std::shared_ptr<nvmap> nvmap_dev)
|
nvhost_nvdec_common::nvhost_nvdec_common(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
|
||||||
: nvdevice(system), nvmap_dev(std::move(nvmap_dev)) {}
|
SyncpointManager& syncpoint_manager)
|
||||||
|
: nvdevice(system), nvmap_dev(std::move(nvmap_dev)), syncpoint_manager(syncpoint_manager) {}
|
||||||
nvhost_nvdec_common::~nvhost_nvdec_common() = default;
|
nvhost_nvdec_common::~nvhost_nvdec_common() = default;
|
||||||
|
|
||||||
NvResult nvhost_nvdec_common::SetNVMAPfd(const std::vector<u8>& input) {
|
NvResult nvhost_nvdec_common::SetNVMAPfd(const std::vector<u8>& input) {
|
||||||
|
@ -71,10 +73,14 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u
|
||||||
offset = SpliceVectors(input, wait_checks, params.syncpoint_count, offset);
|
offset = SpliceVectors(input, wait_checks, params.syncpoint_count, offset);
|
||||||
offset = SpliceVectors(input, fences, params.fence_count, offset);
|
offset = SpliceVectors(input, fences, params.fence_count, offset);
|
||||||
|
|
||||||
// TODO(ameerj): For async gpu, utilize fences for syncpoint 'max' increment
|
for (std::size_t i = 0; i < syncpt_increments.size(); i++) {
|
||||||
|
SyncptIncr syncpt_incr = syncpt_increments[i];
|
||||||
|
|
||||||
|
fences[i].id = syncpt_incr.id;
|
||||||
|
fences[i].value =
|
||||||
|
syncpoint_manager.IncreaseSyncpoint(syncpt_incr.id, syncpt_incr.increments);
|
||||||
|
}
|
||||||
auto& gpu = system.GPU();
|
auto& gpu = system.GPU();
|
||||||
|
|
||||||
for (const auto& cmd_buffer : command_buffers) {
|
for (const auto& cmd_buffer : command_buffers) {
|
||||||
auto object = nvmap_dev->GetObject(cmd_buffer.memory_id);
|
auto object = nvmap_dev->GetObject(cmd_buffer.memory_id);
|
||||||
ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;);
|
ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;);
|
||||||
|
@ -89,6 +95,10 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u
|
||||||
cmdlist.size() * sizeof(u32));
|
cmdlist.size() * sizeof(u32));
|
||||||
gpu.PushCommandBuffer(cmdlist);
|
gpu.PushCommandBuffer(cmdlist);
|
||||||
}
|
}
|
||||||
|
fences[0].value = syncpoint_manager.IncreaseSyncpoint(fences[0].id, 1);
|
||||||
|
|
||||||
|
Tegra::ChCommandHeaderList cmdlist{{(4 << 28) | fences[0].id}};
|
||||||
|
gpu.PushCommandBuffer(cmdlist);
|
||||||
|
|
||||||
std::memcpy(output.data(), &params, sizeof(IoctlSubmit));
|
std::memcpy(output.data(), &params, sizeof(IoctlSubmit));
|
||||||
// Some games expect command_buffers to be written back
|
// Some games expect command_buffers to be written back
|
||||||
|
@ -98,6 +108,7 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u
|
||||||
offset = WriteVectors(output, reloc_shifts, offset);
|
offset = WriteVectors(output, reloc_shifts, offset);
|
||||||
offset = WriteVectors(output, syncpt_increments, offset);
|
offset = WriteVectors(output, syncpt_increments, offset);
|
||||||
offset = WriteVectors(output, wait_checks, offset);
|
offset = WriteVectors(output, wait_checks, offset);
|
||||||
|
offset = WriteVectors(output, fences, offset);
|
||||||
|
|
||||||
return NvResult::Success;
|
return NvResult::Success;
|
||||||
}
|
}
|
||||||
|
@ -107,9 +118,10 @@ NvResult nvhost_nvdec_common::GetSyncpoint(const std::vector<u8>& input, std::ve
|
||||||
std::memcpy(&params, input.data(), sizeof(IoctlGetSyncpoint));
|
std::memcpy(&params, input.data(), sizeof(IoctlGetSyncpoint));
|
||||||
LOG_DEBUG(Service_NVDRV, "called GetSyncpoint, id={}", params.param);
|
LOG_DEBUG(Service_NVDRV, "called GetSyncpoint, id={}", params.param);
|
||||||
|
|
||||||
// We found that implementing this causes deadlocks with async gpu, along with degraded
|
if (device_syncpoints[params.param] == 0) {
|
||||||
// performance. TODO: RE the nvdec async implementation
|
device_syncpoints[params.param] = syncpoint_manager.AllocateSyncpoint();
|
||||||
params.value = 0;
|
}
|
||||||
|
params.value = device_syncpoints[params.param];
|
||||||
std::memcpy(output.data(), &params, sizeof(IoctlGetSyncpoint));
|
std::memcpy(output.data(), &params, sizeof(IoctlGetSyncpoint));
|
||||||
|
|
||||||
return NvResult::Success;
|
return NvResult::Success;
|
||||||
|
|
|
@ -10,12 +10,16 @@
|
||||||
#include "common/swap.h"
|
#include "common/swap.h"
|
||||||
#include "core/hle/service/nvdrv/devices/nvdevice.h"
|
#include "core/hle/service/nvdrv/devices/nvdevice.h"
|
||||||
|
|
||||||
namespace Service::Nvidia::Devices {
|
namespace Service::Nvidia {
|
||||||
|
class SyncpointManager;
|
||||||
|
|
||||||
|
namespace Devices {
|
||||||
class nvmap;
|
class nvmap;
|
||||||
|
|
||||||
class nvhost_nvdec_common : public nvdevice {
|
class nvhost_nvdec_common : public nvdevice {
|
||||||
public:
|
public:
|
||||||
explicit nvhost_nvdec_common(Core::System& system, std::shared_ptr<nvmap> nvmap_dev);
|
explicit nvhost_nvdec_common(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
|
||||||
|
SyncpointManager& syncpoint_manager);
|
||||||
~nvhost_nvdec_common() override;
|
~nvhost_nvdec_common() override;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
@ -157,8 +161,10 @@ protected:
|
||||||
s32_le nvmap_fd{};
|
s32_le nvmap_fd{};
|
||||||
u32_le submit_timeout{};
|
u32_le submit_timeout{};
|
||||||
std::shared_ptr<nvmap> nvmap_dev;
|
std::shared_ptr<nvmap> nvmap_dev;
|
||||||
|
SyncpointManager& syncpoint_manager;
|
||||||
|
std::array<u32, MaxSyncPoints> device_syncpoints{};
|
||||||
// This is expected to be ordered, therefore we must use a map, not unordered_map
|
// This is expected to be ordered, therefore we must use a map, not unordered_map
|
||||||
std::map<GPUVAddr, BufferMap> buffer_mappings;
|
std::map<GPUVAddr, BufferMap> buffer_mappings;
|
||||||
};
|
};
|
||||||
}; // namespace Service::Nvidia::Devices
|
}; // namespace Devices
|
||||||
|
} // namespace Service::Nvidia
|
||||||
|
|
|
@ -10,8 +10,9 @@
|
||||||
#include "video_core/renderer_base.h"
|
#include "video_core/renderer_base.h"
|
||||||
|
|
||||||
namespace Service::Nvidia::Devices {
|
namespace Service::Nvidia::Devices {
|
||||||
nvhost_vic::nvhost_vic(Core::System& system, std::shared_ptr<nvmap> nvmap_dev)
|
nvhost_vic::nvhost_vic(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
|
||||||
: nvhost_nvdec_common(system, std::move(nvmap_dev)) {}
|
SyncpointManager& syncpoint_manager)
|
||||||
|
: nvhost_nvdec_common(system, std::move(nvmap_dev), syncpoint_manager) {}
|
||||||
|
|
||||||
nvhost_vic::~nvhost_vic() = default;
|
nvhost_vic::~nvhost_vic() = default;
|
||||||
|
|
||||||
|
|
|
@ -7,11 +7,11 @@
|
||||||
#include "core/hle/service/nvdrv/devices/nvhost_nvdec_common.h"
|
#include "core/hle/service/nvdrv/devices/nvhost_nvdec_common.h"
|
||||||
|
|
||||||
namespace Service::Nvidia::Devices {
|
namespace Service::Nvidia::Devices {
|
||||||
class nvmap;
|
|
||||||
|
|
||||||
class nvhost_vic final : public nvhost_nvdec_common {
|
class nvhost_vic final : public nvhost_nvdec_common {
|
||||||
public:
|
public:
|
||||||
explicit nvhost_vic(Core::System& system, std::shared_ptr<nvmap> nvmap_dev);
|
explicit nvhost_vic(Core::System& system, std::shared_ptr<nvmap> nvmap_dev,
|
||||||
|
SyncpointManager& syncpoint_manager);
|
||||||
~nvhost_vic();
|
~nvhost_vic();
|
||||||
|
|
||||||
NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
|
NvResult Ioctl1(Ioctl command, const std::vector<u8>& input, std::vector<u8>& output) override;
|
||||||
|
|
|
@ -55,9 +55,11 @@ Module::Module(Core::System& system) : syncpoint_manager{system.GPU()} {
|
||||||
devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(system, nvmap_dev);
|
devices["/dev/nvdisp_disp0"] = std::make_shared<Devices::nvdisp_disp0>(system, nvmap_dev);
|
||||||
devices["/dev/nvhost-ctrl"] =
|
devices["/dev/nvhost-ctrl"] =
|
||||||
std::make_shared<Devices::nvhost_ctrl>(system, events_interface, syncpoint_manager);
|
std::make_shared<Devices::nvhost_ctrl>(system, events_interface, syncpoint_manager);
|
||||||
devices["/dev/nvhost-nvdec"] = std::make_shared<Devices::nvhost_nvdec>(system, nvmap_dev);
|
devices["/dev/nvhost-nvdec"] =
|
||||||
|
std::make_shared<Devices::nvhost_nvdec>(system, nvmap_dev, syncpoint_manager);
|
||||||
devices["/dev/nvhost-nvjpg"] = std::make_shared<Devices::nvhost_nvjpg>(system);
|
devices["/dev/nvhost-nvjpg"] = std::make_shared<Devices::nvhost_nvjpg>(system);
|
||||||
devices["/dev/nvhost-vic"] = std::make_shared<Devices::nvhost_vic>(system, nvmap_dev);
|
devices["/dev/nvhost-vic"] =
|
||||||
|
std::make_shared<Devices::nvhost_vic>(system, nvmap_dev, syncpoint_manager);
|
||||||
}
|
}
|
||||||
|
|
||||||
Module::~Module() = default;
|
Module::~Module() = default;
|
||||||
|
|
|
@ -33,8 +33,7 @@ CDmaPusher::CDmaPusher(GPU& gpu_)
|
||||||
: gpu{gpu_}, nvdec_processor(std::make_shared<Nvdec>(gpu)),
|
: gpu{gpu_}, nvdec_processor(std::make_shared<Nvdec>(gpu)),
|
||||||
vic_processor(std::make_unique<Vic>(gpu, nvdec_processor)),
|
vic_processor(std::make_unique<Vic>(gpu, nvdec_processor)),
|
||||||
host1x_processor(std::make_unique<Host1x>(gpu)),
|
host1x_processor(std::make_unique<Host1x>(gpu)),
|
||||||
nvdec_sync(std::make_unique<SyncptIncrManager>(gpu)),
|
sync_manager(std::make_unique<SyncptIncrManager>(gpu)) {}
|
||||||
vic_sync(std::make_unique<SyncptIncrManager>(gpu)) {}
|
|
||||||
|
|
||||||
CDmaPusher::~CDmaPusher() = default;
|
CDmaPusher::~CDmaPusher() = default;
|
||||||
|
|
||||||
|
@ -110,10 +109,10 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) {
|
||||||
const auto syncpoint_id = static_cast<u32>(data & 0xFF);
|
const auto syncpoint_id = static_cast<u32>(data & 0xFF);
|
||||||
const auto cond = static_cast<u32>((data >> 8) & 0xFF);
|
const auto cond = static_cast<u32>((data >> 8) & 0xFF);
|
||||||
if (cond == 0) {
|
if (cond == 0) {
|
||||||
nvdec_sync->Increment(syncpoint_id);
|
sync_manager->Increment(syncpoint_id);
|
||||||
} else {
|
} else {
|
||||||
nvdec_sync->IncrementWhenDone(static_cast<u32>(current_class), syncpoint_id);
|
sync_manager->SignalDone(
|
||||||
nvdec_sync->SignalDone(syncpoint_id);
|
sync_manager->IncrementWhenDone(static_cast<u32>(current_class), syncpoint_id));
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -135,10 +134,10 @@ void CDmaPusher::ExecuteCommand(u32 state_offset, u32 data) {
|
||||||
const auto syncpoint_id = static_cast<u32>(data & 0xFF);
|
const auto syncpoint_id = static_cast<u32>(data & 0xFF);
|
||||||
const auto cond = static_cast<u32>((data >> 8) & 0xFF);
|
const auto cond = static_cast<u32>((data >> 8) & 0xFF);
|
||||||
if (cond == 0) {
|
if (cond == 0) {
|
||||||
vic_sync->Increment(syncpoint_id);
|
sync_manager->Increment(syncpoint_id);
|
||||||
} else {
|
} else {
|
||||||
vic_sync->IncrementWhenDone(static_cast<u32>(current_class), syncpoint_id);
|
sync_manager->SignalDone(
|
||||||
vic_sync->SignalDone(syncpoint_id);
|
sync_manager->IncrementWhenDone(static_cast<u32>(current_class), syncpoint_id));
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
|
@ -116,12 +116,10 @@ private:
|
||||||
void ThiStateWrite(ThiRegisters& state, u32 state_offset, const std::vector<u32>& arguments);
|
void ThiStateWrite(ThiRegisters& state, u32 state_offset, const std::vector<u32>& arguments);
|
||||||
|
|
||||||
GPU& gpu;
|
GPU& gpu;
|
||||||
|
std::shared_ptr<Tegra::Nvdec> nvdec_processor;
|
||||||
std::shared_ptr<Nvdec> nvdec_processor;
|
std::unique_ptr<Tegra::Vic> vic_processor;
|
||||||
std::unique_ptr<Vic> vic_processor;
|
std::unique_ptr<Tegra::Host1x> host1x_processor;
|
||||||
std::unique_ptr<Host1x> host1x_processor;
|
std::unique_ptr<SyncptIncrManager> sync_manager;
|
||||||
std::unique_ptr<SyncptIncrManager> nvdec_sync;
|
|
||||||
std::unique_ptr<SyncptIncrManager> vic_sync;
|
|
||||||
ChClassId current_class{};
|
ChClassId current_class{};
|
||||||
ThiRegisters vic_thi_state{};
|
ThiRegisters vic_thi_state{};
|
||||||
ThiRegisters nvdec_thi_state{};
|
ThiRegisters nvdec_thi_state{};
|
||||||
|
|
|
@ -34,6 +34,8 @@ void Tegra::Host1x::ProcessMethod(Method method, const std::vector<u32>& argumen
|
||||||
}
|
}
|
||||||
|
|
||||||
void Tegra::Host1x::Execute(u32 data) {
|
void Tegra::Host1x::Execute(u32 data) {
|
||||||
// This method waits on a valid syncpoint.
|
u32 syncpointId = (data & 0xFF);
|
||||||
// TODO: Implement when proper Async is in place
|
u32 threshold = state.load_syncpoint_payload32;
|
||||||
|
|
||||||
|
gpu.WaitFence(syncpointId, threshold);
|
||||||
}
|
}
|
||||||
|
|
|
@ -38,7 +38,7 @@ u32 SyncptIncrManager::IncrementWhenDone(u32 class_id, u32 id) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void SyncptIncrManager::SignalDone(u32 handle) {
|
void SyncptIncrManager::SignalDone(u32 handle) {
|
||||||
const auto done_incr =
|
const auto& done_incr =
|
||||||
std::find_if(increments.begin(), increments.end(),
|
std::find_if(increments.begin(), increments.end(),
|
||||||
[handle](const SyncptIncr& incr) { return incr.id == handle; });
|
[handle](const SyncptIncr& incr) { return incr.id == handle; });
|
||||||
if (done_incr != increments.cend()) {
|
if (done_incr != increments.cend()) {
|
||||||
|
|
Loading…
Reference in a new issue