From 723df0f3685f01ce5a0330d567932136a9de7a8f Mon Sep 17 00:00:00 2001 From: Liam Date: Wed, 25 Oct 2023 00:34:40 -0400 Subject: [PATCH] nvdrv: rework to remove memcpy --- .../nvdrv/devices/ioctl_serialization.h | 180 +++++++++++------- src/core/hle/service/nvdrv/devices/nvdevice.h | 12 -- .../service/nvdrv/devices/nvhost_as_gpu.cpp | 26 +-- .../hle/service/nvdrv/devices/nvhost_as_gpu.h | 2 +- .../hle/service/nvdrv/devices/nvhost_ctrl.cpp | 16 +- .../hle/service/nvdrv/devices/nvhost_ctrl.h | 11 +- .../service/nvdrv/devices/nvhost_ctrl_gpu.cpp | 41 ++-- .../service/nvdrv/devices/nvhost_ctrl_gpu.h | 5 +- .../hle/service/nvdrv/devices/nvhost_gpu.cpp | 52 ++--- .../hle/service/nvdrv/devices/nvhost_gpu.h | 6 +- .../service/nvdrv/devices/nvhost_nvdec.cpp | 14 +- .../nvdrv/devices/nvhost_nvdec_common.cpp | 71 +++---- .../nvdrv/devices/nvhost_nvdec_common.h | 6 +- .../service/nvdrv/devices/nvhost_nvjpg.cpp | 2 +- .../hle/service/nvdrv/devices/nvhost_vic.cpp | 12 +- src/core/hle/service/nvdrv/devices/nvmap.cpp | 12 +- 16 files changed, 243 insertions(+), 225 deletions(-) diff --git a/src/core/hle/service/nvdrv/devices/ioctl_serialization.h b/src/core/hle/service/nvdrv/devices/ioctl_serialization.h index c560974f1..b12bcd138 100644 --- a/src/core/hle/service/nvdrv/devices/ioctl_serialization.h +++ b/src/core/hle/service/nvdrv/devices/ioctl_serialization.h @@ -11,97 +11,149 @@ namespace Service::Nvidia::Devices { -struct Ioctl1Traits { - template - static T GetClassImpl(R (T::*)(A)); - - template - static A GetArgImpl(R (T::*)(A)); +struct IoctlOneArgTraits { + template + static A GetFirstArgImpl(R (T::*)(A, B...)); }; -struct Ioctl23Traits { - template - static T GetClassImpl(R (T::*)(A, B)); +struct IoctlTwoArgTraits { + template + static A GetFirstArgImpl(R (T::*)(A, B, C...)); - template - static A GetArgImpl(R (T::*)(A, B)); + template + static B GetSecondArgImpl(R (T::*)(A, B, C...)); }; -template -struct ContainerType { - using ValueType = T; -}; +struct Null {}; -template -struct ContainerType { - using ValueType = T::value_type; -}; +// clang-format off -template -NvResult Wrap(std::span input, std::span output, Self* self, F&& callable, - Rest&&... rest) { - using Arg = ContainerType::ValueType; - constexpr bool ArgumentIsContainer = Common::IsContiguousContainer; +template +NvResult WrapGeneric(F&& callable, std::span input, std::span inline_input, std::span output, std::span inline_output) { + constexpr bool HasFixedArg = !std::is_same_v; + constexpr bool HasVarArg = !std::is_same_v; + constexpr bool HasInlInVarArg = !std::is_same_v; + constexpr bool HasInlOutVarArg = !std::is_same_v; - // Verify that the input and output sizes are valid. - const size_t in_params = input.size() / sizeof(Arg); - const size_t out_params = output.size() / sizeof(Arg); - if (in_params * sizeof(Arg) != input.size()) { - return NvResult::InvalidSize; - } - if (out_params * sizeof(Arg) != output.size()) { - return NvResult::InvalidSize; - } - if (in_params == 0 && out_params == 0 && !ArgumentIsContainer) { - return NvResult::InvalidSize; + // Declare the fixed-size input value. + FixedArg fixed{}; + size_t var_offset = 0; + + if constexpr (HasFixedArg) { + // Read the fixed-size input value. + var_offset = std::min(sizeof(FixedArg), input.size()); + if (var_offset > 0) { + std::memcpy(&fixed, input.data(), var_offset); + } } - // Copy inputs, if needed. - std::vector params(std::max(in_params, out_params)); - if (in_params > 0) { - std::memcpy(params.data(), input.data(), input.size()); + // Read the variable-sized inputs. + const size_t num_var_args = HasVarArg ? ((input.size() - var_offset) / sizeof(VarArg)) : 0; + std::vector var_args(num_var_args); + if constexpr (HasVarArg) { + if (num_var_args > 0) { + std::memcpy(var_args.data(), input.data() + var_offset, num_var_args * sizeof(VarArg)); + } } + const size_t num_inl_in_var_args = HasInlInVarArg ? (inline_input.size() / sizeof(InlInVarArg)) : 0; + std::vector inl_in_var_args(num_inl_in_var_args); + if constexpr (HasInlInVarArg) { + if (num_inl_in_var_args > 0) { + std::memcpy(inl_in_var_args.data(), inline_input.data(), num_inl_in_var_args * sizeof(InlInVarArg)); + } + } + + // Construct inline output data. + const size_t num_inl_out_var_args = HasInlOutVarArg ? (inline_output.size() / sizeof(InlOutVarArg)) : 0; + std::vector inl_out_var_args(num_inl_out_var_args); + // Perform the call. - NvResult result; - if constexpr (ArgumentIsContainer) { - result = (self->*callable)(params, std::forward(rest)...); - } else { - result = (self->*callable)(params.front(), std::forward(rest)...); + NvResult result = callable(fixed, var_args, inl_in_var_args, inl_out_var_args); + + // Copy outputs. + if constexpr (HasFixedArg) { + if (output.size() > 0) { + std::memcpy(output.data(), &fixed, std::min(output.size(), sizeof(FixedArg))); + } } - // Copy outputs, if needed. - if (out_params > 0) { - std::memcpy(output.data(), params.data(), output.size()); + if constexpr (HasVarArg) { + if (num_var_args > 0 && output.size() > var_offset) { + const size_t max_var_size = output.size() - var_offset; + std::memcpy(output.data() + var_offset, var_args.data(), std::min(max_var_size, num_var_args * sizeof(VarArg))); + } } + // Copy inline outputs. + if constexpr (HasInlOutVarArg) { + if (num_inl_out_var_args > 0) { + std::memcpy(inline_output.data(), inl_out_var_args.data(), num_inl_out_var_args * sizeof(InlOutVarArg)); + } + } + + // We're done. return result; } -template -NvResult nvdevice::Wrap1(F&& callable, std::span input, std::span output) { - using Self = decltype(Ioctl1Traits::GetClassImpl(callable)); - using InnerArg = std::remove_reference_t; +template +NvResult WrapFixed(Self* self, F&& callable, std::span input, std::span output, Rest&&... rest) { + using FixedArg = typename std::remove_reference_t; - return Wrap(input, output, static_cast(this), callable); + const auto Callable = [&](auto& fixed, auto& var, auto& inl_in, auto& inl_out) -> NvResult { + return (self->*callable)(fixed, std::forward(rest)...); + }; + + return WrapGeneric(std::move(Callable), input, {}, output, {}); } -template -NvResult nvdevice::Wrap2(F&& callable, std::span input, std::span inline_input, - std::span output) { - using Self = decltype(Ioctl23Traits::GetClassImpl(callable)); - using InnerArg = std::remove_reference_t; +template +NvResult WrapFixedInlOut(Self* self, F&& callable, std::span input, std::span output, std::span inline_output, Rest&&... rest) { + using FixedArg = typename std::remove_reference_t; + using InlOutVarArg = typename std::remove_reference_t::value_type; - return Wrap(input, output, static_cast(this), callable, inline_input); + const auto Callable = [&](auto& fixed, auto& var, auto& inl_in, auto& inl_out) -> NvResult { + return (self->*callable)(fixed, inl_out, std::forward(rest)...); + }; + + return WrapGeneric(std::move(Callable), input, {}, output, inline_output); } -template -NvResult nvdevice::Wrap3(F&& callable, std::span input, std::span output, - std::span inline_output) { - using Self = decltype(Ioctl23Traits::GetClassImpl(callable)); - using InnerArg = std::remove_reference_t; +template +NvResult WrapVariable(Self* self, F&& callable, std::span input, std::span output, Rest&&... rest) { + using VarArg = typename std::remove_reference_t::value_type; - return Wrap(input, output, static_cast(this), callable, inline_output); + const auto Callable = [&](auto& fixed, auto& var, auto& inl_in, auto& inl_out) -> NvResult { + return (self->*callable)(var, std::forward(rest)...); + }; + + return WrapGeneric(std::move(Callable), input, {}, output, {}); } +template +NvResult WrapFixedVariable(Self* self, F&& callable, std::span input, std::span output, Rest&&... rest) { + using FixedArg = typename std::remove_reference_t; + using VarArg = typename std::remove_reference_t::value_type; + + const auto Callable = [&](auto& fixed, auto& var, auto& inl_in, auto& inl_out) -> NvResult { + return (self->*callable)(fixed, var, std::forward(rest)...); + }; + + return WrapGeneric(std::move(Callable), input, {}, output, {}); +} + +template +NvResult WrapFixedInlIn(Self* self, F&& callable, std::span input, std::span inline_input, std::span output, Rest&&... rest) { + using FixedArg = typename std::remove_reference_t; + using InlInVarArg = typename std::remove_reference_t::value_type; + + const auto Callable = [&](auto& fixed, auto& var, auto& inl_in, auto& inl_out) -> NvResult { + return (self->*callable)(fixed, inl_in, std::forward(rest)...); + }; + + return WrapGeneric(std::move(Callable), input, inline_input, output, {}); +} + +// clang-format on + } // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvdrv/devices/nvdevice.h b/src/core/hle/service/nvdrv/devices/nvdevice.h index af766f320..a04538d5d 100644 --- a/src/core/hle/service/nvdrv/devices/nvdevice.h +++ b/src/core/hle/service/nvdrv/devices/nvdevice.h @@ -74,18 +74,6 @@ public: return nullptr; } -protected: - template - NvResult Wrap1(F&& callable, std::span input, std::span output); - - template - NvResult Wrap2(F&& callable, std::span input, std::span inline_input, - std::span output); - - template - NvResult Wrap3(F&& callable, std::span input, std::span output, - std::span inline_output); - protected: Core::System& system; }; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp index 484001071..6b3639008 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp @@ -34,21 +34,21 @@ NvResult nvhost_as_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span i case 'A': switch (command.cmd) { case 0x1: - return Wrap1(&nvhost_as_gpu::BindChannel, input, output); + return WrapFixed(this, &nvhost_as_gpu::BindChannel, input, output); case 0x2: - return Wrap1(&nvhost_as_gpu::AllocateSpace, input, output); + return WrapFixed(this, &nvhost_as_gpu::AllocateSpace, input, output); case 0x3: - return Wrap1(&nvhost_as_gpu::FreeSpace, input, output); + return WrapFixed(this, &nvhost_as_gpu::FreeSpace, input, output); case 0x5: - return Wrap1(&nvhost_as_gpu::UnmapBuffer, input, output); + return WrapFixed(this, &nvhost_as_gpu::UnmapBuffer, input, output); case 0x6: - return Wrap1(&nvhost_as_gpu::MapBufferEx, input, output); + return WrapFixed(this, &nvhost_as_gpu::MapBufferEx, input, output); case 0x8: - return Wrap1(&nvhost_as_gpu::GetVARegions1, input, output); + return WrapFixed(this, &nvhost_as_gpu::GetVARegions1, input, output); case 0x9: - return Wrap1(&nvhost_as_gpu::AllocAsEx, input, output); + return WrapFixed(this, &nvhost_as_gpu::AllocAsEx, input, output); case 0x14: - return Wrap1(&nvhost_as_gpu::Remap, input, output); + return WrapVariable(this, &nvhost_as_gpu::Remap, input, output); default: break; } @@ -73,7 +73,8 @@ NvResult nvhost_as_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span i case 'A': switch (command.cmd) { case 0x8: - return Wrap3(&nvhost_as_gpu::GetVARegions3, input, output, inline_output); + return WrapFixedInlOut(this, &nvhost_as_gpu::GetVARegions3, input, output, + inline_output); default: break; } @@ -482,7 +483,7 @@ NvResult nvhost_as_gpu::GetVARegions1(IoctlGetVaRegions& params) { return NvResult::Success; } -NvResult nvhost_as_gpu::GetVARegions3(IoctlGetVaRegions& params, std::span inline_output) { +NvResult nvhost_as_gpu::GetVARegions3(IoctlGetVaRegions& params, std::span regions) { LOG_DEBUG(Service_NVDRV, "called, buf_addr={:X}, buf_size={:X}", params.buf_addr, params.buf_size); @@ -494,7 +495,10 @@ NvResult nvhost_as_gpu::GetVARegions3(IoctlGetVaRegions& params, std::span i GetVARegionsImpl(params); - std::memcpy(inline_output.data(), params.regions.data(), 2 * sizeof(VaRegion)); + const size_t num_regions = std::min(params.regions.size(), regions.size()); + for (size_t i = 0; i < num_regions; i++) { + regions[i] = params.regions[i]; + } return NvResult::Success; } diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h index bc041f215..932997e75 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.h @@ -149,7 +149,7 @@ private: void GetVARegionsImpl(IoctlGetVaRegions& params); NvResult GetVARegions1(IoctlGetVaRegions& params); - NvResult GetVARegions3(IoctlGetVaRegions& params, std::span inline_output); + NvResult GetVARegions3(IoctlGetVaRegions& params, std::span regions); void FreeMappingLocked(u64 offset); diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp index 8cefff6d1..b8dd34e24 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp @@ -41,19 +41,19 @@ NvResult nvhost_ctrl::Ioctl1(DeviceFD fd, Ioctl command, std::span inp case 0x0: switch (command.cmd) { case 0x1b: - return Wrap1(&nvhost_ctrl::NvOsGetConfigU32, input, output); + return WrapFixed(this, &nvhost_ctrl::NvOsGetConfigU32, input, output); case 0x1c: - return Wrap1(&nvhost_ctrl::IocCtrlClearEventWait, input, output); + return WrapFixed(this, &nvhost_ctrl::IocCtrlClearEventWait, input, output); case 0x1d: - return Wrap1(&nvhost_ctrl::IocCtrlEventWaitWithAllocation, input, output); + return WrapFixed(this, &nvhost_ctrl::IocCtrlEventWait, input, output, true); case 0x1e: - return Wrap1(&nvhost_ctrl::IocCtrlEventWaitNotAllocation, input, output); + return WrapFixed(this, &nvhost_ctrl::IocCtrlEventWait, input, output, false); case 0x1f: - return Wrap1(&nvhost_ctrl::IocCtrlEventRegister, input, output); + return WrapFixed(this, &nvhost_ctrl::IocCtrlEventRegister, input, output); case 0x20: - return Wrap1(&nvhost_ctrl::IocCtrlEventUnregister, input, output); + return WrapFixed(this, &nvhost_ctrl::IocCtrlEventUnregister, input, output); case 0x21: - return Wrap1(&nvhost_ctrl::IocCtrlEventUnregisterBatch, input, output); + return WrapFixed(this, &nvhost_ctrl::IocCtrlEventUnregisterBatch, input, output); } break; default: @@ -86,7 +86,7 @@ NvResult nvhost_ctrl::NvOsGetConfigU32(IocGetConfigParams& params) { return NvResult::ConfigVarNotFound; // Returns error on production mode } -NvResult nvhost_ctrl::IocCtrlEventWaitImpl(IocCtrlEventWaitParams& params, bool is_allocation) { +NvResult nvhost_ctrl::IocCtrlEventWait(IocCtrlEventWaitParams& params, bool is_allocation) { LOG_DEBUG(Service_NVDRV, "syncpt_id={}, threshold={}, timeout={}, is_allocation={}", params.fence.id, params.fence.value, params.timeout, is_allocation); diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h index 6913c61ac..992124b60 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.h @@ -190,20 +190,11 @@ private: NvResult IocCtrlEventRegister(IocCtrlEventRegisterParams& params); NvResult IocCtrlEventUnregister(IocCtrlEventUnregisterParams& params); NvResult IocCtrlEventUnregisterBatch(IocCtrlEventUnregisterBatchParams& params); + NvResult IocCtrlEventWait(IocCtrlEventWaitParams& params, bool is_allocation); NvResult IocCtrlClearEventWait(IocCtrlEventClearParams& params); NvResult FreeEvent(u32 slot); - // TODO: these are not the correct names - NvResult IocCtrlEventWaitNotAllocation(IocCtrlEventWaitParams& params) { - return this->IocCtrlEventWaitImpl(params, false); - } - NvResult IocCtrlEventWaitWithAllocation(IocCtrlEventWaitParams& params) { - return this->IocCtrlEventWaitImpl(params, true); - } - - NvResult IocCtrlEventWaitImpl(IocCtrlEventWaitParams& params, bool is_allocation); - EventInterface& events_interface; NvCore::Container& core; NvCore::SyncpointManager& syncpoint_manager; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp index 92e677b3d..61a2df121 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.cpp @@ -28,23 +28,23 @@ NvResult nvhost_ctrl_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span case 'G': switch (command.cmd) { case 0x1: - return Wrap1(&nvhost_ctrl_gpu::ZCullGetCtxSize, input, output); + return WrapFixed(this, &nvhost_ctrl_gpu::ZCullGetCtxSize, input, output); case 0x2: - return Wrap1(&nvhost_ctrl_gpu::ZCullGetInfo, input, output); + return WrapFixed(this, &nvhost_ctrl_gpu::ZCullGetInfo, input, output); case 0x3: - return Wrap1(&nvhost_ctrl_gpu::ZBCSetTable, input, output); + return WrapFixed(this, &nvhost_ctrl_gpu::ZBCSetTable, input, output); case 0x4: - return Wrap1(&nvhost_ctrl_gpu::ZBCQueryTable, input, output); + return WrapFixed(this, &nvhost_ctrl_gpu::ZBCQueryTable, input, output); case 0x5: - return Wrap1(&nvhost_ctrl_gpu::GetCharacteristics1, input, output); + return WrapFixed(this, &nvhost_ctrl_gpu::GetCharacteristics1, input, output); case 0x6: - return Wrap1(&nvhost_ctrl_gpu::GetTPCMasks1, input, output); + return WrapFixed(this, &nvhost_ctrl_gpu::GetTPCMasks1, input, output); case 0x7: - return Wrap1(&nvhost_ctrl_gpu::FlushL2, input, output); + return WrapFixed(this, &nvhost_ctrl_gpu::FlushL2, input, output); case 0x14: - return Wrap1(&nvhost_ctrl_gpu::GetActiveSlotMask, input, output); + return WrapFixed(this, &nvhost_ctrl_gpu::GetActiveSlotMask, input, output); case 0x1c: - return Wrap1(&nvhost_ctrl_gpu::GetGpuTime, input, output); + return WrapFixed(this, &nvhost_ctrl_gpu::GetGpuTime, input, output); default: break; } @@ -66,9 +66,11 @@ NvResult nvhost_ctrl_gpu::Ioctl3(DeviceFD fd, Ioctl command, std::span case 'G': switch (command.cmd) { case 0x5: - return Wrap3(&nvhost_ctrl_gpu::GetCharacteristics3, input, output, inline_output); + return WrapFixedInlOut(this, &nvhost_ctrl_gpu::GetCharacteristics3, input, output, + inline_output); case 0x6: - return Wrap3(&nvhost_ctrl_gpu::GetTPCMasks3, input, output, inline_output); + return WrapFixedInlOut(this, &nvhost_ctrl_gpu::GetTPCMasks3, input, output, + inline_output); default: break; } @@ -125,8 +127,8 @@ NvResult nvhost_ctrl_gpu::GetCharacteristics1(IoctlCharacteristics& params) { return NvResult::Success; } -NvResult nvhost_ctrl_gpu::GetCharacteristics3(IoctlCharacteristics& params, - std::span inline_output) { +NvResult nvhost_ctrl_gpu::GetCharacteristics3( + IoctlCharacteristics& params, std::span gpu_characteristics) { LOG_DEBUG(Service_NVDRV, "called"); params.gc.arch = 0x120; @@ -166,8 +168,9 @@ NvResult nvhost_ctrl_gpu::GetCharacteristics3(IoctlCharacteristics& params, params.gc.gr_compbit_store_base_hw = 0x0; params.gpu_characteristics_buf_size = 0xA0; params.gpu_characteristics_buf_addr = 0xdeadbeef; // Cannot be 0 (UNUSED) - std::memcpy(inline_output.data(), ¶ms.gc, - std::min(sizeof(params.gc), inline_output.size())); + if (!gpu_characteristics.empty()) { + gpu_characteristics.front() = params.gc; + } return NvResult::Success; } @@ -179,14 +182,14 @@ NvResult nvhost_ctrl_gpu::GetTPCMasks1(IoctlGpuGetTpcMasksArgs& params) { return NvResult::Success; } -NvResult nvhost_ctrl_gpu::GetTPCMasks3(IoctlGpuGetTpcMasksArgs& params, - std::span inline_output) { +NvResult nvhost_ctrl_gpu::GetTPCMasks3(IoctlGpuGetTpcMasksArgs& params, std::span tpc_mask) { LOG_DEBUG(Service_NVDRV, "called, mask_buffer_size=0x{:X}", params.mask_buffer_size); if (params.mask_buffer_size != 0) { params.tcp_mask = 3; } - std::memcpy(inline_output.data(), ¶ms.tcp_mask, - std::min(sizeof(params.tcp_mask), inline_output.size())); + if (!tpc_mask.empty()) { + tpc_mask.front() = params.tcp_mask; + } return NvResult::Success; } diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h index e1977a6b5..d170299bd 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl_gpu.h @@ -152,10 +152,11 @@ private: static_assert(sizeof(IoctlGetGpuTime) == 0x10, "IoctlGetGpuTime is incorrect size"); NvResult GetCharacteristics1(IoctlCharacteristics& params); - NvResult GetCharacteristics3(IoctlCharacteristics& params, std::span inline_output); + NvResult GetCharacteristics3(IoctlCharacteristics& params, + std::span gpu_characteristics); NvResult GetTPCMasks1(IoctlGpuGetTpcMasksArgs& params); - NvResult GetTPCMasks3(IoctlGpuGetTpcMasksArgs& params, std::span inline_output); + NvResult GetTPCMasks3(IoctlGpuGetTpcMasksArgs& params, std::span tpc_mask); NvResult GetActiveSlotMask(IoctlActiveSlotMask& params); NvResult ZCullGetCtxSize(IoctlZcullGetCtxSize& params); diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp index 2d67acc6a..b0395c2f0 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp @@ -53,7 +53,7 @@ NvResult nvhost_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span inpu case 0x0: switch (command.cmd) { case 0x3: - return Wrap1(&nvhost_gpu::GetWaitbase, input, output); + return WrapFixed(this, &nvhost_gpu::GetWaitbase, input, output); default: break; } @@ -61,25 +61,25 @@ NvResult nvhost_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span inpu case 'H': switch (command.cmd) { case 0x1: - return Wrap1(&nvhost_gpu::SetNVMAPfd, input, output); + return WrapFixed(this, &nvhost_gpu::SetNVMAPfd, input, output); case 0x3: - return Wrap1(&nvhost_gpu::ChannelSetTimeout, input, output); + return WrapFixed(this, &nvhost_gpu::ChannelSetTimeout, input, output); case 0x8: - return SubmitGPFIFOBase1(input, false); + return WrapFixedVariable(this, &nvhost_gpu::SubmitGPFIFOBase1, input, output, false); case 0x9: - return Wrap1(&nvhost_gpu::AllocateObjectContext, input, output); + return WrapFixed(this, &nvhost_gpu::AllocateObjectContext, input, output); case 0xb: - return Wrap1(&nvhost_gpu::ZCullBind, input, output); + return WrapFixed(this, &nvhost_gpu::ZCullBind, input, output); case 0xc: - return Wrap1(&nvhost_gpu::SetErrorNotifier, input, output); + return WrapFixed(this, &nvhost_gpu::SetErrorNotifier, input, output); case 0xd: - return Wrap1(&nvhost_gpu::SetChannelPriority, input, output); + return WrapFixed(this, &nvhost_gpu::SetChannelPriority, input, output); case 0x1a: - return Wrap1(&nvhost_gpu::AllocGPFIFOEx2, input, output); + return WrapFixed(this, &nvhost_gpu::AllocGPFIFOEx2, input, output); case 0x1b: - return SubmitGPFIFOBase1(input, true); + return WrapFixedVariable(this, &nvhost_gpu::SubmitGPFIFOBase1, input, output, true); case 0x1d: - return Wrap1(&nvhost_gpu::ChannelSetTimeslice, input, output); + return WrapFixed(this, &nvhost_gpu::ChannelSetTimeslice, input, output); default: break; } @@ -87,9 +87,9 @@ NvResult nvhost_gpu::Ioctl1(DeviceFD fd, Ioctl command, std::span inpu case 'G': switch (command.cmd) { case 0x14: - return Wrap1(&nvhost_gpu::SetClientData, input, output); + return WrapFixed(this, &nvhost_gpu::SetClientData, input, output); case 0x15: - return Wrap1(&nvhost_gpu::GetClientData, input, output); + return WrapFixed(this, &nvhost_gpu::GetClientData, input, output); default: break; } @@ -105,7 +105,8 @@ NvResult nvhost_gpu::Ioctl2(DeviceFD fd, Ioctl command, std::span inpu case 'H': switch (command.cmd) { case 0x1b: - return SubmitGPFIFOBase2(input, inline_input); + return WrapFixedInlIn(this, &nvhost_gpu::SubmitGPFIFOBase2, input, inline_input, + output); } break; } @@ -271,36 +272,35 @@ NvResult nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, Tegra::CommandL return NvResult::Success; } -NvResult nvhost_gpu::SubmitGPFIFOBase1(std::span input, bool kickoff) { - if (input.size() < sizeof(IoctlSubmitGpfifo)) { +NvResult nvhost_gpu::SubmitGPFIFOBase1(IoctlSubmitGpfifo& params, + std::span commands, bool kickoff) { + if (params.num_entries > commands.size()) { UNIMPLEMENTED(); return NvResult::InvalidSize; } - IoctlSubmitGpfifo params{}; - std::memcpy(¶ms, input.data(), sizeof(IoctlSubmitGpfifo)); - Tegra::CommandList entries(params.num_entries); + Tegra::CommandList entries(params.num_entries); if (kickoff) { system.ApplicationMemory().ReadBlock(params.address, entries.command_lists.data(), params.num_entries * sizeof(Tegra::CommandListHeader)); } else { - std::memcpy(entries.command_lists.data(), &input[sizeof(IoctlSubmitGpfifo)], + std::memcpy(entries.command_lists.data(), commands.data(), params.num_entries * sizeof(Tegra::CommandListHeader)); } return SubmitGPFIFOImpl(params, std::move(entries)); } -NvResult nvhost_gpu::SubmitGPFIFOBase2(std::span input, - std::span input_inline) { - if (input.size() < sizeof(IoctlSubmitGpfifo)) { +NvResult nvhost_gpu::SubmitGPFIFOBase2(IoctlSubmitGpfifo& params, + std::span commands) { + if (params.num_entries > commands.size()) { UNIMPLEMENTED(); return NvResult::InvalidSize; } - IoctlSubmitGpfifo params{}; - std::memcpy(¶ms, input.data(), sizeof(IoctlSubmitGpfifo)); + Tegra::CommandList entries(params.num_entries); - std::memcpy(entries.command_lists.data(), input_inline.data(), input_inline.size()); + std::memcpy(entries.command_lists.data(), commands.data(), + params.num_entries * sizeof(Tegra::CommandListHeader)); return SubmitGPFIFOImpl(params, std::move(entries)); } diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h index 703079a54..88fd228ff 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h @@ -196,8 +196,10 @@ private: NvResult AllocateObjectContext(IoctlAllocObjCtx& params); NvResult SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, Tegra::CommandList&& entries); - NvResult SubmitGPFIFOBase1(std::span input, bool kickoff = false); - NvResult SubmitGPFIFOBase2(std::span input, std::span input_inline); + NvResult SubmitGPFIFOBase1(IoctlSubmitGpfifo& params, + std::span commands, bool kickoff = false); + NvResult SubmitGPFIFOBase2(IoctlSubmitGpfifo& params, + std::span commands); NvResult GetWaitbase(IoctlGetWaitbase& params); NvResult ChannelSetTimeout(IoctlChannelSetTimeout& params); diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp index 74790a7d8..f43914e1b 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp @@ -26,18 +26,18 @@ NvResult nvhost_nvdec::Ioctl1(DeviceFD fd, Ioctl command, std::span in if (!host1x_file.fd_to_id.contains(fd)) { host1x_file.fd_to_id[fd] = host1x_file.nvdec_next_id++; } - return Submit(fd, input, output); + return WrapFixedVariable(this, &nvhost_nvdec::Submit, input, output, fd); } case 0x2: - return Wrap1(&nvhost_nvdec::GetSyncpoint, input, output); + return WrapFixed(this, &nvhost_nvdec::GetSyncpoint, input, output); case 0x3: - return Wrap1(&nvhost_nvdec::GetWaitbase, input, output); + return WrapFixed(this, &nvhost_nvdec::GetWaitbase, input, output); case 0x7: - return Wrap1(&nvhost_nvdec::SetSubmitTimeout, input, output); + return WrapFixed(this, &nvhost_nvdec::SetSubmitTimeout, input, output); case 0x9: - return MapBuffer(input, output); + return WrapFixedVariable(this, &nvhost_nvdec::MapBuffer, input, output); case 0xa: - return UnmapBuffer(input, output); + return WrapFixedVariable(this, &nvhost_nvdec::UnmapBuffer, input, output); default: break; } @@ -45,7 +45,7 @@ NvResult nvhost_nvdec::Ioctl1(DeviceFD fd, Ioctl command, std::span in case 'H': switch (command.cmd) { case 0x1: - return Wrap1(&nvhost_nvdec::SetNVMAPfd, input, output); + return WrapFixed(this, &nvhost_nvdec::SetNVMAPfd, input, output); default: break; } diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp index 3fdf383f0..74c701b95 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp @@ -76,13 +76,7 @@ NvResult nvhost_nvdec_common::SetNVMAPfd(IoctlSetNvmapFD& params) { return NvResult::Success; } -NvResult nvhost_nvdec_common::Submit(DeviceFD fd, std::span input, std::span output) { - if (input.size() < sizeof(IoctlSubmit) || output.size() < sizeof(IoctlSubmit)) { - UNIMPLEMENTED(); - return NvResult::InvalidSize; - } - IoctlSubmit params{}; - std::memcpy(¶ms, input.data(), std::min(input.size(), sizeof(IoctlSubmit))); +NvResult nvhost_nvdec_common::Submit(IoctlSubmit& params, std::span data, DeviceFD fd) { LOG_DEBUG(Service_NVDRV, "called NVDEC Submit, cmd_buffer_count={}", params.cmd_buffer_count); // Instantiate param buffers @@ -93,12 +87,12 @@ NvResult nvhost_nvdec_common::Submit(DeviceFD fd, std::span input, std std::vector fence_thresholds(params.fence_count); // Slice input into their respective buffers - std::size_t offset = sizeof(IoctlSubmit); - offset += SliceVectors(input, command_buffers, params.cmd_buffer_count, offset); - offset += SliceVectors(input, relocs, params.relocation_count, offset); - offset += SliceVectors(input, reloc_shifts, params.relocation_count, offset); - offset += SliceVectors(input, syncpt_increments, params.syncpoint_count, offset); - offset += SliceVectors(input, fence_thresholds, params.fence_count, offset); + std::size_t offset = 0; + offset += SliceVectors(data, command_buffers, params.cmd_buffer_count, offset); + offset += SliceVectors(data, relocs, params.relocation_count, offset); + offset += SliceVectors(data, reloc_shifts, params.relocation_count, offset); + offset += SliceVectors(data, syncpt_increments, params.syncpoint_count, offset); + offset += SliceVectors(data, fence_thresholds, params.fence_count, offset); auto& gpu = system.GPU(); if (gpu.UseNvdec()) { @@ -116,14 +110,13 @@ NvResult nvhost_nvdec_common::Submit(DeviceFD fd, std::span input, std cmdlist.size() * sizeof(u32)); gpu.PushCommandBuffer(core.Host1xDeviceFile().fd_to_id[fd], cmdlist); } - std::memcpy(output.data(), ¶ms, sizeof(IoctlSubmit)); // Some games expect command_buffers to be written back - offset = sizeof(IoctlSubmit); - offset += WriteVectors(output, command_buffers, offset); - offset += WriteVectors(output, relocs, offset); - offset += WriteVectors(output, reloc_shifts, offset); - offset += WriteVectors(output, syncpt_increments, offset); - offset += WriteVectors(output, fence_thresholds, offset); + offset = 0; + offset += WriteVectors(data, command_buffers, offset); + offset += WriteVectors(data, relocs, offset); + offset += WriteVectors(data, reloc_shifts, offset); + offset += WriteVectors(data, syncpt_increments, offset); + offset += WriteVectors(data, fence_thresholds, offset); return NvResult::Success; } @@ -140,40 +133,24 @@ NvResult nvhost_nvdec_common::GetWaitbase(IoctlGetWaitbase& params) { return NvResult::Success; } -NvResult nvhost_nvdec_common::MapBuffer(std::span input, std::span output) { - IoctlMapBuffer params{}; - std::memcpy(¶ms, input.data(), sizeof(IoctlMapBuffer)); - std::vector cmd_buffer_handles(params.num_entries); - - SliceVectors(input, cmd_buffer_handles, params.num_entries, sizeof(IoctlMapBuffer)); - - for (auto& cmd_buffer : cmd_buffer_handles) { - cmd_buffer.map_address = nvmap.PinHandle(cmd_buffer.map_handle); +NvResult nvhost_nvdec_common::MapBuffer(IoctlMapBuffer& params, std::span entries) { + const size_t num_entries = std::min(params.num_entries, static_cast(entries.size())); + for (size_t i = 0; i < num_entries; i++) { + entries[i].map_address = nvmap.PinHandle(entries[i].map_handle); } - if (output.size() < - sizeof(IoctlMapBuffer) + cmd_buffer_handles.size() * sizeof(MapBufferEntry)) { - return NvResult::InvalidSize; - } - - std::memcpy(output.data(), ¶ms, sizeof(IoctlMapBuffer)); - std::memcpy(output.data() + sizeof(IoctlMapBuffer), cmd_buffer_handles.data(), - cmd_buffer_handles.size() * sizeof(MapBufferEntry)); - return NvResult::Success; } -NvResult nvhost_nvdec_common::UnmapBuffer(std::span input, std::span output) { - IoctlMapBuffer params{}; - std::memcpy(¶ms, input.data(), std::min(input.size(), sizeof(IoctlMapBuffer))); - std::vector cmd_buffer_handles(params.num_entries); - - SliceVectors(input, cmd_buffer_handles, params.num_entries, sizeof(IoctlMapBuffer)); - for (auto& cmd_buffer : cmd_buffer_handles) { - nvmap.UnpinHandle(cmd_buffer.map_handle); +NvResult nvhost_nvdec_common::UnmapBuffer(IoctlMapBuffer& params, + std::span entries) { + const size_t num_entries = std::min(params.num_entries, static_cast(entries.size())); + for (size_t i = 0; i < num_entries; i++) { + nvmap.UnpinHandle(entries[i].map_handle); + entries[i] = {}; } - std::memset(output.data(), 0, output.size()); + params = {}; return NvResult::Success; } diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h index cc988b897..7ce748e18 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h @@ -108,11 +108,11 @@ protected: /// Ioctl command implementations NvResult SetNVMAPfd(IoctlSetNvmapFD&); - NvResult Submit(DeviceFD fd, std::span input, std::span output); + NvResult Submit(IoctlSubmit& params, std::span input, DeviceFD fd); NvResult GetSyncpoint(IoctlGetSyncpoint& params); NvResult GetWaitbase(IoctlGetWaitbase& params); - NvResult MapBuffer(std::span input, std::span output); - NvResult UnmapBuffer(std::span input, std::span output); + NvResult MapBuffer(IoctlMapBuffer& params, std::span entries); + NvResult UnmapBuffer(IoctlMapBuffer& params, std::span entries); NvResult SetSubmitTimeout(u32 timeout); Kernel::KEvent* QueryEvent(u32 event_id) override; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp index 23a57c4d5..9e6b86458 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvjpg.cpp @@ -19,7 +19,7 @@ NvResult nvhost_nvjpg::Ioctl1(DeviceFD fd, Ioctl command, std::span in case 'H': switch (command.cmd) { case 0x1: - return Wrap1(&nvhost_nvjpg::SetNVMAPfd, input, output); + return WrapFixed(this, &nvhost_nvjpg::SetNVMAPfd, input, output); default: break; } diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp index 20af75872..87f8d7c22 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp @@ -26,16 +26,16 @@ NvResult nvhost_vic::Ioctl1(DeviceFD fd, Ioctl command, std::span inpu if (!host1x_file.fd_to_id.contains(fd)) { host1x_file.fd_to_id[fd] = host1x_file.vic_next_id++; } - return Submit(fd, input, output); + return WrapFixedVariable(this, &nvhost_vic::Submit, input, output, fd); } case 0x2: - return Wrap1(&nvhost_vic::GetSyncpoint, input, output); + return WrapFixed(this, &nvhost_vic::GetSyncpoint, input, output); case 0x3: - return Wrap1(&nvhost_vic::GetWaitbase, input, output); + return WrapFixed(this, &nvhost_vic::GetWaitbase, input, output); case 0x9: - return MapBuffer(input, output); + return WrapFixedVariable(this, &nvhost_vic::MapBuffer, input, output); case 0xa: - return UnmapBuffer(input, output); + return WrapFixedVariable(this, &nvhost_vic::UnmapBuffer, input, output); default: break; } @@ -43,7 +43,7 @@ NvResult nvhost_vic::Ioctl1(DeviceFD fd, Ioctl command, std::span inpu case 'H': switch (command.cmd) { case 0x1: - return Wrap1(&nvhost_vic::SetNVMAPfd, input, output); + return WrapFixed(this, &nvhost_vic::SetNVMAPfd, input, output); default: break; } diff --git a/src/core/hle/service/nvdrv/devices/nvmap.cpp b/src/core/hle/service/nvdrv/devices/nvmap.cpp index 94286e295..71b2e62ec 100644 --- a/src/core/hle/service/nvdrv/devices/nvmap.cpp +++ b/src/core/hle/service/nvdrv/devices/nvmap.cpp @@ -32,17 +32,17 @@ NvResult nvmap::Ioctl1(DeviceFD fd, Ioctl command, std::span input, case 0x1: switch (command.cmd) { case 0x1: - return Wrap1(&nvmap::IocCreate, input, output); + return WrapFixed(this, &nvmap::IocCreate, input, output); case 0x3: - return Wrap1(&nvmap::IocFromId, input, output); + return WrapFixed(this, &nvmap::IocFromId, input, output); case 0x4: - return Wrap1(&nvmap::IocAlloc, input, output); + return WrapFixed(this, &nvmap::IocAlloc, input, output); case 0x5: - return Wrap1(&nvmap::IocFree, input, output); + return WrapFixed(this, &nvmap::IocFree, input, output); case 0x9: - return Wrap1(&nvmap::IocParam, input, output); + return WrapFixed(this, &nvmap::IocParam, input, output); case 0xe: - return Wrap1(&nvmap::IocGetId, input, output); + return WrapFixed(this, &nvmap::IocGetId, input, output); default: break; }