From a28afca771f083596981405b2e80f89342146402 Mon Sep 17 00:00:00 2001 From: pineappleEA Date: Fri, 3 Dec 2021 04:35:20 +0100 Subject: [PATCH] early-access version 2263 --- README.md | 2 +- src/common/x64/native_clock.cpp | 6 +-- src/core/hle/service/am/am.cpp | 14 +++++- src/core/hle/service/am/am.h | 1 + src/core/hle/service/audio/hwopus.cpp | 4 +- src/core/hle/service/caps/caps.h | 2 +- .../service/nvdrv/devices/nvhost_nvdec.cpp | 11 ++--- .../nvdrv/devices/nvhost_nvdec_common.cpp | 5 +-- .../nvdrv/devices/nvhost_nvdec_common.h | 3 +- .../hle/service/nvdrv/devices/nvhost_vic.cpp | 11 ++--- src/core/perf_stats.h | 4 +- .../command_classes/codecs/codec.cpp | 41 +++++++++++++----- src/video_core/gpu.cpp | 43 +++++++++++++------ src/video_core/gpu.h | 4 +- src/video_core/shader_notify.cpp | 2 +- src/video_core/shader_notify.h | 2 +- src/yuzu/loading_screen.cpp | 4 +- src/yuzu/loading_screen.h | 4 +- 18 files changed, 101 insertions(+), 62 deletions(-) diff --git a/README.md b/README.md index 25fb66bd6..d68fe0735 100755 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ yuzu emulator early access ============= -This is the source code for early-access 2261. +This is the source code for early-access 2263. 
## Legal Notice diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp index 87de40624..28f834443 100755 --- a/src/common/x64/native_clock.cpp +++ b/src/common/x64/native_clock.cpp @@ -19,16 +19,16 @@ u64 EstimateRDTSCFrequency() { // get current time _mm_mfence(); const u64 tscStart = __rdtsc(); - const auto startTime = std::chrono::high_resolution_clock::now(); + const auto startTime = std::chrono::steady_clock::now(); // wait roughly 3 seconds while (true) { auto milli = std::chrono::duration_cast( - std::chrono::high_resolution_clock::now() - startTime); + std::chrono::steady_clock::now() - startTime); if (milli.count() >= 3000) break; std::this_thread::sleep_for(milli_10); } - const auto endTime = std::chrono::high_resolution_clock::now(); + const auto endTime = std::chrono::steady_clock::now(); _mm_mfence(); const u64 tscEnd = __rdtsc(); // calculate difference diff --git a/src/core/hle/service/am/am.cpp b/src/core/hle/service/am/am.cpp index aee8d4f93..e60661fe1 100755 --- a/src/core/hle/service/am/am.cpp +++ b/src/core/hle/service/am/am.cpp @@ -30,6 +30,7 @@ #include "core/hle/service/apm/apm_controller.h" #include "core/hle/service/apm/apm_interface.h" #include "core/hle/service/bcat/backend/backend.h" +#include "core/hle/service/caps/caps.h" #include "core/hle/service/filesystem/filesystem.h" #include "core/hle/service/ns/ns.h" #include "core/hle/service/nvflinger/nvflinger.h" @@ -298,7 +299,7 @@ ISelfController::ISelfController(Core::System& system_, NVFlinger::NVFlinger& nv {91, &ISelfController::GetAccumulatedSuspendedTickChangedEvent, "GetAccumulatedSuspendedTickChangedEvent"}, {100, &ISelfController::SetAlbumImageTakenNotificationEnabled, "SetAlbumImageTakenNotificationEnabled"}, {110, nullptr, "SetApplicationAlbumUserData"}, - {120, nullptr, "SaveCurrentScreenshot"}, + {120, &ISelfController::SaveCurrentScreenshot, "SaveCurrentScreenshot"}, {130, nullptr, "SetRecordVolumeMuted"}, {1000, nullptr, "GetDebugStorageChannel"}, }; 
@@ -579,6 +580,17 @@ void ISelfController::SetAlbumImageTakenNotificationEnabled(Kernel::HLERequestCo rb.Push(ResultSuccess); } +void ISelfController::SaveCurrentScreenshot(Kernel::HLERequestContext& ctx) { + IPC::RequestParser rp{ctx}; + + const auto album_report_option = rp.PopEnum(); + + LOG_WARNING(Service_AM, "(STUBBED) called. album_report_option={}", album_report_option); + + IPC::ResponseBuilder rb{ctx, 2}; + rb.Push(ResultSuccess); +} + AppletMessageQueue::AppletMessageQueue(Core::System& system) : service_context{system, "AppletMessageQueue"} { on_new_message = service_context.CreateEvent("AMMessageQueue:OnMessageReceived"); diff --git a/src/core/hle/service/am/am.h b/src/core/hle/service/am/am.h index 202d20757..2a578aea5 100755 --- a/src/core/hle/service/am/am.h +++ b/src/core/hle/service/am/am.h @@ -151,6 +151,7 @@ private: void GetAccumulatedSuspendedTickValue(Kernel::HLERequestContext& ctx); void GetAccumulatedSuspendedTickChangedEvent(Kernel::HLERequestContext& ctx); void SetAlbumImageTakenNotificationEnabled(Kernel::HLERequestContext& ctx); + void SaveCurrentScreenshot(Kernel::HLERequestContext& ctx); enum class ScreenshotPermission : u32 { Inherit = 0, diff --git a/src/core/hle/service/audio/hwopus.cpp b/src/core/hle/service/audio/hwopus.cpp index 7da1f2969..981b6c996 100755 --- a/src/core/hle/service/audio/hwopus.cpp +++ b/src/core/hle/service/audio/hwopus.cpp @@ -96,7 +96,7 @@ private: bool DecodeOpusData(u32& consumed, u32& sample_count, const std::vector& input, std::vector& output, u64* out_performance_time) const { - const auto start_time = std::chrono::high_resolution_clock::now(); + const auto start_time = std::chrono::steady_clock::now(); const std::size_t raw_output_sz = output.size() * sizeof(opus_int16); if (sizeof(OpusPacketHeader) > input.size()) { LOG_ERROR(Audio, "Input is smaller than the header size, header_sz={}, input_sz={}", @@ -135,7 +135,7 @@ private: return false; } - const auto end_time = 
std::chrono::high_resolution_clock::now() - start_time; + const auto end_time = std::chrono::steady_clock::now() - start_time; sample_count = out_sample_count; consumed = static_cast(sizeof(OpusPacketHeader) + hdr.size); if (out_performance_time != nullptr) { diff --git a/src/core/hle/service/caps/caps.h b/src/core/hle/service/caps/caps.h index b18adcb9d..7254055e6 100755 --- a/src/core/hle/service/caps/caps.h +++ b/src/core/hle/service/caps/caps.h @@ -24,7 +24,7 @@ enum class AlbumImageOrientation { Orientation3 = 3, }; -enum class AlbumReportOption { +enum class AlbumReportOption : s32 { Disable = 0, Enable = 1, }; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp index 8e2a16d86..0d7d4ad03 100755 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp @@ -21,7 +21,7 @@ NvResult nvhost_nvdec::Ioctl1(DeviceFD fd, Ioctl command, const std::vector& case 0x0: switch (command.cmd) { case 0x1: - return Submit(fd, input, output); + return Submit(input, output); case 0x2: return GetSyncpoint(input, output); case 0x3: @@ -62,16 +62,11 @@ NvResult nvhost_nvdec::Ioctl3(DeviceFD fd, Ioctl command, const std::vector& return NvResult::NotImplemented; } -void nvhost_nvdec::OnOpen(DeviceFD fd) { - static u32 next_id{}; - fd_to_id[fd] = next_id++; -} +void nvhost_nvdec::OnOpen(DeviceFD fd) {} void nvhost_nvdec::OnClose(DeviceFD fd) { LOG_INFO(Service_NVDRV, "NVDEC video stream ended"); - if (fd_to_id.find(fd) != fd_to_id.end()) { - system.GPU().ClearCdmaInstance(fd_to_id[fd]); - } + system.GPU().ClearCdmaInstance(); } } // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp index 8a05f0668..e61261f98 100755 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp @@ 
-59,8 +59,7 @@ NvResult nvhost_nvdec_common::SetNVMAPfd(const std::vector& input) { return NvResult::Success; } -NvResult nvhost_nvdec_common::Submit(DeviceFD fd, const std::vector& input, - std::vector& output) { +NvResult nvhost_nvdec_common::Submit(const std::vector& input, std::vector& output) { IoctlSubmit params{}; std::memcpy(&params, input.data(), sizeof(IoctlSubmit)); LOG_DEBUG(Service_NVDRV, "called NVDEC Submit, cmd_buffer_count={}", params.cmd_buffer_count); @@ -94,7 +93,7 @@ NvResult nvhost_nvdec_common::Submit(DeviceFD fd, const std::vector& input, Tegra::ChCommandHeaderList cmdlist(cmd_buffer.word_count); system.Memory().ReadBlock(object->addr + cmd_buffer.offset, cmdlist.data(), cmdlist.size() * sizeof(u32)); - gpu.PushCommandBuffer(fd_to_id[fd], cmdlist); + gpu.PushCommandBuffer(cmdlist); } std::memcpy(output.data(), &params, sizeof(IoctlSubmit)); // Some games expect command_buffers to be written back diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h index e28c54df6..351625c17 100755 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.h @@ -104,14 +104,13 @@ protected: /// Ioctl command implementations NvResult SetNVMAPfd(const std::vector& input); - NvResult Submit(DeviceFD fd, const std::vector& input, std::vector& output); + NvResult Submit(const std::vector& input, std::vector& output); NvResult GetSyncpoint(const std::vector& input, std::vector& output); NvResult GetWaitbase(const std::vector& input, std::vector& output); NvResult MapBuffer(const std::vector& input, std::vector& output); NvResult UnmapBuffer(const std::vector& input, std::vector& output); NvResult SetSubmitTimeout(const std::vector& input, std::vector& output); - std::unordered_map fd_to_id{}; s32_le nvmap_fd{}; u32_le submit_timeout{}; std::shared_ptr nvmap_dev; diff --git a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp 
b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp index 420fe21c8..eac4dd530 100755 --- a/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_vic.cpp @@ -21,7 +21,7 @@ NvResult nvhost_vic::Ioctl1(DeviceFD fd, Ioctl command, const std::vector& i case 0x0: switch (command.cmd) { case 0x1: - return Submit(fd, input, output); + return Submit(input, output); case 0x2: return GetSyncpoint(input, output); case 0x3: @@ -62,15 +62,10 @@ NvResult nvhost_vic::Ioctl3(DeviceFD fd, Ioctl command, const std::vector& i return NvResult::NotImplemented; } -void nvhost_vic::OnOpen(DeviceFD fd) { - static u32 next_id{}; - fd_to_id[fd] = next_id++; -} +void nvhost_vic::OnOpen(DeviceFD fd) {} void nvhost_vic::OnClose(DeviceFD fd) { - if (fd_to_id.find(fd) != fd_to_id.end()) { - system.GPU().ClearCdmaInstance(fd_to_id[fd]); - } + system.GPU().ClearCdmaInstance(); } } // namespace Service::Nvidia::Devices diff --git a/src/core/perf_stats.h b/src/core/perf_stats.h index a2541906f..816202588 100755 --- a/src/core/perf_stats.h +++ b/src/core/perf_stats.h @@ -33,7 +33,7 @@ public: explicit PerfStats(u64 title_id_); ~PerfStats(); - using Clock = std::chrono::high_resolution_clock; + using Clock = std::chrono::steady_clock; void BeginSystemFrame(); void EndSystemFrame(); @@ -87,7 +87,7 @@ private: class SpeedLimiter { public: - using Clock = std::chrono::high_resolution_clock; + using Clock = std::chrono::steady_clock; void DoSpeedLimiting(std::chrono::microseconds current_system_time_us); diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp index 02d309170..2c0d8da64 100755 --- a/src/video_core/command_classes/codecs/codec.cpp +++ b/src/video_core/command_classes/codecs/codec.cpp @@ -23,6 +23,17 @@ namespace Tegra { namespace { constexpr AVPixelFormat PREFERRED_GPU_FMT = AV_PIX_FMT_NV12; constexpr AVPixelFormat PREFERRED_CPU_FMT = AV_PIX_FMT_YUV420P; +constexpr std::array 
PREFERRED_GPU_DECODERS = { + AV_HWDEVICE_TYPE_CUDA, +#ifdef _WIN32 + AV_HWDEVICE_TYPE_D3D11VA, + AV_HWDEVICE_TYPE_DXVA2, +#elif defined(__linux__) + AV_HWDEVICE_TYPE_VDPAU, +#endif + // last resort for Linux Flatpak (w/ NVIDIA) + AV_HWDEVICE_TYPE_VULKAN, +}; void AVPacketDeleter(AVPacket* ptr) { av_packet_free(&ptr); @@ -61,6 +72,20 @@ Codec::~Codec() { av_buffer_unref(&av_gpu_decoder); } +// List all the currently available hwcontext in ffmpeg +static std::vector ListSupportedContexts() { + std::vector contexts{}; + AVHWDeviceType current_device_type = AV_HWDEVICE_TYPE_NONE; + do { + current_device_type = av_hwdevice_iterate_types(current_device_type); + // filter out VA-API since we will try that first if supported + if (current_device_type != AV_HWDEVICE_TYPE_VAAPI) { + contexts.push_back(current_device_type); + } + } while (current_device_type != AV_HWDEVICE_TYPE_NONE); + return contexts; +} + #ifdef LIBVA_FOUND // List all the currently loaded Linux modules static std::vector ListLinuxKernelModules() { @@ -122,16 +147,12 @@ bool Codec::CreateGpuAvDevice() { av_dict_free(&hwdevice_options); #endif static constexpr auto HW_CONFIG_METHOD = AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX; - static constexpr std::array GPU_DECODER_TYPES{ -#ifdef linux - AV_HWDEVICE_TYPE_VDPAU, -#endif - AV_HWDEVICE_TYPE_CUDA, -#ifdef _WIN32 - AV_HWDEVICE_TYPE_D3D11VA, -#endif - }; - for (const auto& type : GPU_DECODER_TYPES) { + static const auto supported_contexts = ListSupportedContexts(); + for (const auto& type : PREFERRED_GPU_DECODERS) { + if (std::none_of(supported_contexts.begin(), supported_contexts.end(), + [&type](const auto& context) { return context == type; })) { + continue; + } const int hwdevice_res = av_hwdevice_ctx_create(&av_gpu_decoder, type, nullptr, nullptr, 0); if (hwdevice_res < 0) { LOG_DEBUG(Service_NVDRV, "{} av_hwdevice_ctx_create failed {}", diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 27a47954d..ab7c21a49 100755 --- 
a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -185,6 +185,16 @@ struct GPU::Impl { return *dma_pusher; } + /// Returns a reference to the GPU CDMA pusher. + [[nodiscard]] Tegra::CDmaPusher& CDmaPusher() { + return *cdma_pusher; + } + + /// Returns a const reference to the GPU CDMA pusher. + [[nodiscard]] const Tegra::CDmaPusher& CDmaPusher() const { + return *cdma_pusher; + } + /// Returns a reference to the underlying renderer. [[nodiscard]] VideoCore::RendererBase& Renderer() { return *renderer; @@ -328,26 +338,25 @@ struct GPU::Impl { } /// Push GPU command buffer entries to be processed - void PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries) { + void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) { if (!use_nvdec) { return; } - if (cdma_pushers.find(id) == cdma_pushers.end()) { - cdma_pushers[id] = std::make_unique(gpu); + if (!cdma_pusher) { + cdma_pusher = std::make_unique(gpu); } // SubmitCommandBuffer would make the nvdec operations async, this is not currently working // TODO(ameerj): RE proper async nvdec operation // gpu_thread.SubmitCommandBuffer(std::move(entries)); - cdma_pushers[id]->ProcessEntries(std::move(entries)); + + cdma_pusher->ProcessEntries(std::move(entries)); } /// Frees the CDMAPusher instance to free up resources - void ClearCdmaInstance(u32 id) { - if (cdma_pushers.find(id) != cdma_pushers.end()) { - cdma_pushers.erase(id); - } + void ClearCdmaInstance() { + cdma_pusher.reset(); } /// Swap buffers (render frame) @@ -650,7 +659,7 @@ struct GPU::Impl { Core::System& system; std::unique_ptr memory_manager; std::unique_ptr dma_pusher; - std::map> cdma_pushers; + std::unique_ptr cdma_pusher; std::unique_ptr renderer; VideoCore::RasterizerInterface* rasterizer = nullptr; const bool use_nvdec; @@ -802,6 +811,14 @@ const Tegra::DmaPusher& GPU::DmaPusher() const { return impl->DmaPusher(); } +Tegra::CDmaPusher& GPU::CDmaPusher() { + return impl->CDmaPusher(); +} + +const Tegra::CDmaPusher& GPU::CDmaPusher() 
const { + return impl->CDmaPusher(); +} + VideoCore::RendererBase& GPU::Renderer() { return impl->Renderer(); } @@ -870,12 +887,12 @@ void GPU::PushGPUEntries(Tegra::CommandList&& entries) { impl->PushGPUEntries(std::move(entries)); } -void GPU::PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries) { - impl->PushCommandBuffer(id, entries); +void GPU::PushCommandBuffer(Tegra::ChCommandHeaderList& entries) { + impl->PushCommandBuffer(entries); } -void GPU::ClearCdmaInstance(u32 id) { - impl->ClearCdmaInstance(id); +void GPU::ClearCdmaInstance() { + impl->ClearCdmaInstance(); } void GPU::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 500411176..c89a5d693 100755 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -242,10 +242,10 @@ public: void PushGPUEntries(Tegra::CommandList&& entries); /// Push GPU command buffer entries to be processed - void PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries); + void PushCommandBuffer(Tegra::ChCommandHeaderList& entries); /// Frees the CDMAPusher instance to free up resources - void ClearCdmaInstance(u32 id); + void ClearCdmaInstance(); /// Swap buffers (render frame) void SwapBuffers(const Tegra::FramebufferConfig* framebuffer); diff --git a/src/video_core/shader_notify.cpp b/src/video_core/shader_notify.cpp index dc6995b46..bcaf5f575 100755 --- a/src/video_core/shader_notify.cpp +++ b/src/video_core/shader_notify.cpp @@ -18,7 +18,7 @@ int ShaderNotify::ShadersBuilding() noexcept { const int now_complete = num_complete.load(std::memory_order::relaxed); const int now_building = num_building.load(std::memory_order::relaxed); if (now_complete == now_building) { - const auto now = std::chrono::high_resolution_clock::now(); + const auto now = std::chrono::steady_clock::now(); if (completed && num_complete == num_when_completed) { if (now - complete_time > TIME_TO_STOP_REPORTING) { report_base = now_complete; diff --git 
a/src/video_core/shader_notify.h b/src/video_core/shader_notify.h index ad363bfb5..4d8d52071 100755 --- a/src/video_core/shader_notify.h +++ b/src/video_core/shader_notify.h @@ -28,6 +28,6 @@ private: bool completed{}; int num_when_completed{}; - std::chrono::high_resolution_clock::time_point complete_time; + std::chrono::steady_clock::time_point complete_time; }; } // namespace VideoCore diff --git a/src/yuzu/loading_screen.cpp b/src/yuzu/loading_screen.cpp index ae842306c..b001b8c23 100755 --- a/src/yuzu/loading_screen.cpp +++ b/src/yuzu/loading_screen.cpp @@ -136,7 +136,7 @@ void LoadingScreen::OnLoadComplete() { void LoadingScreen::OnLoadProgress(VideoCore::LoadCallbackStage stage, std::size_t value, std::size_t total) { using namespace std::chrono; - const auto now = high_resolution_clock::now(); + const auto now = steady_clock::now(); // reset the timer if the stage changes if (stage != previous_stage) { ui->progress_bar->setStyleSheet(QString::fromUtf8(progressbar_style[stage])); @@ -160,7 +160,7 @@ void LoadingScreen::OnLoadProgress(VideoCore::LoadCallbackStage stage, std::size // If theres a drastic slowdown in the rate, then display an estimate if (now - previous_time > milliseconds{50} || slow_shader_compile_start) { if (!slow_shader_compile_start) { - slow_shader_start = high_resolution_clock::now(); + slow_shader_start = steady_clock::now(); slow_shader_compile_start = true; slow_shader_first_value = value; } diff --git a/src/yuzu/loading_screen.h b/src/yuzu/loading_screen.h index 801d08e1a..29155a77c 100755 --- a/src/yuzu/loading_screen.h +++ b/src/yuzu/loading_screen.h @@ -84,8 +84,8 @@ private: // shaders, it will start quickly but end slow if new shaders were added since previous launch. 
// These variables are used to detect the change in speed so we can generate an ETA bool slow_shader_compile_start = false; - std::chrono::high_resolution_clock::time_point slow_shader_start; - std::chrono::high_resolution_clock::time_point previous_time; + std::chrono::steady_clock::time_point slow_shader_start; + std::chrono::steady_clock::time_point previous_time; std::size_t slow_shader_first_value = 0; };