From c8db4e5c9e78c04b8338b6492c65b37d97171a32 Mon Sep 17 00:00:00 2001 From: pineappleEA Date: Mon, 9 Aug 2021 02:12:07 +0200 Subject: [PATCH] early-access version 1983 --- CMakeLists.txt | 91 +++++- README.md | 2 +- src/common/settings.cpp | 4 +- src/common/settings.h | 8 +- src/core/telemetry_session.cpp | 16 +- src/video_core/CMakeLists.txt | 1 + .../command_classes/codecs/codec.cpp | 274 ++++++++++++------ src/video_core/command_classes/codecs/codec.h | 12 +- src/video_core/command_classes/codecs/vp9.cpp | 4 +- .../command_classes/codecs/vp9_types.h | 2 +- src/video_core/video_core.cpp | 3 +- src/yuzu/configuration/config.cpp | 7 +- src/yuzu/configuration/config.h | 1 + src/yuzu/configuration/configure_graphics.cpp | 42 ++- src/yuzu/configuration/configure_graphics.h | 1 + src/yuzu/configuration/configure_graphics.ui | 51 +++- src/yuzu_cmd/config.cpp | 2 +- src/yuzu_cmd/default_ini.h | 6 +- 18 files changed, 388 insertions(+), 139 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index de2413843..764ece8a4 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -518,6 +518,10 @@ set(FFmpeg_COMPONENTS avutil swscale) +if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux") + Include(FindPkgConfig REQUIRED) + pkg_check_modules(LIBVA libva) +endif() if (NOT YUZU_USE_BUNDLED_FFMPEG) # Use system installed FFmpeg find_package(FFmpeg QUIET COMPONENTS ${FFmpeg_COMPONENTS}) @@ -540,6 +544,9 @@ endif() if (YUZU_USE_BUNDLED_FFMPEG) if (NOT WIN32) + # TODO(lat9nq): Move this to externals/ffmpeg/CMakeLists.txt (and move externals/ffmpeg to + # externals/ffmpeg/ffmpeg) + # Build FFmpeg from externals message(STATUS "Using FFmpeg from externals") @@ -579,20 +586,23 @@ if (YUZU_USE_BUNDLED_FFMPEG) CACHE PATH "Paths to FFmpeg libraries" FORCE) endforeach() - set(FFmpeg_INCLUDE_DIR - "${FFmpeg_PREFIX};${FFmpeg_BUILD_DIR}" - CACHE PATH "Path to FFmpeg headers" FORCE) + Include(FindPkgConfig REQUIRED) + pkg_check_modules(LIBVA libva) + pkg_check_modules(CUDA cuda) + pkg_check_modules(FFNVCODEC ffnvcodec) + pkg_check_modules(VDPAU vdpau) + + set(FFmpeg_HWACCEL_LIBRARIES) + set(FFmpeg_HWACCEL_FLAGS) + set(FFmpeg_HWACCEL_INCLUDE_DIRS) + set(FFmpeg_HWACCEL_LDFLAGS) - if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux") - Include(FindPkgConfig REQUIRED) - pkg_check_modules(LIBVA libva) - endif() if(LIBVA_FOUND) pkg_check_modules(LIBDRM libdrm REQUIRED) find_package(X11 REQUIRED) pkg_check_modules(LIBVA-DRM libva-drm REQUIRED) pkg_check_modules(LIBVA-X11 libva-x11 REQUIRED) - set(FFmpeg_LIBVA_LIBRARIES + list(APPEND FFmpeg_HWACCEL_LIBRARIES ${LIBDRM_LIBRARIES} ${X11_LIBRARIES} ${LIBVA-DRM_LIBRARIES} @@ -602,11 +612,57 @@ if (YUZU_USE_BUNDLED_FFMPEG) --enable-hwaccel=h264_vaapi --enable-hwaccel=vp9_vaapi --enable-libdrm) + list(APPEND FFmpeg_HWACCEL_INCLUDE_DIRS + ${LIBDRM_INCLUDE_DIRS} + ${X11_INCLUDE_DIRS} + ${LIBVA-DRM_INCLUDE_DIRS} + ${LIBVA-X11_INCLUDE_DIRS} + ${LIBVA_INCLUDE_DIRS} + ) message(STATUS "VA-API found") else() set(FFmpeg_HWACCEL_FLAGS --disable-vaapi) endif() + if (FFNVCODEC_FOUND AND CUDA_FOUND) + list(APPEND FFmpeg_HWACCEL_FLAGS + --enable-cuvid + --enable-ffnvcodec + --enable-nvdec + --enable-cuda-llvm + --enable-hwaccel=h264_nvdec + --enable-hwaccel=vp9_nvdec + --extra-cflags=-I${CUDA_INCLUDE_DIRS} + ) + list(APPEND FFmpeg_HWACCEL_LIBRARIES + ${FFNVCODEC_LIBRARIES} + ${CUDA_LIBRARIES} + ) + list(APPEND FFmpeg_HWACCEL_INCLUDE_DIRS + ${FFNVCODEC_INCLUDE_DIRS} + ${CUDA_INCLUDE_DIRS} + ) + list(APPEND FFmpeg_HWACCEL_LDFLAGS + ${FFNVCODEC_LDFLAGS} + ${CUDA_LDFLAGS} + ) + message(STATUS "ffnvcodec libraries version ${FFNVCODEC_VERSION} found") + endif() + + if (VDPAU_FOUND) + list(APPEND FFmpeg_HWACCEL_FLAGS + --enable-vdpau + --enable-hwaccel=h264_vdpau + --enable-hwaccel=vp9_vdpau + ) + list(APPEND FFmpeg_HWACCEL_LIBRARIES ${VDPAU_LIBRARIES}) + list(APPEND FFmpeg_HWACCEL_INCLUDE_DIRS ${VDPAU_INCLUDE_DIRS}) + list(APPEND FFmpeg_HWACCEL_LDFLAGS ${VDPAU_LDFLAGS}) + message(STATUS "vdpau libraries version ${VDPAU_VERSION} found") + else() + list(APPEND FFmpeg_HWACCEL_FLAGS --disable-vdpau) + endif() + # `configure` parameters builds only exactly what yuzu needs from FFmpeg # `--disable-vdpau` is needed to avoid linking issues add_custom_command( @@ -624,7 +680,6 @@ if (YUZU_USE_BUNDLED_FFMPEG) --disable-network --disable-postproc --disable-swresample - --disable-vdpau --enable-decoder=h264 --enable-decoder=vp9 --cc="${CMAKE_C_COMPILER}" @@ -653,15 +708,26 @@ if (YUZU_USE_BUNDLED_FFMPEG) ${FFmpeg_BUILD_DIR} ) + set(FFmpeg_INCLUDE_DIR + "${FFmpeg_PREFIX};${FFmpeg_BUILD_DIR};${FFmpeg_HWACCEL_INCLUDE_DIRS}" + CACHE PATH "Path to FFmpeg headers" FORCE) + + set(FFmpeg_LDFLAGS + "${FFmpeg_HWACCEL_LDFLAGS}" + CACHE STRING "FFmpeg linker flags" FORCE) + # ALL makes this custom target build every time # but it won't actually build if the DEPENDS parameter is up to date add_custom_target(ffmpeg-configure ALL DEPENDS ${FFmpeg_MAKEFILE}) add_custom_target(ffmpeg-build ALL DEPENDS ${FFmpeg_BUILD_LIBRARIES} ffmpeg-configure) link_libraries(${FFmpeg_LIBVA_LIBRARIES}) - set(FFmpeg_LIBRARIES ${FFmpeg_LIBVA_LIBRARIES} ${FFmpeg_BUILD_LIBRARIES} + set(FFmpeg_LIBRARIES ${FFmpeg_BUILD_LIBRARIES} ${FFmpeg_HWACCEL_LIBRARIES} CACHE PATH "Paths to FFmpeg libraries" FORCE) unset(FFmpeg_BUILD_LIBRARIES) - unset(FFmpeg_LIBVA_LIBRARIES) + unset(FFmpeg_HWACCEL_FLAGS) + unset(FFmpeg_HWACCEL_INCLUDE_DIRS) + unset(FFmpeg_HWACCEL_LDFLAGS) + unset(FFmpeg_HWACCEL_LIBRARIES) if (FFmpeg_FOUND) message(STATUS "Found FFmpeg version ${FFmpeg_VERSION}") @@ -670,12 +736,13 @@ if (YUZU_USE_BUNDLED_FFMPEG) endif() else() # WIN32 # Use yuzu FFmpeg binaries - set(FFmpeg_EXT_NAME "ffmpeg-4.3.1") + set(FFmpeg_EXT_NAME "ffmpeg-4.4") set(FFmpeg_PATH "${CMAKE_BINARY_DIR}/externals/${FFmpeg_EXT_NAME}") download_bundled_external("ffmpeg/" ${FFmpeg_EXT_NAME} "") set(FFmpeg_FOUND YES) set(FFmpeg_INCLUDE_DIR "${FFmpeg_PATH}/include" CACHE PATH "Path to FFmpeg headers" FORCE) set(FFmpeg_LIBRARY_DIR "${FFmpeg_PATH}/bin" CACHE PATH "Path to FFmpeg library directory" FORCE) + set(FFmpeg_LDFLAGS "" CACHE STRING "FFmpeg linker flags" FORCE) set(FFmpeg_DLL_DIR "${FFmpeg_PATH}/bin" CACHE PATH "Path to FFmpeg dll's" FORCE) set(FFmpeg_LIBRARIES ${FFmpeg_LIBRARY_DIR}/swscale.lib diff --git a/README.md b/README.md index d9187ad93..8854bc1e9 100755 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ yuzu emulator early access ============= -This is the source code for early-access 1981. +This is the source code for early-access 1983. ## Legal Notice diff --git a/src/common/settings.cpp b/src/common/settings.cpp index fd3b639cd..0d2df80a8 100755 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -54,7 +54,7 @@ void LogSettings() { log_setting("Renderer_GPUAccuracyLevel", values.gpu_accuracy.GetValue()); log_setting("Renderer_UseAsynchronousGpuEmulation", values.use_asynchronous_gpu_emulation.GetValue()); - log_setting("Renderer_UseNvdecEmulation", values.use_nvdec_emulation.GetValue()); + log_setting("Renderer_NvdecEmulation", values.nvdec_emulation.GetValue()); log_setting("Renderer_AccelerateASTC", values.accelerate_astc.GetValue()); log_setting("Renderer_UseVsync", values.use_vsync.GetValue()); log_setting("Renderer_ShaderBackend", values.shader_backend.GetValue()); @@ -136,7 +136,7 @@ void RestoreGlobalState(bool is_powered_on) { values.use_disk_shader_cache.SetGlobal(true); values.gpu_accuracy.SetGlobal(true); values.use_asynchronous_gpu_emulation.SetGlobal(true); - values.use_nvdec_emulation.SetGlobal(true); + values.nvdec_emulation.SetGlobal(true); values.accelerate_astc.SetGlobal(true); values.use_vsync.SetGlobal(true); values.shader_backend.SetGlobal(true); diff --git a/src/common/settings.h b/src/common/settings.h index ddde09ca7..e78d01fbd 100755 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -47,6 +47,12 @@ enum class FullscreenMode : u32 { Exclusive = 1, }; +enum class NvdecEmulation : u32 { + Off = 0, + CPU = 1, + GPU = 2, +}; + /** The BasicSetting class is a simple resource manager. It defines a label and default value * alongside the actual value of the setting for simpler and less-error prone use with frontend * configurations. Setting a default value and label is required, though subclasses may deviate from @@ -342,7 +348,7 @@ struct Values { Setting use_disk_shader_cache{true, "use_disk_shader_cache"}; Setting gpu_accuracy{GPUAccuracy::High, "gpu_accuracy"}; Setting use_asynchronous_gpu_emulation{true, "use_asynchronous_gpu_emulation"}; - Setting use_nvdec_emulation{true, "use_nvdec_emulation"}; + Setting nvdec_emulation{NvdecEmulation::GPU, "nvdec_emulation"}; Setting accelerate_astc{true, "accelerate_astc"}; Setting use_vsync{true, "use_vsync"}; BasicSetting fps_cap{1000, "fps_cap"}; diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp index 5a8cfd301..1f1607998 100755 --- a/src/core/telemetry_session.cpp +++ b/src/core/telemetry_session.cpp @@ -72,6 +72,18 @@ static const char* TranslateGPUAccuracyLevel(Settings::GPUAccuracy backend) { return "Unknown"; } +static const char* TranslateNvdecEmulation(Settings::NvdecEmulation backend) { + switch (backend) { + case Settings::NvdecEmulation::Off: + return "Off"; + case Settings::NvdecEmulation::CPU: + return "CPU"; + case Settings::NvdecEmulation::GPU: + return "GPU"; + } + return "Unknown"; +} + u64 GetTelemetryId() { u64 telemetry_id{}; const auto filename = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ConfigDir) / "telemetry_id"; @@ -229,8 +241,8 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader, TranslateGPUAccuracyLevel(Settings::values.gpu_accuracy.GetValue())); AddField(field_type, "Renderer_UseAsynchronousGpuEmulation", Settings::values.use_asynchronous_gpu_emulation.GetValue()); - AddField(field_type, "Renderer_UseNvdecEmulation", - Settings::values.use_nvdec_emulation.GetValue()); + AddField(field_type, "Renderer_NvdecEmulation", + TranslateNvdecEmulation(Settings::values.nvdec_emulation.GetValue())); AddField(field_type, "Renderer_AccelerateASTC", Settings::values.accelerate_astc.GetValue()); AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync.GetValue()); AddField(field_type, "Renderer_ShaderBackend", diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 1eb67c051..380501d6a 100755 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -228,6 +228,7 @@ endif() target_include_directories(video_core PRIVATE ${FFmpeg_INCLUDE_DIR}) target_link_libraries(video_core PRIVATE ${FFmpeg_LIBRARIES}) +target_link_options(video_core PRIVATE ${FFmpeg_LDFLAGS}) add_dependencies(video_core host_shaders) target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE}) diff --git a/src/video_core/command_classes/codecs/codec.cpp b/src/video_core/command_classes/codecs/codec.cpp index f798a0053..f52ed4c6b 100755 --- a/src/video_core/command_classes/codecs/codec.cpp +++ b/src/video_core/command_classes/codecs/codec.cpp @@ -5,6 +5,7 @@ #include #include #include "common/assert.h" +#include "common/settings.h" #include "video_core/command_classes/codecs/codec.h" #include "video_core/command_classes/codecs/h264.h" #include "video_core/command_classes/codecs/vp9.h" @@ -16,44 +17,24 @@ extern "C" { } namespace Tegra { -#if defined(LIBVA_FOUND) -// Hardware acceleration code from FFmpeg/doc/examples/hw_decode.c originally under MIT license namespace { -constexpr std::array VAAPI_DRIVERS = { - "i915", - "amdgpu", -}; +void AVPacketDeleter(AVPacket* ptr) { + av_packet_free(&ptr); +} -AVPixelFormat GetHwFormat(AVCodecContext*, const AVPixelFormat* pix_fmts) { +using AVPacketPtr = std::unique_ptr; + +AVPixelFormat GetGpuFormat(AVCodecContext* av_codec_ctx, const AVPixelFormat* pix_fmts) { for (const AVPixelFormat* p = pix_fmts; *p != AV_PIX_FMT_NONE; ++p) { - if (*p == AV_PIX_FMT_VAAPI) { - return AV_PIX_FMT_VAAPI; + if (*p == av_codec_ctx->pix_fmt) { + return av_codec_ctx->pix_fmt; } } LOG_INFO(Service_NVDRV, "Could not find compatible GPU AV format, falling back to CPU"); - return *pix_fmts; -} - -bool CreateVaapiHwdevice(AVBufferRef** av_hw_device) { - AVDictionary* hwdevice_options = nullptr; - av_dict_set(&hwdevice_options, "connection_type", "drm", 0); - for (const auto& driver : VAAPI_DRIVERS) { - av_dict_set(&hwdevice_options, "kernel_driver", driver, 0); - const int hwdevice_error = av_hwdevice_ctx_create(av_hw_device, AV_HWDEVICE_TYPE_VAAPI, - nullptr, hwdevice_options, 0); - if (hwdevice_error >= 0) { - LOG_INFO(Service_NVDRV, "Using VA-API with {}", driver); - av_dict_free(&hwdevice_options); - return true; - } - LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed {}", hwdevice_error); - } - LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed for all drivers"); - av_dict_free(&hwdevice_options); - return false; + av_codec_ctx->pix_fmt = AV_PIX_FMT_NONE; + return AV_PIX_FMT_NONE; } } // namespace -#endif void AVFrameDeleter(AVFrame* ptr) { av_frame_free(&ptr); @@ -68,56 +49,158 @@ Codec::~Codec() { return; } // Free libav memory - avcodec_send_packet(av_codec_ctx, nullptr); - AVFrame* av_frame = av_frame_alloc(); - avcodec_receive_frame(av_codec_ctx, av_frame); - avcodec_flush_buffers(av_codec_ctx); - av_frame_free(&av_frame); - avcodec_close(av_codec_ctx); - av_buffer_unref(&av_hw_device); + avcodec_free_context(&av_codec_ctx); + av_buffer_unref(&av_gpu_decoder); } -void Codec::InitializeHwdec() { - // Prioritize integrated GPU to mitigate bandwidth bottlenecks +bool Codec::CreateGpuAvDevice() { #if defined(LIBVA_FOUND) - if (CreateVaapiHwdevice(&av_hw_device)) { - const auto hw_device_ctx = av_buffer_ref(av_hw_device); - ASSERT_MSG(hw_device_ctx, "av_buffer_ref failed"); - av_codec_ctx->hw_device_ctx = hw_device_ctx; - av_codec_ctx->get_format = GetHwFormat; + static constexpr std::array VAAPI_DRIVERS = { + "i915", + "iHD", + }; + AVDictionary* hwdevice_options = nullptr; + av_dict_set(&hwdevice_options, "connection_type", "drm", 0); + for (const auto& driver : VAAPI_DRIVERS) { + av_dict_set(&hwdevice_options, "kernel_driver", driver, 0); + const int hwdevice_error = av_hwdevice_ctx_create(&av_gpu_decoder, AV_HWDEVICE_TYPE_VAAPI, + nullptr, hwdevice_options, 0); + if (hwdevice_error >= 0) { + LOG_INFO(Service_NVDRV, "Using VA-API with {}", driver); + av_dict_free(&hwdevice_options); + av_codec_ctx->pix_fmt = AV_PIX_FMT_VAAPI; + return true; + } + LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed {}", hwdevice_error); + } + LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed for all drivers"); + av_dict_free(&hwdevice_options); +#endif + static constexpr auto HW_CONFIG_METHOD = AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX; + static constexpr std::array GPU_DECODER_TYPES{ + AV_HWDEVICE_TYPE_CUDA, + AV_HWDEVICE_TYPE_D3D11VA, + AV_HWDEVICE_TYPE_VDPAU, + }; + for (const auto& type : GPU_DECODER_TYPES) { + const int hwdevice_res = av_hwdevice_ctx_create(&av_gpu_decoder, type, nullptr, nullptr, 0); + if (hwdevice_res < 0) { + LOG_DEBUG(Service_NVDRV, "{} av_hwdevice_ctx_create failed {}", + av_hwdevice_get_type_name(type), hwdevice_res); + continue; + } + for (int i = 0;; i++) { + const AVCodecHWConfig* config = avcodec_get_hw_config(av_codec, i); + if (!config) { + LOG_DEBUG(Service_NVDRV, "{} decoder does not support device type {}.", + av_codec->name, av_hwdevice_get_type_name(type)); + break; + } + if (config->methods & HW_CONFIG_METHOD && config->device_type == type) { + av_codec_ctx->pix_fmt = config->pix_fmt; + LOG_INFO(Service_NVDRV, "Using {} GPU decoder", av_hwdevice_get_type_name(type)); + return true; + } + } + } + return false; +} + +void Codec::InitializeAvCodecContext() { + av_codec_ctx = avcodec_alloc_context3(av_codec); + av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0); +} + +void Codec::InitializeGpuDecoder() { + if (!CreateGpuAvDevice()) { + av_buffer_unref(&av_gpu_decoder); return; } -#endif - // TODO more GPU accelerated decoders + auto* hw_device_ctx = av_buffer_ref(av_gpu_decoder); + ASSERT_MSG(hw_device_ctx, "av_buffer_ref failed"); + av_codec_ctx->hw_device_ctx = hw_device_ctx; + av_codec_ctx->get_format = GetGpuFormat; +} + +void Codec::TestGpuDecoder() { + static constexpr std::array vp9_test{ + 0x92, 0x49, 0x83, 0x42, 0x00, 0x09, 0xf8, 0x05, 0x9b, 0x09, 0x1c, 0x12, + 0x0e, 0x0c, 0x32, 0x00, 0x02, 0x08, 0x7f, 0xcd, 0xec, 0x3f, 0x3b, 0x77, + 0x81, 0xf6, 0x47, 0xe2, 0xf0, 0x7e, 0x8b, 0x41, 0xfe, 0x3f, 0x87, 0x89, + 0x7d, 0x37, 0xa2, 0xd1, 0x3e, 0x9c, 0x59, 0x5f, 0x75, 0xee, 0xbb, 0x97, + }; + static constexpr std::array h264_test{ + 0x00, 0x00, 0x00, 0x01, 0x67, 0x64, 0x00, 0x0D, 0xAC, 0x34, 0xE5, 0x05, + 0x06, 0x7E, 0x78, 0x40, 0x00, 0x00, 0x19, 0x00, 0x00, 0x05, 0xDA, 0xA3, + 0xC5, 0x0A, 0x45, 0x80, 0x00, 0x00, 0x00, 0x01, 0x68, 0xEE, 0xB2, 0xC8, + 0xB0, 0x00, 0x00, 0x01, 0x65, 0x88, 0x80, 0x20, 0x01, 0xFF, 0xF3, 0x7E, + }; + auto test_data = [&] { + switch (current_codec) { + case NvdecCommon::VideoCodec::H264: + return h264_test; + case NvdecCommon::VideoCodec::Vp9: + return vp9_test; + default: + UNIMPLEMENTED_MSG("Unknown codec {}", current_codec); + return std::array{}; + } + }(); + AVPacketPtr packet{av_packet_alloc(), AVPacketDeleter}; + if (!packet) { + LOG_ERROR(Service_NVDRV, "av_packet_alloc failed"); + return; + } + // Temporarily disable logging, the test frames are incomplete and will log errors that can be + // ignored for our purposes. + av_log_set_level(AV_LOG_QUIET); + packet->data = test_data.data(); + packet->size = static_cast(test_data.size()); + avcodec_send_packet(av_codec_ctx, packet.get()); + av_log_set_level(AV_LOG_WARNING); + + // GetGpuFormat is invoked after the avcodec_send_packet call. + // Fallback to CPU decoding if no compatible GPU format was found. + if (av_codec_ctx->pix_fmt != AV_PIX_FMT_NONE) { + avcodec_flush_buffers(av_codec_ctx); + using_gpu_decode = true; + return; + } + avcodec_close(av_codec_ctx); + av_buffer_unref(&av_gpu_decoder); + InitializeAvCodecContext(); + avcodec_open2(av_codec_ctx, av_codec, nullptr); } void Codec::Initialize() { - AVCodecID codec; - switch (current_codec) { - case NvdecCommon::VideoCodec::H264: - codec = AV_CODEC_ID_H264; - break; - case NvdecCommon::VideoCodec::Vp9: - codec = AV_CODEC_ID_VP9; - break; - default: - UNIMPLEMENTED_MSG("Unknown codec {}", current_codec); - return; - } + const AVCodecID codec = [&] { + switch (current_codec) { + case NvdecCommon::VideoCodec::H264: + return AV_CODEC_ID_H264; + case NvdecCommon::VideoCodec::Vp9: + return AV_CODEC_ID_VP9; + default: + UNIMPLEMENTED_MSG("Unknown codec {}", current_codec); + return AV_CODEC_ID_NONE; + } + }(); av_codec = avcodec_find_decoder(codec); - av_codec_ctx = avcodec_alloc_context3(av_codec); - av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0); - InitializeHwdec(); - if (!av_codec_ctx->hw_device_ctx) { - LOG_INFO(Service_NVDRV, "Using FFmpeg software decoding"); + + InitializeAvCodecContext(); + if (Settings::values.nvdec_emulation.GetValue() == Settings::NvdecEmulation::GPU) { + InitializeGpuDecoder(); } - const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr); - if (av_error < 0) { - LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed."); - avcodec_close(av_codec_ctx); - av_buffer_unref(&av_hw_device); + if (const int res = avcodec_open2(av_codec_ctx, av_codec, nullptr); res < 0) { + LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed with result {}", res); + avcodec_free_context(&av_codec_ctx); + av_buffer_unref(&av_gpu_decoder); return; } + if (av_codec_ctx->hw_device_ctx) { + TestGpuDecoder(); + } else { + LOG_INFO(Service_NVDRV, "Using FFmpeg software decoding"); + } initialized = true; } @@ -133,6 +216,9 @@ void Codec::Decode() { if (is_first_frame) { Initialize(); } + if (!initialized) { + return; + } bool vp9_hidden_frame = false; std::vector frame_data; if (current_codec == NvdecCommon::VideoCodec::H264) { @@ -141,50 +227,48 @@ void Codec::Decode() { frame_data = vp9_decoder->ComposeFrameHeader(state); vp9_hidden_frame = vp9_decoder->WasFrameHidden(); } - AVPacket packet{}; - av_init_packet(&packet); - packet.data = frame_data.data(); - packet.size = static_cast(frame_data.size()); - if (const int ret = avcodec_send_packet(av_codec_ctx, &packet); ret) { - LOG_DEBUG(Service_NVDRV, "avcodec_send_packet error {}", ret); + AVPacketPtr packet{av_packet_alloc(), AVPacketDeleter}; + if (!packet) { + LOG_ERROR(Service_NVDRV, "av_packet_alloc failed"); + return; + } + packet->data = frame_data.data(); + packet->size = static_cast(frame_data.size()); + if (const int res = avcodec_send_packet(av_codec_ctx, packet.get()); res != 0) { + LOG_DEBUG(Service_NVDRV, "avcodec_send_packet error {}", res); return; } // Only receive/store visible frames if (vp9_hidden_frame) { return; } - AVFrame* hw_frame = av_frame_alloc(); - AVFrame* sw_frame = hw_frame; - ASSERT_MSG(hw_frame, "av_frame_alloc hw_frame failed"); - if (const int ret = avcodec_receive_frame(av_codec_ctx, hw_frame); ret) { + AVFramePtr initial_frame{av_frame_alloc(), AVFrameDeleter}; + AVFramePtr final_frame{nullptr, AVFrameDeleter}; + ASSERT_MSG(initial_frame, "av_frame_alloc initial_frame failed"); + if (const int ret = avcodec_receive_frame(av_codec_ctx, initial_frame.get()); ret) { LOG_DEBUG(Service_NVDRV, "avcodec_receive_frame error {}", ret); - av_frame_free(&hw_frame); return; } - if (!hw_frame->width || !hw_frame->height) { + if (initial_frame->width == 0 || initial_frame->height == 0) { LOG_WARNING(Service_NVDRV, "Zero width or height in frame"); - av_frame_free(&hw_frame); return; } -#if defined(LIBVA_FOUND) - // Hardware acceleration code from FFmpeg/doc/examples/hw_decode.c under MIT license - if (hw_frame->format == AV_PIX_FMT_VAAPI) { - sw_frame = av_frame_alloc(); - ASSERT_MSG(sw_frame, "av_frame_alloc sw_frame failed"); + if (using_gpu_decode) { + final_frame = AVFramePtr{av_frame_alloc(), AVFrameDeleter}; + ASSERT_MSG(final_frame, "av_frame_alloc final_frame failed"); // Can't use AV_PIX_FMT_YUV420P and share code with software decoding in vic.cpp // because Intel drivers crash unless using AV_PIX_FMT_NV12 - sw_frame->format = AV_PIX_FMT_NV12; - const int transfer_data_ret = av_hwframe_transfer_data(sw_frame, hw_frame, 0); - ASSERT_MSG(!transfer_data_ret, "av_hwframe_transfer_data error {}", transfer_data_ret); - av_frame_free(&hw_frame); + final_frame->format = AV_PIX_FMT_NV12; + const int ret = av_hwframe_transfer_data(final_frame.get(), initial_frame.get(), 0); + ASSERT_MSG(!ret, "av_hwframe_transfer_data error {}", ret); + } else { + final_frame = std::move(initial_frame); } -#endif - if (sw_frame->format != AV_PIX_FMT_YUV420P && sw_frame->format != AV_PIX_FMT_NV12) { - UNIMPLEMENTED_MSG("Unexpected video format from host graphics: {}", sw_frame->format); - av_frame_free(&sw_frame); + if (final_frame->format != AV_PIX_FMT_YUV420P && final_frame->format != AV_PIX_FMT_NV12) { + UNIMPLEMENTED_MSG("Unexpected video format: {}", final_frame->format); return; } - av_frames.push(AVFramePtr{sw_frame, AVFrameDeleter}); + av_frames.push(std::move(final_frame)); if (av_frames.size() > 10) { LOG_TRACE(Service_NVDRV, "av_frames.push overflow dropped frame"); av_frames.pop(); diff --git a/src/video_core/command_classes/codecs/codec.h b/src/video_core/command_classes/codecs/codec.h index 71936203f..0ccdd7229 100755 --- a/src/video_core/command_classes/codecs/codec.h +++ b/src/video_core/command_classes/codecs/codec.h @@ -50,18 +50,26 @@ public: /// Returns the value of current_codec [[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const; + /// Return name of the current codec [[nodiscard]] std::string_view GetCurrentCodecName() const; private: - void InitializeHwdec(); + void InitializeAvCodecContext(); + + void InitializeGpuDecoder(); + + void TestGpuDecoder(); + + bool CreateGpuAvDevice(); bool initialized{}; + bool using_gpu_decode{}; NvdecCommon::VideoCodec current_codec{NvdecCommon::VideoCodec::None}; AVCodec* av_codec{nullptr}; - AVBufferRef* av_hw_device{nullptr}; AVCodecContext* av_codec_ctx{nullptr}; + AVBufferRef* av_gpu_decoder{nullptr}; GPU& gpu; const NvdecCommon::NvdecRegisters& state; diff --git a/src/video_core/command_classes/codecs/vp9.cpp b/src/video_core/command_classes/codecs/vp9.cpp index 7eecb3991..70030066a 100755 --- a/src/video_core/command_classes/codecs/vp9.cpp +++ b/src/video_core/command_classes/codecs/vp9.cpp @@ -397,14 +397,14 @@ Vp9FrameContainer VP9::GetCurrentFrame(const NvdecCommon::NvdecRegisters& state) next_frame = std::move(temp); } else { next_frame.info = current_frame.info; - next_frame.bit_stream = std::move(current_frame.bit_stream); + next_frame.bit_stream = current_frame.bit_stream; } return current_frame; } std::vector VP9::ComposeCompressedHeader() { VpxRangeEncoder writer{}; - const bool update_probs = current_frame_info.show_frame && !current_frame_info.is_key_frame; + const bool update_probs = !current_frame_info.is_key_frame && current_frame_info.show_frame; if (!current_frame_info.lossless) { if (static_cast(current_frame_info.transform_mode) >= 3) { writer.Write(3, 2); diff --git a/src/video_core/command_classes/codecs/vp9_types.h b/src/video_core/command_classes/codecs/vp9_types.h index 6820afa26..87eafdb03 100755 --- a/src/video_core/command_classes/codecs/vp9_types.h +++ b/src/video_core/command_classes/codecs/vp9_types.h @@ -176,7 +176,7 @@ struct PictureInfo { .frame_size_changed = (vp9_flags & FrameFlags::FrameSizeChanged) != 0, .error_resilient_mode = (vp9_flags & FrameFlags::ErrorResilientMode) != 0, .last_frame_shown = (vp9_flags & FrameFlags::LastShowFrame) != 0, - .show_frame = false, + .show_frame = true, .ref_frame_sign_bias = ref_frame_sign_bias, .base_q_index = base_q_index, .y_dc_delta_q = y_dc_delta_q, diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index 3b575db4d..cae543a51 100755 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -37,7 +37,8 @@ std::unique_ptr CreateRenderer( namespace VideoCore { std::unique_ptr CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) { - const bool use_nvdec = Settings::values.use_nvdec_emulation.GetValue(); + const auto nvdec_value = Settings::values.nvdec_emulation.GetValue(); + const bool use_nvdec = nvdec_value != Settings::NvdecEmulation::Off; const bool use_async = Settings::values.use_asynchronous_gpu_emulation.GetValue(); auto gpu = std::make_unique(system, use_async, use_nvdec); auto context = emu_window.CreateSharedContext(); diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index 6b45e1564..b9d937fe1 100755 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -826,7 +826,7 @@ void Config::ReadRendererValues() { ReadGlobalSetting(Settings::values.use_disk_shader_cache); ReadGlobalSetting(Settings::values.gpu_accuracy); ReadGlobalSetting(Settings::values.use_asynchronous_gpu_emulation); - ReadGlobalSetting(Settings::values.use_nvdec_emulation); + ReadGlobalSetting(Settings::values.nvdec_emulation); ReadGlobalSetting(Settings::values.accelerate_astc); ReadGlobalSetting(Settings::values.use_vsync); ReadGlobalSetting(Settings::values.shader_backend); @@ -1371,7 +1371,10 @@ void Config::SaveRendererValues() { static_cast(Settings::values.gpu_accuracy.GetDefault()), Settings::values.gpu_accuracy.UsingGlobal()); WriteGlobalSetting(Settings::values.use_asynchronous_gpu_emulation); - WriteGlobalSetting(Settings::values.use_nvdec_emulation); + WriteSetting(QString::fromStdString(Settings::values.nvdec_emulation.GetLabel()), + static_cast(Settings::values.nvdec_emulation.GetValue(global)), + static_cast(Settings::values.nvdec_emulation.GetDefault()), + Settings::values.nvdec_emulation.UsingGlobal()); WriteGlobalSetting(Settings::values.accelerate_astc); WriteGlobalSetting(Settings::values.use_vsync); WriteSetting(QString::fromStdString(Settings::values.shader_backend.GetLabel()), diff --git a/src/yuzu/configuration/config.h b/src/yuzu/configuration/config.h index 36e277bda..d6d20771b 100755 --- a/src/yuzu/configuration/config.h +++ b/src/yuzu/configuration/config.h @@ -182,5 +182,6 @@ private: Q_DECLARE_METATYPE(Settings::CPUAccuracy); Q_DECLARE_METATYPE(Settings::GPUAccuracy); Q_DECLARE_METATYPE(Settings::FullscreenMode); +Q_DECLARE_METATYPE(Settings::NvdecEmulation); Q_DECLARE_METATYPE(Settings::RendererBackend); Q_DECLARE_METATYPE(Settings::ShaderBackend); diff --git a/src/yuzu/configuration/configure_graphics.cpp b/src/yuzu/configuration/configure_graphics.cpp index 37e896258..c594164be 100755 --- a/src/yuzu/configuration/configure_graphics.cpp +++ b/src/yuzu/configuration/configure_graphics.cpp @@ -88,24 +88,30 @@ void ConfigureGraphics::SetConfiguration() { ui->api_widget->setEnabled(runtime_lock); ui->use_asynchronous_gpu_emulation->setEnabled(runtime_lock); ui->use_disk_shader_cache->setEnabled(runtime_lock); - ui->use_nvdec_emulation->setEnabled(runtime_lock); + ui->nvdec_emulation_widget->setEnabled(runtime_lock); ui->accelerate_astc->setEnabled(runtime_lock); ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache.GetValue()); ui->use_asynchronous_gpu_emulation->setChecked( Settings::values.use_asynchronous_gpu_emulation.GetValue()); - ui->use_nvdec_emulation->setChecked(Settings::values.use_nvdec_emulation.GetValue()); ui->accelerate_astc->setChecked(Settings::values.accelerate_astc.GetValue()); if (Settings::IsConfiguringGlobal()) { ui->api->setCurrentIndex(static_cast(Settings::values.renderer_backend.GetValue())); ui->fullscreen_mode_combobox->setCurrentIndex( static_cast(Settings::values.fullscreen_mode.GetValue())); + ui->nvdec_emulation->setCurrentIndex( + static_cast(Settings::values.nvdec_emulation.GetValue())); ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio.GetValue()); } else { ConfigurationShared::SetPerGameSetting(ui->api, &Settings::values.renderer_backend); ConfigurationShared::SetHighlight(ui->api_widget, !Settings::values.renderer_backend.UsingGlobal()); + ConfigurationShared::SetPerGameSetting(ui->nvdec_emulation, + &Settings::values.nvdec_emulation); + ConfigurationShared::SetHighlight(ui->nvdec_emulation_widget, + !Settings::values.nvdec_emulation.UsingGlobal()); + ConfigurationShared::SetPerGameSetting(ui->fullscreen_mode_combobox, &Settings::values.fullscreen_mode); ConfigurationShared::SetHighlight(ui->fullscreen_mode_label, @@ -137,8 +143,6 @@ void ConfigureGraphics::ApplyConfiguration() { ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_gpu_emulation, ui->use_asynchronous_gpu_emulation, use_asynchronous_gpu_emulation); - ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_nvdec_emulation, - ui->use_nvdec_emulation, use_nvdec_emulation); ConfigurationShared::ApplyPerGameSetting(&Settings::values.accelerate_astc, ui->accelerate_astc, accelerate_astc); @@ -147,6 +151,9 @@ void ConfigureGraphics::ApplyConfiguration() { if (Settings::values.renderer_backend.UsingGlobal()) { Settings::values.renderer_backend.SetValue(GetCurrentGraphicsBackend()); } + if (Settings::values.nvdec_emulation.UsingGlobal()) { + Settings::values.nvdec_emulation.SetValue(GetCurrentNvdecEmulation()); + } if (Settings::values.shader_backend.UsingGlobal()) { Settings::values.shader_backend.SetValue(shader_backend); } @@ -180,6 +187,13 @@ void ConfigureGraphics::ApplyConfiguration() { } } + if (ui->nvdec_emulation->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { + Settings::values.nvdec_emulation.SetGlobal(true); + } else { + Settings::values.nvdec_emulation.SetGlobal(false); + Settings::values.nvdec_emulation.SetValue(GetCurrentNvdecEmulation()); + } + if (ui->bg_combobox->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { Settings::values.bg_red.SetGlobal(true); Settings::values.bg_green.SetGlobal(true); @@ -278,6 +292,20 @@ Settings::RendererBackend ConfigureGraphics::GetCurrentGraphicsBackend() const { ConfigurationShared::USE_GLOBAL_OFFSET); } +Settings::NvdecEmulation ConfigureGraphics::GetCurrentNvdecEmulation() const { + if (Settings::IsConfiguringGlobal()) { + return static_cast(ui->nvdec_emulation->currentIndex()); + } + + if (ui->nvdec_emulation->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { + Settings::values.nvdec_emulation.SetGlobal(true); + return Settings::values.nvdec_emulation.GetValue(); + } + Settings::values.nvdec_emulation.SetGlobal(false); + return static_cast(ui->nvdec_emulation->currentIndex() - + ConfigurationShared::USE_GLOBAL_OFFSET); +} + void ConfigureGraphics::SetupPerGameUI() { if (Settings::IsConfiguringGlobal()) { ui->api->setEnabled(Settings::values.renderer_backend.UsingGlobal()); @@ -286,7 +314,7 @@ void ConfigureGraphics::SetupPerGameUI() { ui->aspect_ratio_combobox->setEnabled(Settings::values.aspect_ratio.UsingGlobal()); ui->use_asynchronous_gpu_emulation->setEnabled( Settings::values.use_asynchronous_gpu_emulation.UsingGlobal()); - ui->use_nvdec_emulation->setEnabled(Settings::values.use_nvdec_emulation.UsingGlobal()); + ui->nvdec_emulation->setEnabled(Settings::values.nvdec_emulation.UsingGlobal()); ui->accelerate_astc->setEnabled(Settings::values.accelerate_astc.UsingGlobal()); ui->use_disk_shader_cache->setEnabled(Settings::values.use_disk_shader_cache.UsingGlobal()); ui->bg_button->setEnabled(Settings::values.bg_red.UsingGlobal()); @@ -301,8 +329,6 @@ void ConfigureGraphics::SetupPerGameUI() { ConfigurationShared::SetColoredTristate( ui->use_disk_shader_cache, Settings::values.use_disk_shader_cache, use_disk_shader_cache); - ConfigurationShared::SetColoredTristate( - ui->use_nvdec_emulation, Settings::values.use_nvdec_emulation, use_nvdec_emulation); ConfigurationShared::SetColoredTristate(ui->accelerate_astc, Settings::values.accelerate_astc, accelerate_astc); ConfigurationShared::SetColoredTristate(ui->use_asynchronous_gpu_emulation, @@ -316,4 +342,6 @@ void ConfigureGraphics::SetupPerGameUI() { static_cast(Settings::values.fullscreen_mode.GetValue(true))); ConfigurationShared::InsertGlobalItem( ui->api, static_cast(Settings::values.renderer_backend.GetValue(true))); + ConfigurationShared::InsertGlobalItem( + ui->nvdec_emulation, static_cast(Settings::values.nvdec_emulation.GetValue(true))); } diff --git a/src/yuzu/configuration/configure_graphics.h b/src/yuzu/configuration/configure_graphics.h index c866b911b..7d7ac329d 100755 --- a/src/yuzu/configuration/configure_graphics.h +++ b/src/yuzu/configuration/configure_graphics.h @@ -43,6 +43,7 @@ private: void SetupPerGameUI(); Settings::RendererBackend GetCurrentGraphicsBackend() const; + Settings::NvdecEmulation GetCurrentNvdecEmulation() const; std::unique_ptr ui; QColor bg_color; diff --git a/src/yuzu/configuration/configure_graphics.ui b/src/yuzu/configuration/configure_graphics.ui index 099ddbb7c..41e930f6b 100755 --- a/src/yuzu/configuration/configure_graphics.ui +++ b/src/yuzu/configuration/configure_graphics.ui @@ -167,13 +167,6 @@ - - - - Use NVDEC emulation - - - @@ -181,6 +174,50 @@ + + + + + 0 + + + 0 + + + 0 + + + 0 + + + + + NVDEC emulation: + + + + + + + + Disabled + + + + + CPU Decoding + + + + + GPU Decoding + + + + + + + diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp index 757dd1ea0..891f7be6f 100755 --- a/src/yuzu_cmd/config.cpp +++ b/src/yuzu_cmd/config.cpp @@ -465,7 +465,7 @@ void Config::ReadValues() { ReadSetting("Renderer", Settings::values.disable_fps_limit); ReadSetting("Renderer", Settings::values.shader_backend); ReadSetting("Renderer", Settings::values.use_asynchronous_shaders); - ReadSetting("Renderer", Settings::values.use_nvdec_emulation); + ReadSetting("Renderer", Settings::values.nvdec_emulation); ReadSetting("Renderer", Settings::values.accelerate_astc); ReadSetting("Renderer", Settings::values.use_fast_gpu_time); diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index e02eceb99..72f3213fb 100755 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h @@ -261,9 +261,9 @@ shader_backend = # 0 (default): Off, 1: On use_asynchronous_shaders = -# Enable NVDEC emulation. -# 0: Off, 1 (default): On -use_nvdec_emulation = +# NVDEC emulation. +# 0: Disabled, 1: CPU Decoding, 2 (default): GPU Decoding +nvdec_emulation = # Accelerate ASTC texture decoding. # 0: Off, 1 (default): On