early-access version 1986

This commit is contained in:
pineappleEA 2021-08-10 13:30:16 +02:00
parent bdd9b5d7c2
commit e37f82ce96
29 changed files with 562 additions and 821 deletions

View file

@ -518,10 +518,6 @@ set(FFmpeg_COMPONENTS
avutil avutil
swscale) swscale)
if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
Include(FindPkgConfig REQUIRED)
pkg_check_modules(LIBVA libva)
endif()
if (NOT YUZU_USE_BUNDLED_FFMPEG) if (NOT YUZU_USE_BUNDLED_FFMPEG)
# Use system installed FFmpeg # Use system installed FFmpeg
find_package(FFmpeg QUIET COMPONENTS ${FFmpeg_COMPONENTS}) find_package(FFmpeg QUIET COMPONENTS ${FFmpeg_COMPONENTS})
@ -544,9 +540,6 @@ endif()
if (YUZU_USE_BUNDLED_FFMPEG) if (YUZU_USE_BUNDLED_FFMPEG)
if (NOT WIN32) if (NOT WIN32)
# TODO(lat9nq): Move this to externals/ffmpeg/CMakeLists.txt (and move externals/ffmpeg to
# externals/ffmpeg/ffmpeg)
# Build FFmpeg from externals # Build FFmpeg from externals
message(STATUS "Using FFmpeg from externals") message(STATUS "Using FFmpeg from externals")
@ -586,23 +579,20 @@ if (YUZU_USE_BUNDLED_FFMPEG)
CACHE PATH "Paths to FFmpeg libraries" FORCE) CACHE PATH "Paths to FFmpeg libraries" FORCE)
endforeach() endforeach()
set(FFmpeg_INCLUDE_DIR
"${FFmpeg_PREFIX};${FFmpeg_BUILD_DIR}"
CACHE PATH "Path to FFmpeg headers" FORCE)
if (${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
Include(FindPkgConfig REQUIRED) Include(FindPkgConfig REQUIRED)
pkg_check_modules(LIBVA libva) pkg_check_modules(LIBVA libva)
pkg_check_modules(CUDA cuda) endif()
pkg_check_modules(FFNVCODEC ffnvcodec)
pkg_check_modules(VDPAU vdpau)
set(FFmpeg_HWACCEL_LIBRARIES)
set(FFmpeg_HWACCEL_FLAGS)
set(FFmpeg_HWACCEL_INCLUDE_DIRS)
set(FFmpeg_HWACCEL_LDFLAGS)
if(LIBVA_FOUND) if(LIBVA_FOUND)
pkg_check_modules(LIBDRM libdrm REQUIRED) pkg_check_modules(LIBDRM libdrm REQUIRED)
find_package(X11 REQUIRED) find_package(X11 REQUIRED)
pkg_check_modules(LIBVA-DRM libva-drm REQUIRED) pkg_check_modules(LIBVA-DRM libva-drm REQUIRED)
pkg_check_modules(LIBVA-X11 libva-x11 REQUIRED) pkg_check_modules(LIBVA-X11 libva-x11 REQUIRED)
list(APPEND FFmpeg_HWACCEL_LIBRARIES set(FFmpeg_LIBVA_LIBRARIES
${LIBDRM_LIBRARIES} ${LIBDRM_LIBRARIES}
${X11_LIBRARIES} ${X11_LIBRARIES}
${LIBVA-DRM_LIBRARIES} ${LIBVA-DRM_LIBRARIES}
@ -612,57 +602,11 @@ if (YUZU_USE_BUNDLED_FFMPEG)
--enable-hwaccel=h264_vaapi --enable-hwaccel=h264_vaapi
--enable-hwaccel=vp9_vaapi --enable-hwaccel=vp9_vaapi
--enable-libdrm) --enable-libdrm)
list(APPEND FFmpeg_HWACCEL_INCLUDE_DIRS
${LIBDRM_INCLUDE_DIRS}
${X11_INCLUDE_DIRS}
${LIBVA-DRM_INCLUDE_DIRS}
${LIBVA-X11_INCLUDE_DIRS}
${LIBVA_INCLUDE_DIRS}
)
message(STATUS "VA-API found") message(STATUS "VA-API found")
else() else()
set(FFmpeg_HWACCEL_FLAGS --disable-vaapi) set(FFmpeg_HWACCEL_FLAGS --disable-vaapi)
endif() endif()
if (FFNVCODEC_FOUND AND CUDA_FOUND)
list(APPEND FFmpeg_HWACCEL_FLAGS
--enable-cuvid
--enable-ffnvcodec
--enable-nvdec
--enable-cuda-llvm
--enable-hwaccel=h264_nvdec
--enable-hwaccel=vp9_nvdec
--extra-cflags=-I${CUDA_INCLUDE_DIRS}
)
list(APPEND FFmpeg_HWACCEL_LIBRARIES
${FFNVCODEC_LIBRARIES}
${CUDA_LIBRARIES}
)
list(APPEND FFmpeg_HWACCEL_INCLUDE_DIRS
${FFNVCODEC_INCLUDE_DIRS}
${CUDA_INCLUDE_DIRS}
)
list(APPEND FFmpeg_HWACCEL_LDFLAGS
${FFNVCODEC_LDFLAGS}
${CUDA_LDFLAGS}
)
message(STATUS "ffnvcodec libraries version ${FFNVCODEC_VERSION} found")
endif()
if (VDPAU_FOUND)
list(APPEND FFmpeg_HWACCEL_FLAGS
--enable-vdpau
--enable-hwaccel=h264_vdpau
--enable-hwaccel=vp9_vdpau
)
list(APPEND FFmpeg_HWACCEL_LIBRARIES ${VDPAU_LIBRARIES})
list(APPEND FFmpeg_HWACCEL_INCLUDE_DIRS ${VDPAU_INCLUDE_DIRS})
list(APPEND FFmpeg_HWACCEL_LDFLAGS ${VDPAU_LDFLAGS})
message(STATUS "vdpau libraries version ${VDPAU_VERSION} found")
else()
list(APPEND FFmpeg_HWACCEL_FLAGS --disable-vdpau)
endif()
# `configure` parameters builds only exactly what yuzu needs from FFmpeg # `configure` parameters builds only exactly what yuzu needs from FFmpeg
# `--disable-vdpau` is needed to avoid linking issues # `--disable-vdpau` is needed to avoid linking issues
add_custom_command( add_custom_command(
@ -680,6 +624,7 @@ if (YUZU_USE_BUNDLED_FFMPEG)
--disable-network --disable-network
--disable-postproc --disable-postproc
--disable-swresample --disable-swresample
--disable-vdpau
--enable-decoder=h264 --enable-decoder=h264
--enable-decoder=vp9 --enable-decoder=vp9
--cc="${CMAKE_C_COMPILER}" --cc="${CMAKE_C_COMPILER}"
@ -708,26 +653,15 @@ if (YUZU_USE_BUNDLED_FFMPEG)
${FFmpeg_BUILD_DIR} ${FFmpeg_BUILD_DIR}
) )
set(FFmpeg_INCLUDE_DIR
"${FFmpeg_PREFIX};${FFmpeg_BUILD_DIR};${FFmpeg_HWACCEL_INCLUDE_DIRS}"
CACHE PATH "Path to FFmpeg headers" FORCE)
set(FFmpeg_LDFLAGS
"${FFmpeg_HWACCEL_LDFLAGS}"
CACHE STRING "FFmpeg linker flags" FORCE)
# ALL makes this custom target build every time # ALL makes this custom target build every time
# but it won't actually build if the DEPENDS parameter is up to date # but it won't actually build if the DEPENDS parameter is up to date
add_custom_target(ffmpeg-configure ALL DEPENDS ${FFmpeg_MAKEFILE}) add_custom_target(ffmpeg-configure ALL DEPENDS ${FFmpeg_MAKEFILE})
add_custom_target(ffmpeg-build ALL DEPENDS ${FFmpeg_BUILD_LIBRARIES} ffmpeg-configure) add_custom_target(ffmpeg-build ALL DEPENDS ${FFmpeg_BUILD_LIBRARIES} ffmpeg-configure)
link_libraries(${FFmpeg_LIBVA_LIBRARIES}) link_libraries(${FFmpeg_LIBVA_LIBRARIES})
set(FFmpeg_LIBRARIES ${FFmpeg_BUILD_LIBRARIES} ${FFmpeg_HWACCEL_LIBRARIES} set(FFmpeg_LIBRARIES ${FFmpeg_LIBVA_LIBRARIES} ${FFmpeg_BUILD_LIBRARIES}
CACHE PATH "Paths to FFmpeg libraries" FORCE) CACHE PATH "Paths to FFmpeg libraries" FORCE)
unset(FFmpeg_BUILD_LIBRARIES) unset(FFmpeg_BUILD_LIBRARIES)
unset(FFmpeg_HWACCEL_FLAGS) unset(FFmpeg_LIBVA_LIBRARIES)
unset(FFmpeg_HWACCEL_INCLUDE_DIRS)
unset(FFmpeg_HWACCEL_LDFLAGS)
unset(FFmpeg_HWACCEL_LIBRARIES)
if (FFmpeg_FOUND) if (FFmpeg_FOUND)
message(STATUS "Found FFmpeg version ${FFmpeg_VERSION}") message(STATUS "Found FFmpeg version ${FFmpeg_VERSION}")
@ -736,13 +670,12 @@ if (YUZU_USE_BUNDLED_FFMPEG)
endif() endif()
else() # WIN32 else() # WIN32
# Use yuzu FFmpeg binaries # Use yuzu FFmpeg binaries
set(FFmpeg_EXT_NAME "ffmpeg-4.4") set(FFmpeg_EXT_NAME "ffmpeg-4.3.1")
set(FFmpeg_PATH "${CMAKE_BINARY_DIR}/externals/${FFmpeg_EXT_NAME}") set(FFmpeg_PATH "${CMAKE_BINARY_DIR}/externals/${FFmpeg_EXT_NAME}")
download_bundled_external("ffmpeg/" ${FFmpeg_EXT_NAME} "") download_bundled_external("ffmpeg/" ${FFmpeg_EXT_NAME} "")
set(FFmpeg_FOUND YES) set(FFmpeg_FOUND YES)
set(FFmpeg_INCLUDE_DIR "${FFmpeg_PATH}/include" CACHE PATH "Path to FFmpeg headers" FORCE) set(FFmpeg_INCLUDE_DIR "${FFmpeg_PATH}/include" CACHE PATH "Path to FFmpeg headers" FORCE)
set(FFmpeg_LIBRARY_DIR "${FFmpeg_PATH}/bin" CACHE PATH "Path to FFmpeg library directory" FORCE) set(FFmpeg_LIBRARY_DIR "${FFmpeg_PATH}/bin" CACHE PATH "Path to FFmpeg library directory" FORCE)
set(FFmpeg_LDFLAGS "" CACHE STRING "FFmpeg linker flags" FORCE)
set(FFmpeg_DLL_DIR "${FFmpeg_PATH}/bin" CACHE PATH "Path to FFmpeg dll's" FORCE) set(FFmpeg_DLL_DIR "${FFmpeg_PATH}/bin" CACHE PATH "Path to FFmpeg dll's" FORCE)
set(FFmpeg_LIBRARIES set(FFmpeg_LIBRARIES
${FFmpeg_LIBRARY_DIR}/swscale.lib ${FFmpeg_LIBRARY_DIR}/swscale.lib

View file

@ -1,7 +1,7 @@
yuzu emulator early access yuzu emulator early access
============= =============
This is the source code for early-access 1984. This is the source code for early-access 1986.
## Legal Notice ## Legal Notice

View file

@ -54,7 +54,7 @@ void LogSettings() {
log_setting("Renderer_GPUAccuracyLevel", values.gpu_accuracy.GetValue()); log_setting("Renderer_GPUAccuracyLevel", values.gpu_accuracy.GetValue());
log_setting("Renderer_UseAsynchronousGpuEmulation", log_setting("Renderer_UseAsynchronousGpuEmulation",
values.use_asynchronous_gpu_emulation.GetValue()); values.use_asynchronous_gpu_emulation.GetValue());
log_setting("Renderer_NvdecEmulation", values.nvdec_emulation.GetValue()); log_setting("Renderer_UseNvdecEmulation", values.use_nvdec_emulation.GetValue());
log_setting("Renderer_AccelerateASTC", values.accelerate_astc.GetValue()); log_setting("Renderer_AccelerateASTC", values.accelerate_astc.GetValue());
log_setting("Renderer_UseVsync", values.use_vsync.GetValue()); log_setting("Renderer_UseVsync", values.use_vsync.GetValue());
log_setting("Renderer_ShaderBackend", values.shader_backend.GetValue()); log_setting("Renderer_ShaderBackend", values.shader_backend.GetValue());
@ -137,7 +137,7 @@ void RestoreGlobalState(bool is_powered_on) {
values.use_disk_shader_cache.SetGlobal(true); values.use_disk_shader_cache.SetGlobal(true);
values.gpu_accuracy.SetGlobal(true); values.gpu_accuracy.SetGlobal(true);
values.use_asynchronous_gpu_emulation.SetGlobal(true); values.use_asynchronous_gpu_emulation.SetGlobal(true);
values.nvdec_emulation.SetGlobal(true); values.use_nvdec_emulation.SetGlobal(true);
values.accelerate_astc.SetGlobal(true); values.accelerate_astc.SetGlobal(true);
values.use_vsync.SetGlobal(true); values.use_vsync.SetGlobal(true);
values.shader_backend.SetGlobal(true); values.shader_backend.SetGlobal(true);

View file

@ -47,12 +47,6 @@ enum class FullscreenMode : u32 {
Exclusive = 1, Exclusive = 1,
}; };
enum class NvdecEmulation : u32 {
Off = 0,
CPU = 1,
GPU = 2,
};
/** The BasicSetting class is a simple resource manager. It defines a label and default value /** The BasicSetting class is a simple resource manager. It defines a label and default value
* alongside the actual value of the setting for simpler and less-error prone use with frontend * alongside the actual value of the setting for simpler and less-error prone use with frontend
* configurations. Setting a default value and label is required, though subclasses may deviate from * configurations. Setting a default value and label is required, though subclasses may deviate from
@ -348,7 +342,7 @@ struct Values {
Setting<bool> use_disk_shader_cache{true, "use_disk_shader_cache"}; Setting<bool> use_disk_shader_cache{true, "use_disk_shader_cache"};
Setting<GPUAccuracy> gpu_accuracy{GPUAccuracy::High, "gpu_accuracy"}; Setting<GPUAccuracy> gpu_accuracy{GPUAccuracy::High, "gpu_accuracy"};
Setting<bool> use_asynchronous_gpu_emulation{true, "use_asynchronous_gpu_emulation"}; Setting<bool> use_asynchronous_gpu_emulation{true, "use_asynchronous_gpu_emulation"};
Setting<NvdecEmulation> nvdec_emulation{NvdecEmulation::GPU, "nvdec_emulation"}; Setting<bool> use_nvdec_emulation{true, "use_nvdec_emulation"};
Setting<bool> accelerate_astc{true, "accelerate_astc"}; Setting<bool> accelerate_astc{true, "accelerate_astc"};
Setting<bool> use_vsync{true, "use_vsync"}; Setting<bool> use_vsync{true, "use_vsync"};
BasicSetting<u16> fps_cap{1000, "fps_cap"}; BasicSetting<u16> fps_cap{1000, "fps_cap"};

View file

@ -72,18 +72,6 @@ static const char* TranslateGPUAccuracyLevel(Settings::GPUAccuracy backend) {
return "Unknown"; return "Unknown";
} }
static const char* TranslateNvdecEmulation(Settings::NvdecEmulation backend) {
switch (backend) {
case Settings::NvdecEmulation::Off:
return "Off";
case Settings::NvdecEmulation::CPU:
return "CPU";
case Settings::NvdecEmulation::GPU:
return "GPU";
}
return "Unknown";
}
u64 GetTelemetryId() { u64 GetTelemetryId() {
u64 telemetry_id{}; u64 telemetry_id{};
const auto filename = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ConfigDir) / "telemetry_id"; const auto filename = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ConfigDir) / "telemetry_id";
@ -241,8 +229,8 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader,
TranslateGPUAccuracyLevel(Settings::values.gpu_accuracy.GetValue())); TranslateGPUAccuracyLevel(Settings::values.gpu_accuracy.GetValue()));
AddField(field_type, "Renderer_UseAsynchronousGpuEmulation", AddField(field_type, "Renderer_UseAsynchronousGpuEmulation",
Settings::values.use_asynchronous_gpu_emulation.GetValue()); Settings::values.use_asynchronous_gpu_emulation.GetValue());
AddField(field_type, "Renderer_NvdecEmulation", AddField(field_type, "Renderer_UseNvdecEmulation",
TranslateNvdecEmulation(Settings::values.nvdec_emulation.GetValue())); Settings::values.use_nvdec_emulation.GetValue());
AddField(field_type, "Renderer_AccelerateASTC", Settings::values.accelerate_astc.GetValue()); AddField(field_type, "Renderer_AccelerateASTC", Settings::values.accelerate_astc.GetValue());
AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync.GetValue()); AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync.GetValue());
AddField(field_type, "Renderer_ShaderBackend", AddField(field_type, "Renderer_ShaderBackend",

View file

@ -97,6 +97,7 @@ add_library(video_core STATIC
renderer_opengl/gl_stream_buffer.h renderer_opengl/gl_stream_buffer.h
renderer_opengl/gl_texture_cache.cpp renderer_opengl/gl_texture_cache.cpp
renderer_opengl/gl_texture_cache.h renderer_opengl/gl_texture_cache.h
renderer_opengl/gl_texture_cache_base.cpp
renderer_opengl/gl_query_cache.cpp renderer_opengl/gl_query_cache.cpp
renderer_opengl/gl_query_cache.h renderer_opengl/gl_query_cache.h
renderer_opengl/maxwell_to_gl.h renderer_opengl/maxwell_to_gl.h
@ -155,6 +156,7 @@ add_library(video_core STATIC
renderer_vulkan/vk_swapchain.h renderer_vulkan/vk_swapchain.h
renderer_vulkan/vk_texture_cache.cpp renderer_vulkan/vk_texture_cache.cpp
renderer_vulkan/vk_texture_cache.h renderer_vulkan/vk_texture_cache.h
renderer_vulkan/vk_texture_cache_base.cpp
renderer_vulkan/vk_update_descriptor.cpp renderer_vulkan/vk_update_descriptor.cpp
renderer_vulkan/vk_update_descriptor.h renderer_vulkan/vk_update_descriptor.h
shader_cache.cpp shader_cache.cpp
@ -186,6 +188,7 @@ add_library(video_core STATIC
texture_cache/samples_helper.h texture_cache/samples_helper.h
texture_cache/slot_vector.h texture_cache/slot_vector.h
texture_cache/texture_cache.h texture_cache/texture_cache.h
texture_cache/texture_cache_base.h
texture_cache/types.h texture_cache/types.h
texture_cache/util.cpp texture_cache/util.cpp
texture_cache/util.h texture_cache/util.h
@ -228,7 +231,6 @@ endif()
target_include_directories(video_core PRIVATE ${FFmpeg_INCLUDE_DIR}) target_include_directories(video_core PRIVATE ${FFmpeg_INCLUDE_DIR})
target_link_libraries(video_core PRIVATE ${FFmpeg_LIBRARIES}) target_link_libraries(video_core PRIVATE ${FFmpeg_LIBRARIES})
target_link_options(video_core PRIVATE ${FFmpeg_LDFLAGS})
add_dependencies(video_core host_shaders) add_dependencies(video_core host_shaders)
target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE}) target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE})

View file

@ -5,7 +5,6 @@
#include <fstream> #include <fstream>
#include <vector> #include <vector>
#include "common/assert.h" #include "common/assert.h"
#include "common/settings.h"
#include "video_core/command_classes/codecs/codec.h" #include "video_core/command_classes/codecs/codec.h"
#include "video_core/command_classes/codecs/h264.h" #include "video_core/command_classes/codecs/h264.h"
#include "video_core/command_classes/codecs/vp9.h" #include "video_core/command_classes/codecs/vp9.h"
@ -17,24 +16,44 @@ extern "C" {
} }
namespace Tegra { namespace Tegra {
#if defined(LIBVA_FOUND)
// Hardware acceleration code from FFmpeg/doc/examples/hw_decode.c originally under MIT license
namespace { namespace {
void AVPacketDeleter(AVPacket* ptr) { constexpr std::array<const char*, 2> VAAPI_DRIVERS = {
av_packet_free(&ptr); "i915",
} "amdgpu",
};
using AVPacketPtr = std::unique_ptr<AVPacket, decltype(&AVPacketDeleter)>; AVPixelFormat GetHwFormat(AVCodecContext*, const AVPixelFormat* pix_fmts) {
AVPixelFormat GetGpuFormat(AVCodecContext* av_codec_ctx, const AVPixelFormat* pix_fmts) {
for (const AVPixelFormat* p = pix_fmts; *p != AV_PIX_FMT_NONE; ++p) { for (const AVPixelFormat* p = pix_fmts; *p != AV_PIX_FMT_NONE; ++p) {
if (*p == av_codec_ctx->pix_fmt) { if (*p == AV_PIX_FMT_VAAPI) {
return av_codec_ctx->pix_fmt; return AV_PIX_FMT_VAAPI;
} }
} }
LOG_INFO(Service_NVDRV, "Could not find compatible GPU AV format, falling back to CPU"); LOG_INFO(Service_NVDRV, "Could not find compatible GPU AV format, falling back to CPU");
av_codec_ctx->pix_fmt = AV_PIX_FMT_NONE; return *pix_fmts;
return AV_PIX_FMT_NONE; }
bool CreateVaapiHwdevice(AVBufferRef** av_hw_device) {
AVDictionary* hwdevice_options = nullptr;
av_dict_set(&hwdevice_options, "connection_type", "drm", 0);
for (const auto& driver : VAAPI_DRIVERS) {
av_dict_set(&hwdevice_options, "kernel_driver", driver, 0);
const int hwdevice_error = av_hwdevice_ctx_create(av_hw_device, AV_HWDEVICE_TYPE_VAAPI,
nullptr, hwdevice_options, 0);
if (hwdevice_error >= 0) {
LOG_INFO(Service_NVDRV, "Using VA-API with {}", driver);
av_dict_free(&hwdevice_options);
return true;
}
LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed {}", hwdevice_error);
}
LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed for all drivers");
av_dict_free(&hwdevice_options);
return false;
} }
} // namespace } // namespace
#endif
void AVFrameDeleter(AVFrame* ptr) { void AVFrameDeleter(AVFrame* ptr) {
av_frame_free(&ptr); av_frame_free(&ptr);
@ -49,158 +68,56 @@ Codec::~Codec() {
return; return;
} }
// Free libav memory // Free libav memory
avcodec_free_context(&av_codec_ctx); avcodec_send_packet(av_codec_ctx, nullptr);
av_buffer_unref(&av_gpu_decoder); AVFrame* av_frame = av_frame_alloc();
avcodec_receive_frame(av_codec_ctx, av_frame);
avcodec_flush_buffers(av_codec_ctx);
av_frame_free(&av_frame);
avcodec_close(av_codec_ctx);
av_buffer_unref(&av_hw_device);
} }
bool Codec::CreateGpuAvDevice() { void Codec::InitializeHwdec() {
// Prioritize integrated GPU to mitigate bandwidth bottlenecks
#if defined(LIBVA_FOUND) #if defined(LIBVA_FOUND)
static constexpr std::array<const char*, 2> VAAPI_DRIVERS = { if (CreateVaapiHwdevice(&av_hw_device)) {
"i915", const auto hw_device_ctx = av_buffer_ref(av_hw_device);
"iHD",
};
AVDictionary* hwdevice_options = nullptr;
av_dict_set(&hwdevice_options, "connection_type", "drm", 0);
for (const auto& driver : VAAPI_DRIVERS) {
av_dict_set(&hwdevice_options, "kernel_driver", driver, 0);
const int hwdevice_error = av_hwdevice_ctx_create(&av_gpu_decoder, AV_HWDEVICE_TYPE_VAAPI,
nullptr, hwdevice_options, 0);
if (hwdevice_error >= 0) {
LOG_INFO(Service_NVDRV, "Using VA-API with {}", driver);
av_dict_free(&hwdevice_options);
av_codec_ctx->pix_fmt = AV_PIX_FMT_VAAPI;
return true;
}
LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed {}", hwdevice_error);
}
LOG_DEBUG(Service_NVDRV, "VA-API av_hwdevice_ctx_create failed for all drivers");
av_dict_free(&hwdevice_options);
#endif
static constexpr auto HW_CONFIG_METHOD = AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX;
static constexpr std::array GPU_DECODER_TYPES{
AV_HWDEVICE_TYPE_CUDA,
AV_HWDEVICE_TYPE_D3D11VA,
AV_HWDEVICE_TYPE_VDPAU,
};
for (const auto& type : GPU_DECODER_TYPES) {
const int hwdevice_res = av_hwdevice_ctx_create(&av_gpu_decoder, type, nullptr, nullptr, 0);
if (hwdevice_res < 0) {
LOG_DEBUG(Service_NVDRV, "{} av_hwdevice_ctx_create failed {}",
av_hwdevice_get_type_name(type), hwdevice_res);
continue;
}
for (int i = 0;; i++) {
const AVCodecHWConfig* config = avcodec_get_hw_config(av_codec, i);
if (!config) {
LOG_DEBUG(Service_NVDRV, "{} decoder does not support device type {}.",
av_codec->name, av_hwdevice_get_type_name(type));
break;
}
if (config->methods & HW_CONFIG_METHOD && config->device_type == type) {
av_codec_ctx->pix_fmt = config->pix_fmt;
LOG_INFO(Service_NVDRV, "Using {} GPU decoder", av_hwdevice_get_type_name(type));
return true;
}
}
}
return false;
}
void Codec::InitializeAvCodecContext() {
av_codec_ctx = avcodec_alloc_context3(av_codec);
av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0);
}
void Codec::InitializeGpuDecoder() {
if (!CreateGpuAvDevice()) {
av_buffer_unref(&av_gpu_decoder);
return;
}
auto* hw_device_ctx = av_buffer_ref(av_gpu_decoder);
ASSERT_MSG(hw_device_ctx, "av_buffer_ref failed"); ASSERT_MSG(hw_device_ctx, "av_buffer_ref failed");
av_codec_ctx->hw_device_ctx = hw_device_ctx; av_codec_ctx->hw_device_ctx = hw_device_ctx;
av_codec_ctx->get_format = GetGpuFormat; av_codec_ctx->get_format = GetHwFormat;
}
void Codec::TestGpuDecoder() {
static constexpr std::array<u8, 48> vp9_test{
0x92, 0x49, 0x83, 0x42, 0x00, 0x09, 0xf8, 0x05, 0x9b, 0x09, 0x1c, 0x12,
0x0e, 0x0c, 0x32, 0x00, 0x02, 0x08, 0x7f, 0xcd, 0xec, 0x3f, 0x3b, 0x77,
0x81, 0xf6, 0x47, 0xe2, 0xf0, 0x7e, 0x8b, 0x41, 0xfe, 0x3f, 0x87, 0x89,
0x7d, 0x37, 0xa2, 0xd1, 0x3e, 0x9c, 0x59, 0x5f, 0x75, 0xee, 0xbb, 0x97,
};
static constexpr std::array<u8, 48> h264_test{
0x00, 0x00, 0x00, 0x01, 0x67, 0x64, 0x00, 0x0D, 0xAC, 0x34, 0xE5, 0x05,
0x06, 0x7E, 0x78, 0x40, 0x00, 0x00, 0x19, 0x00, 0x00, 0x05, 0xDA, 0xA3,
0xC5, 0x0A, 0x45, 0x80, 0x00, 0x00, 0x00, 0x01, 0x68, 0xEE, 0xB2, 0xC8,
0xB0, 0x00, 0x00, 0x01, 0x65, 0x88, 0x80, 0x20, 0x01, 0xFF, 0xF3, 0x7E,
};
auto test_data = [&] {
switch (current_codec) {
case NvdecCommon::VideoCodec::H264:
return h264_test;
case NvdecCommon::VideoCodec::Vp9:
return vp9_test;
default:
UNIMPLEMENTED_MSG("Unknown codec {}", current_codec);
return std::array<u8, 48>{};
}
}();
AVPacketPtr packet{av_packet_alloc(), AVPacketDeleter};
if (!packet) {
LOG_ERROR(Service_NVDRV, "av_packet_alloc failed");
return; return;
} }
// Temporarily disable logging, the test frames are incomplete and will log errors that can be #endif
// ignored for our purposes. // TODO more GPU accelerated decoders
av_log_set_level(AV_LOG_QUIET);
packet->data = test_data.data();
packet->size = static_cast<s32>(test_data.size());
avcodec_send_packet(av_codec_ctx, packet.get());
av_log_set_level(AV_LOG_WARNING);
// GetGpuFormat is invoked after the avcodec_send_packet call.
// Fallback to CPU decoding if no compatible GPU format was found.
if (av_codec_ctx->pix_fmt != AV_PIX_FMT_NONE) {
avcodec_flush_buffers(av_codec_ctx);
using_gpu_decode = true;
return;
}
avcodec_close(av_codec_ctx);
av_buffer_unref(&av_gpu_decoder);
InitializeAvCodecContext();
avcodec_open2(av_codec_ctx, av_codec, nullptr);
} }
void Codec::Initialize() { void Codec::Initialize() {
const AVCodecID codec = [&] { AVCodecID codec;
switch (current_codec) { switch (current_codec) {
case NvdecCommon::VideoCodec::H264: case NvdecCommon::VideoCodec::H264:
return AV_CODEC_ID_H264; codec = AV_CODEC_ID_H264;
break;
case NvdecCommon::VideoCodec::Vp9: case NvdecCommon::VideoCodec::Vp9:
return AV_CODEC_ID_VP9; codec = AV_CODEC_ID_VP9;
break;
default: default:
UNIMPLEMENTED_MSG("Unknown codec {}", current_codec); UNIMPLEMENTED_MSG("Unknown codec {}", current_codec);
return AV_CODEC_ID_NONE;
}
}();
av_codec = avcodec_find_decoder(codec);
InitializeAvCodecContext();
if (Settings::values.nvdec_emulation.GetValue() == Settings::NvdecEmulation::GPU) {
InitializeGpuDecoder();
}
if (const int res = avcodec_open2(av_codec_ctx, av_codec, nullptr); res < 0) {
LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed with result {}", res);
avcodec_free_context(&av_codec_ctx);
av_buffer_unref(&av_gpu_decoder);
return; return;
} }
if (av_codec_ctx->hw_device_ctx) { av_codec = avcodec_find_decoder(codec);
TestGpuDecoder(); av_codec_ctx = avcodec_alloc_context3(av_codec);
} else { av_opt_set(av_codec_ctx->priv_data, "tune", "zerolatency", 0);
InitializeHwdec();
if (!av_codec_ctx->hw_device_ctx) {
LOG_INFO(Service_NVDRV, "Using FFmpeg software decoding"); LOG_INFO(Service_NVDRV, "Using FFmpeg software decoding");
} }
const auto av_error = avcodec_open2(av_codec_ctx, av_codec, nullptr);
if (av_error < 0) {
LOG_ERROR(Service_NVDRV, "avcodec_open2() Failed.");
avcodec_close(av_codec_ctx);
av_buffer_unref(&av_hw_device);
return;
}
initialized = true; initialized = true;
} }
@ -216,9 +133,6 @@ void Codec::Decode() {
if (is_first_frame) { if (is_first_frame) {
Initialize(); Initialize();
} }
if (!initialized) {
return;
}
bool vp9_hidden_frame = false; bool vp9_hidden_frame = false;
std::vector<u8> frame_data; std::vector<u8> frame_data;
if (current_codec == NvdecCommon::VideoCodec::H264) { if (current_codec == NvdecCommon::VideoCodec::H264) {
@ -227,48 +141,50 @@ void Codec::Decode() {
frame_data = vp9_decoder->ComposeFrameHeader(state); frame_data = vp9_decoder->ComposeFrameHeader(state);
vp9_hidden_frame = vp9_decoder->WasFrameHidden(); vp9_hidden_frame = vp9_decoder->WasFrameHidden();
} }
AVPacketPtr packet{av_packet_alloc(), AVPacketDeleter}; AVPacket packet{};
if (!packet) { av_init_packet(&packet);
LOG_ERROR(Service_NVDRV, "av_packet_alloc failed"); packet.data = frame_data.data();
return; packet.size = static_cast<s32>(frame_data.size());
} if (const int ret = avcodec_send_packet(av_codec_ctx, &packet); ret) {
packet->data = frame_data.data(); LOG_DEBUG(Service_NVDRV, "avcodec_send_packet error {}", ret);
packet->size = static_cast<s32>(frame_data.size());
if (const int res = avcodec_send_packet(av_codec_ctx, packet.get()); res != 0) {
LOG_DEBUG(Service_NVDRV, "avcodec_send_packet error {}", res);
return; return;
} }
// Only receive/store visible frames // Only receive/store visible frames
if (vp9_hidden_frame) { if (vp9_hidden_frame) {
return; return;
} }
AVFramePtr initial_frame{av_frame_alloc(), AVFrameDeleter}; AVFrame* hw_frame = av_frame_alloc();
AVFramePtr final_frame{nullptr, AVFrameDeleter}; AVFrame* sw_frame = hw_frame;
ASSERT_MSG(initial_frame, "av_frame_alloc initial_frame failed"); ASSERT_MSG(hw_frame, "av_frame_alloc hw_frame failed");
if (const int ret = avcodec_receive_frame(av_codec_ctx, initial_frame.get()); ret) { if (const int ret = avcodec_receive_frame(av_codec_ctx, hw_frame); ret) {
LOG_DEBUG(Service_NVDRV, "avcodec_receive_frame error {}", ret); LOG_DEBUG(Service_NVDRV, "avcodec_receive_frame error {}", ret);
av_frame_free(&hw_frame);
return; return;
} }
if (initial_frame->width == 0 || initial_frame->height == 0) { if (!hw_frame->width || !hw_frame->height) {
LOG_WARNING(Service_NVDRV, "Zero width or height in frame"); LOG_WARNING(Service_NVDRV, "Zero width or height in frame");
av_frame_free(&hw_frame);
return; return;
} }
if (using_gpu_decode) { #if defined(LIBVA_FOUND)
final_frame = AVFramePtr{av_frame_alloc(), AVFrameDeleter}; // Hardware acceleration code from FFmpeg/doc/examples/hw_decode.c under MIT license
ASSERT_MSG(final_frame, "av_frame_alloc final_frame failed"); if (hw_frame->format == AV_PIX_FMT_VAAPI) {
sw_frame = av_frame_alloc();
ASSERT_MSG(sw_frame, "av_frame_alloc sw_frame failed");
// Can't use AV_PIX_FMT_YUV420P and share code with software decoding in vic.cpp // Can't use AV_PIX_FMT_YUV420P and share code with software decoding in vic.cpp
// because Intel drivers crash unless using AV_PIX_FMT_NV12 // because Intel drivers crash unless using AV_PIX_FMT_NV12
final_frame->format = AV_PIX_FMT_NV12; sw_frame->format = AV_PIX_FMT_NV12;
const int ret = av_hwframe_transfer_data(final_frame.get(), initial_frame.get(), 0); const int transfer_data_ret = av_hwframe_transfer_data(sw_frame, hw_frame, 0);
ASSERT_MSG(!ret, "av_hwframe_transfer_data error {}", ret); ASSERT_MSG(!transfer_data_ret, "av_hwframe_transfer_data error {}", transfer_data_ret);
} else { av_frame_free(&hw_frame);
final_frame = std::move(initial_frame);
} }
if (final_frame->format != AV_PIX_FMT_YUV420P && final_frame->format != AV_PIX_FMT_NV12) { #endif
UNIMPLEMENTED_MSG("Unexpected video format: {}", final_frame->format); if (sw_frame->format != AV_PIX_FMT_YUV420P && sw_frame->format != AV_PIX_FMT_NV12) {
UNIMPLEMENTED_MSG("Unexpected video format from host graphics: {}", sw_frame->format);
av_frame_free(&sw_frame);
return; return;
} }
av_frames.push(std::move(final_frame)); av_frames.push(AVFramePtr{sw_frame, AVFrameDeleter});
if (av_frames.size() > 10) { if (av_frames.size() > 10) {
LOG_TRACE(Service_NVDRV, "av_frames.push overflow dropped frame"); LOG_TRACE(Service_NVDRV, "av_frames.push overflow dropped frame");
av_frames.pop(); av_frames.pop();

View file

@ -50,26 +50,18 @@ public:
/// Returns the value of current_codec /// Returns the value of current_codec
[[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const; [[nodiscard]] NvdecCommon::VideoCodec GetCurrentCodec() const;
/// Return name of the current codec /// Return name of the current codec
[[nodiscard]] std::string_view GetCurrentCodecName() const; [[nodiscard]] std::string_view GetCurrentCodecName() const;
private: private:
void InitializeAvCodecContext(); void InitializeHwdec();
void InitializeGpuDecoder();
void TestGpuDecoder();
bool CreateGpuAvDevice();
bool initialized{}; bool initialized{};
bool using_gpu_decode{};
NvdecCommon::VideoCodec current_codec{NvdecCommon::VideoCodec::None}; NvdecCommon::VideoCodec current_codec{NvdecCommon::VideoCodec::None};
AVCodec* av_codec{nullptr}; AVCodec* av_codec{nullptr};
AVBufferRef* av_hw_device{nullptr};
AVCodecContext* av_codec_ctx{nullptr}; AVCodecContext* av_codec_ctx{nullptr};
AVBufferRef* av_gpu_decoder{nullptr};
GPU& gpu; GPU& gpu;
const NvdecCommon::NvdecRegisters& state; const NvdecCommon::NvdecRegisters& state;

View file

@ -15,7 +15,7 @@
#include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/gl_shader_util.h"
#include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/shader_notify.h" #include "video_core/shader_notify.h"
#include "video_core/texture_cache/texture_cache.h" #include "video_core/texture_cache/texture_cache_base.h"
#if defined(_MSC_VER) && defined(NDEBUG) #if defined(_MSC_VER) && defined(NDEBUG)
#define LAMBDA_FORCEINLINE [[msvc::forceinline]] #define LAMBDA_FORCEINLINE [[msvc::forceinline]]

View file

@ -32,7 +32,7 @@
#include "video_core/renderer_opengl/maxwell_to_gl.h" #include "video_core/renderer_opengl/maxwell_to_gl.h"
#include "video_core/renderer_opengl/renderer_opengl.h" #include "video_core/renderer_opengl/renderer_opengl.h"
#include "video_core/shader_cache.h" #include "video_core/shader_cache.h"
#include "video_core/texture_cache/texture_cache.h" #include "video_core/texture_cache/texture_cache_base.h"
namespace OpenGL { namespace OpenGL {

View file

@ -18,10 +18,8 @@
#include "video_core/renderer_opengl/maxwell_to_gl.h" #include "video_core/renderer_opengl/maxwell_to_gl.h"
#include "video_core/renderer_opengl/util_shaders.h" #include "video_core/renderer_opengl/util_shaders.h"
#include "video_core/surface.h" #include "video_core/surface.h"
#include "video_core/texture_cache/format_lookup_table.h" #include "video_core/texture_cache/formatter.h"
#include "video_core/texture_cache/samples_helper.h" #include "video_core/texture_cache/samples_helper.h"
#include "video_core/texture_cache/texture_cache.h"
#include "video_core/textures/decoders.h"
namespace OpenGL { namespace OpenGL {
namespace { namespace {

View file

@ -12,7 +12,7 @@
#include "shader_recompiler/shader_info.h" #include "shader_recompiler/shader_info.h"
#include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/util_shaders.h" #include "video_core/renderer_opengl/util_shaders.h"
#include "video_core/texture_cache/texture_cache.h" #include "video_core/texture_cache/texture_cache_base.h"
namespace OpenGL { namespace OpenGL {

View file

@ -0,0 +1,10 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/texture_cache/texture_cache.h"
namespace VideoCommon {
template class VideoCommon::TextureCache<OpenGL::TextureCacheParams>;
}

View file

@ -32,7 +32,7 @@
#include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/shader_cache.h" #include "video_core/shader_cache.h"
#include "video_core/texture_cache/texture_cache.h" #include "video_core/texture_cache/texture_cache_base.h"
#include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_wrapper.h" #include "video_core/vulkan_common/vulkan_wrapper.h"

View file

@ -19,6 +19,8 @@
#include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/texture_cache/formatter.h"
#include "video_core/texture_cache/samples_helper.h"
#include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h" #include "video_core/vulkan_common/vulkan_wrapper.h"

View file

@ -9,7 +9,7 @@
#include "shader_recompiler/shader_info.h" #include "shader_recompiler/shader_info.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/texture_cache/texture_cache.h" #include "video_core/texture_cache/texture_cache_base.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h" #include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h" #include "video_core/vulkan_common/vulkan_wrapper.h"

View file

@ -0,0 +1,10 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/texture_cache/texture_cache.h"
namespace VideoCommon {
template class VideoCommon::TextureCache<Vulkan::TextureCacheParams>;
}

View file

@ -6,7 +6,7 @@
#include "common/assert.h" #include "common/assert.h"
#include "video_core/texture_cache/image_view_info.h" #include "video_core/texture_cache/image_view_info.h"
#include "video_core/texture_cache/texture_cache.h" #include "video_core/texture_cache/texture_cache_base.h"
#include "video_core/texture_cache/types.h" #include "video_core/texture_cache/types.h"
#include "video_core/textures/texture.h" #include "video_core/textures/texture.h"
@ -14,6 +14,8 @@ namespace VideoCommon {
namespace { namespace {
using Tegra::Texture::TextureType;
constexpr u8 RENDER_TARGET_SWIZZLE = std::numeric_limits<u8>::max(); constexpr u8 RENDER_TARGET_SWIZZLE = std::numeric_limits<u8>::max();
[[nodiscard]] u8 CastSwizzle(SwizzleSource source) { [[nodiscard]] u8 CastSwizzle(SwizzleSource source) {

View file

@ -4,48 +4,11 @@
#pragma once #pragma once
#include <algorithm>
#include <array>
#include <bit>
#include <memory>
#include <mutex>
#include <optional>
#include <span>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
#include <boost/container/small_vector.hpp>
#include "common/alignment.h" #include "common/alignment.h"
#include "common/common_types.h"
#include "common/literals.h"
#include "common/logging/log.h"
#include "common/settings.h" #include "common/settings.h"
#include "video_core/compatible_formats.h"
#include "video_core/delayed_destruction_ring.h"
#include "video_core/dirty_flags.h" #include "video_core/dirty_flags.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/surface.h"
#include "video_core/texture_cache/descriptor_table.h"
#include "video_core/texture_cache/format_lookup_table.h"
#include "video_core/texture_cache/formatter.h"
#include "video_core/texture_cache/image_base.h"
#include "video_core/texture_cache/image_info.h"
#include "video_core/texture_cache/image_view_base.h"
#include "video_core/texture_cache/image_view_info.h"
#include "video_core/texture_cache/render_targets.h"
#include "video_core/texture_cache/samples_helper.h" #include "video_core/texture_cache/samples_helper.h"
#include "video_core/texture_cache/slot_vector.h" #include "video_core/texture_cache/texture_cache_base.h"
#include "video_core/texture_cache/types.h"
#include "video_core/texture_cache/util.h"
#include "video_core/textures/texture.h"
namespace VideoCommon { namespace VideoCommon {
@ -61,352 +24,6 @@ using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
using VideoCore::Surface::SurfaceType; using VideoCore::Surface::SurfaceType;
using namespace Common::Literals; using namespace Common::Literals;
template <class P>
class TextureCache {
/// Address shift for caching images into a hash table
static constexpr u64 PAGE_BITS = 20;
/// Enables debugging features to the texture cache
static constexpr bool ENABLE_VALIDATION = P::ENABLE_VALIDATION;
/// Implement blits as copies between framebuffers
static constexpr bool FRAMEBUFFER_BLITS = P::FRAMEBUFFER_BLITS;
/// True when some copies have to be emulated
static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES;
/// True when the API can provide info about the memory of the device.
static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO;
/// Image view ID for null descriptors
static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0};
/// Sampler ID for bugged sampler ids
static constexpr SamplerId NULL_SAMPLER_ID{0};
static constexpr u64 DEFAULT_EXPECTED_MEMORY = 1_GiB;
static constexpr u64 DEFAULT_CRITICAL_MEMORY = 2_GiB;
using Runtime = typename P::Runtime;
using Image = typename P::Image;
using ImageAlloc = typename P::ImageAlloc;
using ImageView = typename P::ImageView;
using Sampler = typename P::Sampler;
using Framebuffer = typename P::Framebuffer;
/// Destination/source image pair of a blit together with the format of each side
/// (this is the return type of GetBlitImages)
struct BlitImages {
/// Id of the destination image
ImageId dst_id;
/// Id of the source image
ImageId src_id;
/// Pixel format associated with the destination
PixelFormat dst_format;
/// Pixel format associated with the source
PixelFormat src_format;
};
/// Hash functor that uses the key's own value, converted to size_t, as the hash
template <typename T>
struct IdentityHash {
    /// @param value Key to hash
    /// @return value converted with static_cast<size_t>
    [[nodiscard]] size_t operator()(T value) const noexcept {
        const auto hashed = static_cast<size_t>(value);
        return hashed;
    }
};
public:
explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&, Tegra::Engines::Maxwell3D&,
Tegra::Engines::KeplerCompute&, Tegra::MemoryManager&);
/// Notify the cache that a new frame has been queued
void TickFrame();
/// Return a constant reference to the given image view id
[[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept;
/// Return a reference to the given image view id
[[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept;
/// Mark an image as modified from the GPU
void MarkModification(ImageId id) noexcept;
/// Fill image_view_ids with the graphics images in indices
void FillGraphicsImageViews(std::span<const u32> indices,
std::span<ImageViewId> image_view_ids);
/// Fill image_view_ids with the compute images in indices
void FillComputeImageViews(std::span<const u32> indices, std::span<ImageViewId> image_view_ids);
/// Get the sampler from the graphics descriptor table in the specified index
Sampler* GetGraphicsSampler(u32 index);
/// Get the sampler from the compute descriptor table in the specified index
Sampler* GetComputeSampler(u32 index);
/// Refresh the state for graphics image view and sampler descriptors
void SynchronizeGraphicsDescriptors();
/// Refresh the state for compute image view and sampler descriptors
void SynchronizeComputeDescriptors();
/// Update bound render targets and upload memory if necessary
/// @param is_clear True when the render targets are being used for clears
void UpdateRenderTargets(bool is_clear);
/// Find a framebuffer with the currently bound render targets
/// UpdateRenderTargets should be called before this
Framebuffer* GetFramebuffer();
/// Mark images in a range as modified from the CPU
void WriteMemory(VAddr cpu_addr, size_t size);
/// Download contents of host images to guest memory in a region
void DownloadMemory(VAddr cpu_addr, size_t size);
/// Remove images in a region
void UnmapMemory(VAddr cpu_addr, size_t size);
/// Remove images in a region
void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size);
/// Blit an image with the given parameters
void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Surface& src,
const Tegra::Engines::Fermi2D::Config& copy);
/// Invalidate the contents of the color buffer index
/// These contents become unspecified, the cache can assume aggressive optimizations.
void InvalidateColorBuffer(size_t index);
/// Invalidate the contents of the depth buffer
/// These contents become unspecified, the cache can assume aggressive optimizations.
void InvalidateDepthBuffer();
/// Try to find a cached image view in the given CPU address
[[nodiscard]] ImageView* TryFindFramebufferImageView(VAddr cpu_addr);
/// Return true when there are uncommitted images to be downloaded
[[nodiscard]] bool HasUncommittedFlushes() const noexcept;
/// Return true when the caller should wait for async downloads
[[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept;
/// Commit asynchronous downloads
void CommitAsyncFlushes();
/// Pop asynchronous downloads
void PopAsyncFlushes();
/// Return true when a CPU region is modified from the GPU
[[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
std::mutex mutex;
private:
/// Iterate over all page indices touched by the CPU range [addr, addr + size)
/// @param addr First CPU address of the range
/// @param size Size of the range in bytes
/// @param func Callable invoked with each page index (u64). When its return type is bool,
///             returning true stops the iteration early
/// @note NOTE(review): with size == 0 the last page is derived from addr - 1; callers are
///       presumably expected to pass a non-zero size - confirm
template <typename Func>
static void ForEachCPUPage(VAddr addr, size_t size, Func&& func) {
    // invoke_result_t names the callable's return type. Comparing the invoke_result trait
    // struct itself against bool is always false, which silently disabled the early exit.
    static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result_t<Func, u64>, bool>;
    const u64 page_end = (addr + size - 1) >> PAGE_BITS;
    for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
        if constexpr (RETURNS_BOOL) {
            if (func(page)) {
                break;
            }
        } else {
            func(page);
        }
    }
}
/// Iterate over all page indices touched by the GPU range [addr, addr + size)
/// @param addr First GPU virtual address of the range
/// @param size Size of the range in bytes
/// @param func Callable invoked with each page index (u64). When its return type is bool,
///             returning true stops the iteration early
/// @note NOTE(review): with size == 0 the last page is derived from addr - 1; callers are
///       presumably expected to pass a non-zero size - confirm
template <typename Func>
static void ForEachGPUPage(GPUVAddr addr, size_t size, Func&& func) {
    // invoke_result_t names the callable's return type. Comparing the invoke_result trait
    // struct itself against bool is always false, which silently disabled the early exit.
    static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result_t<Func, u64>, bool>;
    const u64 page_end = (addr + size - 1) >> PAGE_BITS;
    for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
        if constexpr (RETURNS_BOOL) {
            if (func(page)) {
                break;
            }
        } else {
            func(page);
        }
    }
}
/// Runs the Garbage Collector.
void RunGarbageCollector();
/// Fills image_view_ids in the image views in indices
void FillImageViews(DescriptorTable<TICEntry>& table,
std::span<ImageViewId> cached_image_view_ids, std::span<const u32> indices,
std::span<ImageViewId> image_view_ids);
/// Find or create an image view in the guest descriptor table
ImageViewId VisitImageView(DescriptorTable<TICEntry>& table,
std::span<ImageViewId> cached_image_view_ids, u32 index);
/// Find or create a framebuffer with the given render target parameters
FramebufferId GetFramebufferId(const RenderTargets& key);
/// Refresh the contents (pixel data) of an image
void RefreshContents(Image& image, ImageId image_id);
/// Upload data from guest to an image
template <typename StagingBuffer>
void UploadImageContents(Image& image, StagingBuffer& staging_buffer);
/// Find or create an image view from a guest descriptor
[[nodiscard]] ImageViewId FindImageView(const TICEntry& config);
/// Create a new image view from a guest descriptor
[[nodiscard]] ImageViewId CreateImageView(const TICEntry& config);
/// Find or create an image from the given parameters
[[nodiscard]] ImageId FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
RelaxedOptions options = RelaxedOptions{});
/// Find an image from the given parameters
[[nodiscard]] ImageId FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
RelaxedOptions options);
/// Create an image from the given parameters
[[nodiscard]] ImageId InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
RelaxedOptions options);
/// Create a new image and join perfectly matching existing images
/// Remove joined images from the cache
[[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr);
/// Return a blit image pair from the given guest blit parameters
[[nodiscard]] BlitImages GetBlitImages(const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Surface& src);
/// Find or create a sampler from a guest descriptor sampler
[[nodiscard]] SamplerId FindSampler(const TSCEntry& config);
/// Find or create an image view for the given color buffer index
[[nodiscard]] ImageViewId FindColorBuffer(size_t index, bool is_clear);
/// Find or create an image view for the depth buffer
[[nodiscard]] ImageViewId FindDepthBuffer(bool is_clear);
/// Find or create a view for a render target with the given image parameters
[[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr,
bool is_clear);
/// Iterates over all the images in a region calling func
template <typename Func>
void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func);
template <typename Func>
void ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func);
template <typename Func>
void ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func);
/// Iterates over all the images in a region calling func
template <typename Func>
void ForEachSparseSegment(ImageBase& image, Func&& func);
/// Find or create an image view in the given image with the passed parameters
[[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info);
/// Register image in the page table
void RegisterImage(ImageId image);
/// Unregister image from the page table
void UnregisterImage(ImageId image);
/// Track CPU reads and writes for image
void TrackImage(ImageBase& image, ImageId image_id);
/// Stop tracking CPU reads and writes for image
void UntrackImage(ImageBase& image, ImageId image_id);
/// Delete image from the cache
void DeleteImage(ImageId image);
/// Remove image views references from the cache
void RemoveImageViewReferences(std::span<const ImageViewId> removed_views);
/// Remove framebuffers using the given image views from the cache
void RemoveFramebuffers(std::span<const ImageViewId> removed_views);
/// Mark an image as modified from the GPU
void MarkModification(ImageBase& image) noexcept;
/// Synchronize image aliases, copying data if needed
void SynchronizeAliases(ImageId image_id);
/// Prepare an image to be used
void PrepareImage(ImageId image_id, bool is_modification, bool invalidate);
/// Prepare an image view to be used
void PrepareImageView(ImageViewId image_view_id, bool is_modification, bool invalidate);
/// Execute copies from one image to the other, even if they are incompatible
void CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies);
/// Bind an image view as render target, downloading resources preemptively if needed
void BindRenderTarget(ImageViewId* old_id, ImageViewId new_id);
/// Create a render target from a given image and image view parameters
[[nodiscard]] std::pair<FramebufferId, ImageViewId> RenderTargetFromImage(
ImageId, const ImageViewInfo& view_info);
/// Returns true if the current clear parameters clear the whole image of a given image view
[[nodiscard]] bool IsFullClear(ImageViewId id);
Runtime& runtime;
VideoCore::RasterizerInterface& rasterizer;
Tegra::Engines::Maxwell3D& maxwell3d;
Tegra::Engines::KeplerCompute& kepler_compute;
Tegra::MemoryManager& gpu_memory;
DescriptorTable<TICEntry> graphics_image_table{gpu_memory};
DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory};
std::vector<SamplerId> graphics_sampler_ids;
std::vector<ImageViewId> graphics_image_view_ids;
DescriptorTable<TICEntry> compute_image_table{gpu_memory};
DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory};
std::vector<SamplerId> compute_sampler_ids;
std::vector<ImageViewId> compute_image_view_ids;
RenderTargets render_targets;
std::unordered_map<TICEntry, ImageViewId> image_views;
std::unordered_map<TSCEntry, SamplerId> samplers;
std::unordered_map<RenderTargets, FramebufferId> framebuffers;
std::unordered_map<u64, std::vector<ImageMapId>, IdentityHash<u64>> page_table;
std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table;
std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> sparse_page_table;
std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views;
VAddr virtual_invalid_space{};
bool has_deleted_images = false;
u64 total_used_memory = 0;
u64 minimum_memory;
u64 expected_memory;
u64 critical_memory;
SlotVector<Image> slot_images;
SlotVector<ImageMapView> slot_map_views;
SlotVector<ImageView> slot_image_views;
SlotVector<ImageAlloc> slot_image_allocs;
SlotVector<Sampler> slot_samplers;
SlotVector<Framebuffer> slot_framebuffers;
// TODO: This data structure is not optimal and it should be reworked
std::vector<ImageId> uncommitted_downloads;
std::queue<std::vector<ImageId>> committed_downloads;
static constexpr size_t TICKS_TO_DESTROY = 6;
DelayedDestructionRing<Image, TICKS_TO_DESTROY> sentenced_images;
DelayedDestructionRing<ImageView, TICKS_TO_DESTROY> sentenced_image_view;
DelayedDestructionRing<Framebuffer, TICKS_TO_DESTROY> sentenced_framebuffers;
std::unordered_map<GPUVAddr, ImageAllocId> image_allocs_table;
u64 modification_tick = 0;
u64 frame_tick = 0;
typename SlotVector<Image>::Iterator deletion_iterator;
};
template <class P> template <class P>
TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_, TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_,
Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::Maxwell3D& maxwell3d_,
@ -820,40 +437,6 @@ void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
} }
} }
/// Rebind the color buffer at the given index and drop its pending modification state.
/// Logs an error and does nothing else when no color buffer can be found for the index.
template <class P>
void TextureCache<P>::InvalidateColorBuffer(size_t index) {
    ImageViewId& bound_view_id = render_targets.color_buffer_ids[index];
    bound_view_id = FindColorBuffer(index, false);
    if (!bound_view_id) {
        LOG_ERROR(HW_GPU, "Invalidating invalid color buffer in index={}", index);
    } else {
        // The previous contents no longer matter once the buffer is invalidated,
        // so neither the CPU nor the GPU copy is considered modified anymore
        ImageView& view = slot_image_views[bound_view_id];
        Image& target = slot_images[view.image_id];
        target.flags &= ~ImageFlagBits::CpuModified;
        target.flags &= ~ImageFlagBits::GpuModified;
        runtime.InvalidateColorBuffer(view, index);
    }
}
/// Rebind the depth buffer and drop its pending modification state.
/// Logs an error and does nothing else when no depth buffer can be found.
template <class P>
void TextureCache<P>::InvalidateDepthBuffer() {
    ImageViewId& bound_view_id = render_targets.depth_buffer_id;
    bound_view_id = FindDepthBuffer(false);
    if (!bound_view_id) {
        LOG_ERROR(HW_GPU, "Invalidating invalid depth buffer");
    } else {
        // The previous contents no longer matter once the buffer is invalidated,
        // so neither the CPU nor the GPU copy is considered modified anymore
        ImageBase& target = slot_images[slot_image_views[bound_view_id].image_id];
        target.flags &= ~ImageFlagBits::CpuModified;
        target.flags &= ~ImageFlagBits::GpuModified;
        ImageView& view = slot_image_views[bound_view_id];
        runtime.InvalidateDepthBuffer(view);
    }
}
template <class P> template <class P>
typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView(VAddr cpu_addr) { typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView(VAddr cpu_addr) {
// TODO: Properly implement this // TODO: Properly implement this

View file

@ -0,0 +1,385 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <mutex>
#include <queue>
#include <span>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
#include "common/common_types.h"
#include "common/literals.h"
#include "video_core/compatible_formats.h"
#include "video_core/delayed_destruction_ring.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/surface.h"
#include "video_core/texture_cache/descriptor_table.h"
#include "video_core/texture_cache/image_base.h"
#include "video_core/texture_cache/image_info.h"
#include "video_core/texture_cache/image_view_info.h"
#include "video_core/texture_cache/render_targets.h"
#include "video_core/texture_cache/slot_vector.h"
#include "video_core/texture_cache/types.h"
#include "video_core/texture_cache/util.h"
#include "video_core/textures/texture.h"
namespace VideoCommon {
using Tegra::Texture::SwizzleSource;
using Tegra::Texture::TICEntry;
using Tegra::Texture::TSCEntry;
using VideoCore::Surface::GetFormatType;
using VideoCore::Surface::IsCopyCompatible;
using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::PixelFormatFromDepthFormat;
using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
using namespace Common::Literals;
template <class P>
class TextureCache {
/// Address shift for caching images into a hash table
static constexpr u64 PAGE_BITS = 20;
/// Enables debugging features to the texture cache
static constexpr bool ENABLE_VALIDATION = P::ENABLE_VALIDATION;
/// Implement blits as copies between framebuffers
static constexpr bool FRAMEBUFFER_BLITS = P::FRAMEBUFFER_BLITS;
/// True when some copies have to be emulated
static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES;
/// True when the API can provide info about the memory of the device.
static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO;
/// Image view ID for null descriptors
static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0};
/// Sampler ID for bugged sampler ids
static constexpr SamplerId NULL_SAMPLER_ID{0};
static constexpr u64 DEFAULT_EXPECTED_MEMORY = 1_GiB;
static constexpr u64 DEFAULT_CRITICAL_MEMORY = 2_GiB;
using Runtime = typename P::Runtime;
using Image = typename P::Image;
using ImageAlloc = typename P::ImageAlloc;
using ImageView = typename P::ImageView;
using Sampler = typename P::Sampler;
using Framebuffer = typename P::Framebuffer;
/// Destination/source image pair of a blit together with the format of each side
/// (this is the return type of GetBlitImages)
struct BlitImages {
/// Id of the destination image
ImageId dst_id;
/// Id of the source image
ImageId src_id;
/// Pixel format associated with the destination
PixelFormat dst_format;
/// Pixel format associated with the source
PixelFormat src_format;
};
/// Hash functor that uses the key's own value, converted to size_t, as the hash
template <typename T>
struct IdentityHash {
    /// @param value Key to hash
    /// @return value converted with static_cast<size_t>
    [[nodiscard]] size_t operator()(T value) const noexcept {
        const auto hashed = static_cast<size_t>(value);
        return hashed;
    }
};
public:
explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&, Tegra::Engines::Maxwell3D&,
Tegra::Engines::KeplerCompute&, Tegra::MemoryManager&);
/// Notify the cache that a new frame has been queued
void TickFrame();
/// Return a constant reference to the given image view id
[[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept;
/// Return a reference to the given image view id
[[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept;
/// Mark an image as modified from the GPU
void MarkModification(ImageId id) noexcept;
/// Fill image_view_ids with the graphics images in indices
void FillGraphicsImageViews(std::span<const u32> indices,
std::span<ImageViewId> image_view_ids);
/// Fill image_view_ids with the compute images in indices
void FillComputeImageViews(std::span<const u32> indices, std::span<ImageViewId> image_view_ids);
/// Get the sampler from the graphics descriptor table in the specified index
Sampler* GetGraphicsSampler(u32 index);
/// Get the sampler from the compute descriptor table in the specified index
Sampler* GetComputeSampler(u32 index);
/// Refresh the state for graphics image view and sampler descriptors
void SynchronizeGraphicsDescriptors();
/// Refresh the state for compute image view and sampler descriptors
void SynchronizeComputeDescriptors();
/// Update bound render targets and upload memory if necessary
/// @param is_clear True when the render targets are being used for clears
void UpdateRenderTargets(bool is_clear);
/// Find a framebuffer with the currently bound render targets
/// UpdateRenderTargets should be called before this
Framebuffer* GetFramebuffer();
/// Mark images in a range as modified from the CPU
void WriteMemory(VAddr cpu_addr, size_t size);
/// Download contents of host images to guest memory in a region
void DownloadMemory(VAddr cpu_addr, size_t size);
/// Remove images in a region
void UnmapMemory(VAddr cpu_addr, size_t size);
/// Remove images in a region
void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size);
/// Blit an image with the given parameters
void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Surface& src,
const Tegra::Engines::Fermi2D::Config& copy);
/// Try to find a cached image view in the given CPU address
[[nodiscard]] ImageView* TryFindFramebufferImageView(VAddr cpu_addr);
/// Return true when there are uncommitted images to be downloaded
[[nodiscard]] bool HasUncommittedFlushes() const noexcept;
/// Return true when the caller should wait for async downloads
[[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept;
/// Commit asynchronous downloads
void CommitAsyncFlushes();
/// Pop asynchronous downloads
void PopAsyncFlushes();
/// Return true when a CPU region is modified from the GPU
[[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
std::mutex mutex;
private:
/// Iterate over all page indices touched by the CPU range [addr, addr + size)
/// @param addr First CPU address of the range
/// @param size Size of the range in bytes
/// @param func Callable invoked with each page index (u64). When its return type is bool,
///             returning true stops the iteration early
/// @note NOTE(review): with size == 0 the last page is derived from addr - 1; callers are
///       presumably expected to pass a non-zero size - confirm
template <typename Func>
static void ForEachCPUPage(VAddr addr, size_t size, Func&& func) {
    // invoke_result_t names the callable's return type. Comparing the invoke_result trait
    // struct itself against bool is always false, which silently disabled the early exit.
    static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result_t<Func, u64>, bool>;
    const u64 page_end = (addr + size - 1) >> PAGE_BITS;
    for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
        if constexpr (RETURNS_BOOL) {
            if (func(page)) {
                break;
            }
        } else {
            func(page);
        }
    }
}
/// Iterate over all page indices touched by the GPU range [addr, addr + size)
/// @param addr First GPU virtual address of the range
/// @param size Size of the range in bytes
/// @param func Callable invoked with each page index (u64). When its return type is bool,
///             returning true stops the iteration early
/// @note NOTE(review): with size == 0 the last page is derived from addr - 1; callers are
///       presumably expected to pass a non-zero size - confirm
template <typename Func>
static void ForEachGPUPage(GPUVAddr addr, size_t size, Func&& func) {
    // invoke_result_t names the callable's return type. Comparing the invoke_result trait
    // struct itself against bool is always false, which silently disabled the early exit.
    static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result_t<Func, u64>, bool>;
    const u64 page_end = (addr + size - 1) >> PAGE_BITS;
    for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
        if constexpr (RETURNS_BOOL) {
            if (func(page)) {
                break;
            }
        } else {
            func(page);
        }
    }
}
/// Runs the Garbage Collector.
void RunGarbageCollector();
/// Fills image_view_ids in the image views in indices
void FillImageViews(DescriptorTable<TICEntry>& table,
std::span<ImageViewId> cached_image_view_ids, std::span<const u32> indices,
std::span<ImageViewId> image_view_ids);
/// Find or create an image view in the guest descriptor table
ImageViewId VisitImageView(DescriptorTable<TICEntry>& table,
std::span<ImageViewId> cached_image_view_ids, u32 index);
/// Find or create a framebuffer with the given render target parameters
FramebufferId GetFramebufferId(const RenderTargets& key);
/// Refresh the contents (pixel data) of an image
void RefreshContents(Image& image, ImageId image_id);
/// Upload data from guest to an image
template <typename StagingBuffer>
void UploadImageContents(Image& image, StagingBuffer& staging_buffer);
/// Find or create an image view from a guest descriptor
[[nodiscard]] ImageViewId FindImageView(const TICEntry& config);
/// Create a new image view from a guest descriptor
[[nodiscard]] ImageViewId CreateImageView(const TICEntry& config);
/// Find or create an image from the given parameters
[[nodiscard]] ImageId FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
RelaxedOptions options = RelaxedOptions{});
/// Find an image from the given parameters
[[nodiscard]] ImageId FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
RelaxedOptions options);
/// Create an image from the given parameters
[[nodiscard]] ImageId InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
RelaxedOptions options);
/// Create a new image and join perfectly matching existing images
/// Remove joined images from the cache
[[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr);
/// Return a blit image pair from the given guest blit parameters
[[nodiscard]] BlitImages GetBlitImages(const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Surface& src);
/// Find or create a sampler from a guest descriptor sampler
[[nodiscard]] SamplerId FindSampler(const TSCEntry& config);
/// Find or create an image view for the given color buffer index
[[nodiscard]] ImageViewId FindColorBuffer(size_t index, bool is_clear);
/// Find or create an image view for the depth buffer
[[nodiscard]] ImageViewId FindDepthBuffer(bool is_clear);
/// Find or create a view for a render target with the given image parameters
[[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr,
bool is_clear);
/// Iterates over all the images in a region calling func
template <typename Func>
void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func);
template <typename Func>
void ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func);
template <typename Func>
void ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func);
/// Iterates over all the images in a region calling func
template <typename Func>
void ForEachSparseSegment(ImageBase& image, Func&& func);
/// Find or create an image view in the given image with the passed parameters
[[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info);
/// Register image in the page table
void RegisterImage(ImageId image);
/// Unregister image from the page table
void UnregisterImage(ImageId image);
/// Track CPU reads and writes for image
void TrackImage(ImageBase& image, ImageId image_id);
/// Stop tracking CPU reads and writes for image
void UntrackImage(ImageBase& image, ImageId image_id);
/// Delete image from the cache
void DeleteImage(ImageId image);
/// Remove image views references from the cache
void RemoveImageViewReferences(std::span<const ImageViewId> removed_views);
/// Remove framebuffers using the given image views from the cache
void RemoveFramebuffers(std::span<const ImageViewId> removed_views);
/// Mark an image as modified from the GPU
void MarkModification(ImageBase& image) noexcept;
/// Synchronize image aliases, copying data if needed
void SynchronizeAliases(ImageId image_id);
/// Prepare an image to be used
void PrepareImage(ImageId image_id, bool is_modification, bool invalidate);
/// Prepare an image view to be used
void PrepareImageView(ImageViewId image_view_id, bool is_modification, bool invalidate);
/// Execute copies from one image to the other, even if they are incompatible
void CopyImage(ImageId dst_id, ImageId src_id, std::span<const ImageCopy> copies);
/// Bind an image view as render target, downloading resources preemptively if needed
void BindRenderTarget(ImageViewId* old_id, ImageViewId new_id);
/// Create a render target from a given image and image view parameters
[[nodiscard]] std::pair<FramebufferId, ImageViewId> RenderTargetFromImage(
ImageId, const ImageViewInfo& view_info);
/// Returns true if the current clear parameters clear the whole image of a given image view
[[nodiscard]] bool IsFullClear(ImageViewId id);
// Reference members: these objects are not owned by this class.
Runtime& runtime;
VideoCore::RasterizerInterface& rasterizer;
Tegra::Engines::Maxwell3D& maxwell3d;
Tegra::Engines::KeplerCompute& kepler_compute;
Tegra::MemoryManager& gpu_memory;
// Graphics descriptor tables (constructed from gpu_memory) and their id lists.
DescriptorTable<TICEntry> graphics_image_table{gpu_memory};
DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory};
std::vector<SamplerId> graphics_sampler_ids;
std::vector<ImageViewId> graphics_image_view_ids;
// Compute descriptor tables (constructed from gpu_memory) and their id lists.
DescriptorTable<TICEntry> compute_image_table{gpu_memory};
DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory};
std::vector<SamplerId> compute_sampler_ids;
std::vector<ImageViewId> compute_image_view_ids;
RenderTargets render_targets;
// Lookup maps from descriptor entries / render target sets to slot ids.
std::unordered_map<TICEntry, ImageViewId> image_views;
std::unordered_map<TSCEntry, SamplerId> samplers;
std::unordered_map<RenderTargets, FramebufferId> framebuffers;
// Tables keyed by a u64 and hashed with IdentityHash.
// NOTE(review): keys are presumably page indices of an address — confirm against the users.
std::unordered_map<u64, std::vector<ImageMapId>, IdentityHash<u64>> page_table;
std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table;
std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> sparse_page_table;
std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views;
VAddr virtual_invalid_space{};
bool has_deleted_images = false;
u64 total_used_memory = 0;
// The three thresholds below have no default initializer here;
// presumably they are assigned in the constructor — TODO confirm.
u64 minimum_memory;
u64 expected_memory;
u64 critical_memory;
// Slot storage for each kind of object.
SlotVector<Image> slot_images;
SlotVector<ImageMapView> slot_map_views;
SlotVector<ImageView> slot_image_views;
SlotVector<ImageAlloc> slot_image_allocs;
SlotVector<Sampler> slot_samplers;
SlotVector<Framebuffer> slot_framebuffers;
// TODO: This data structure is not optimal and it should be reworked
std::vector<ImageId> uncommitted_downloads;
std::queue<std::vector<ImageId>> committed_downloads;
// Size parameter shared by the three delayed destruction rings below.
static constexpr size_t TICKS_TO_DESTROY = 6;
DelayedDestructionRing<Image, TICKS_TO_DESTROY> sentenced_images;
DelayedDestructionRing<ImageView, TICKS_TO_DESTROY> sentenced_image_view;
DelayedDestructionRing<Framebuffer, TICKS_TO_DESTROY> sentenced_framebuffers;
// Maps a GPU virtual address to an image allocation id.
std::unordered_map<GPUVAddr, ImageAllocId> image_allocs_table;
// Tick counters, both starting at zero.
u64 modification_tick = 0;
u64 frame_tick = 0;
// Iterator over slot_images' element type.
// NOTE(review): presumably a cursor for incremental deletion passes — confirm.
typename SlotVector<Image>::Iterator deletion_iterator;
};
} // namespace VideoCommon

View file

@ -37,8 +37,7 @@ std::unique_ptr<VideoCore::RendererBase> CreateRenderer(
namespace VideoCore { namespace VideoCore {
std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) { std::unique_ptr<Tegra::GPU> CreateGPU(Core::Frontend::EmuWindow& emu_window, Core::System& system) {
const auto nvdec_value = Settings::values.nvdec_emulation.GetValue(); const bool use_nvdec = Settings::values.use_nvdec_emulation.GetValue();
const bool use_nvdec = nvdec_value != Settings::NvdecEmulation::Off;
const bool use_async = Settings::values.use_asynchronous_gpu_emulation.GetValue(); const bool use_async = Settings::values.use_asynchronous_gpu_emulation.GetValue();
auto gpu = std::make_unique<Tegra::GPU>(system, use_async, use_nvdec); auto gpu = std::make_unique<Tegra::GPU>(system, use_async, use_nvdec);
auto context = emu_window.CreateSharedContext(); auto context = emu_window.CreateSharedContext();

View file

@ -826,7 +826,7 @@ void Config::ReadRendererValues() {
ReadGlobalSetting(Settings::values.use_disk_shader_cache); ReadGlobalSetting(Settings::values.use_disk_shader_cache);
ReadGlobalSetting(Settings::values.gpu_accuracy); ReadGlobalSetting(Settings::values.gpu_accuracy);
ReadGlobalSetting(Settings::values.use_asynchronous_gpu_emulation); ReadGlobalSetting(Settings::values.use_asynchronous_gpu_emulation);
ReadGlobalSetting(Settings::values.nvdec_emulation); ReadGlobalSetting(Settings::values.use_nvdec_emulation);
ReadGlobalSetting(Settings::values.accelerate_astc); ReadGlobalSetting(Settings::values.accelerate_astc);
ReadGlobalSetting(Settings::values.use_vsync); ReadGlobalSetting(Settings::values.use_vsync);
ReadGlobalSetting(Settings::values.shader_backend); ReadGlobalSetting(Settings::values.shader_backend);
@ -1372,10 +1372,7 @@ void Config::SaveRendererValues() {
static_cast<u32>(Settings::values.gpu_accuracy.GetDefault()), static_cast<u32>(Settings::values.gpu_accuracy.GetDefault()),
Settings::values.gpu_accuracy.UsingGlobal()); Settings::values.gpu_accuracy.UsingGlobal());
WriteGlobalSetting(Settings::values.use_asynchronous_gpu_emulation); WriteGlobalSetting(Settings::values.use_asynchronous_gpu_emulation);
WriteSetting(QString::fromStdString(Settings::values.nvdec_emulation.GetLabel()), WriteGlobalSetting(Settings::values.use_nvdec_emulation);
static_cast<u32>(Settings::values.nvdec_emulation.GetValue(global)),
static_cast<u32>(Settings::values.nvdec_emulation.GetDefault()),
Settings::values.nvdec_emulation.UsingGlobal());
WriteGlobalSetting(Settings::values.accelerate_astc); WriteGlobalSetting(Settings::values.accelerate_astc);
WriteGlobalSetting(Settings::values.use_vsync); WriteGlobalSetting(Settings::values.use_vsync);
WriteSetting(QString::fromStdString(Settings::values.shader_backend.GetLabel()), WriteSetting(QString::fromStdString(Settings::values.shader_backend.GetLabel()),

View file

@ -182,6 +182,5 @@ private:
Q_DECLARE_METATYPE(Settings::CPUAccuracy); Q_DECLARE_METATYPE(Settings::CPUAccuracy);
Q_DECLARE_METATYPE(Settings::GPUAccuracy); Q_DECLARE_METATYPE(Settings::GPUAccuracy);
Q_DECLARE_METATYPE(Settings::FullscreenMode); Q_DECLARE_METATYPE(Settings::FullscreenMode);
Q_DECLARE_METATYPE(Settings::NvdecEmulation);
Q_DECLARE_METATYPE(Settings::RendererBackend); Q_DECLARE_METATYPE(Settings::RendererBackend);
Q_DECLARE_METATYPE(Settings::ShaderBackend); Q_DECLARE_METATYPE(Settings::ShaderBackend);

View file

@ -88,30 +88,24 @@ void ConfigureGraphics::SetConfiguration() {
ui->api_widget->setEnabled(runtime_lock); ui->api_widget->setEnabled(runtime_lock);
ui->use_asynchronous_gpu_emulation->setEnabled(runtime_lock); ui->use_asynchronous_gpu_emulation->setEnabled(runtime_lock);
ui->use_disk_shader_cache->setEnabled(runtime_lock); ui->use_disk_shader_cache->setEnabled(runtime_lock);
ui->nvdec_emulation_widget->setEnabled(runtime_lock); ui->use_nvdec_emulation->setEnabled(runtime_lock);
ui->accelerate_astc->setEnabled(runtime_lock); ui->accelerate_astc->setEnabled(runtime_lock);
ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache.GetValue()); ui->use_disk_shader_cache->setChecked(Settings::values.use_disk_shader_cache.GetValue());
ui->use_asynchronous_gpu_emulation->setChecked( ui->use_asynchronous_gpu_emulation->setChecked(
Settings::values.use_asynchronous_gpu_emulation.GetValue()); Settings::values.use_asynchronous_gpu_emulation.GetValue());
ui->use_nvdec_emulation->setChecked(Settings::values.use_nvdec_emulation.GetValue());
ui->accelerate_astc->setChecked(Settings::values.accelerate_astc.GetValue()); ui->accelerate_astc->setChecked(Settings::values.accelerate_astc.GetValue());
if (Settings::IsConfiguringGlobal()) { if (Settings::IsConfiguringGlobal()) {
ui->api->setCurrentIndex(static_cast<int>(Settings::values.renderer_backend.GetValue())); ui->api->setCurrentIndex(static_cast<int>(Settings::values.renderer_backend.GetValue()));
ui->fullscreen_mode_combobox->setCurrentIndex( ui->fullscreen_mode_combobox->setCurrentIndex(
static_cast<int>(Settings::values.fullscreen_mode.GetValue())); static_cast<int>(Settings::values.fullscreen_mode.GetValue()));
ui->nvdec_emulation->setCurrentIndex(
static_cast<int>(Settings::values.nvdec_emulation.GetValue()));
ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio.GetValue()); ui->aspect_ratio_combobox->setCurrentIndex(Settings::values.aspect_ratio.GetValue());
} else { } else {
ConfigurationShared::SetPerGameSetting(ui->api, &Settings::values.renderer_backend); ConfigurationShared::SetPerGameSetting(ui->api, &Settings::values.renderer_backend);
ConfigurationShared::SetHighlight(ui->api_widget, ConfigurationShared::SetHighlight(ui->api_widget,
!Settings::values.renderer_backend.UsingGlobal()); !Settings::values.renderer_backend.UsingGlobal());
ConfigurationShared::SetPerGameSetting(ui->nvdec_emulation,
&Settings::values.nvdec_emulation);
ConfigurationShared::SetHighlight(ui->nvdec_emulation_widget,
!Settings::values.nvdec_emulation.UsingGlobal());
ConfigurationShared::SetPerGameSetting(ui->fullscreen_mode_combobox, ConfigurationShared::SetPerGameSetting(ui->fullscreen_mode_combobox,
&Settings::values.fullscreen_mode); &Settings::values.fullscreen_mode);
ConfigurationShared::SetHighlight(ui->fullscreen_mode_label, ConfigurationShared::SetHighlight(ui->fullscreen_mode_label,
@ -143,6 +137,8 @@ void ConfigureGraphics::ApplyConfiguration() {
ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_gpu_emulation, ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_gpu_emulation,
ui->use_asynchronous_gpu_emulation, ui->use_asynchronous_gpu_emulation,
use_asynchronous_gpu_emulation); use_asynchronous_gpu_emulation);
ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_nvdec_emulation,
ui->use_nvdec_emulation, use_nvdec_emulation);
ConfigurationShared::ApplyPerGameSetting(&Settings::values.accelerate_astc, ui->accelerate_astc, ConfigurationShared::ApplyPerGameSetting(&Settings::values.accelerate_astc, ui->accelerate_astc,
accelerate_astc); accelerate_astc);
@ -151,9 +147,6 @@ void ConfigureGraphics::ApplyConfiguration() {
if (Settings::values.renderer_backend.UsingGlobal()) { if (Settings::values.renderer_backend.UsingGlobal()) {
Settings::values.renderer_backend.SetValue(GetCurrentGraphicsBackend()); Settings::values.renderer_backend.SetValue(GetCurrentGraphicsBackend());
} }
if (Settings::values.nvdec_emulation.UsingGlobal()) {
Settings::values.nvdec_emulation.SetValue(GetCurrentNvdecEmulation());
}
if (Settings::values.shader_backend.UsingGlobal()) { if (Settings::values.shader_backend.UsingGlobal()) {
Settings::values.shader_backend.SetValue(shader_backend); Settings::values.shader_backend.SetValue(shader_backend);
} }
@ -187,13 +180,6 @@ void ConfigureGraphics::ApplyConfiguration() {
} }
} }
if (ui->nvdec_emulation->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) {
Settings::values.nvdec_emulation.SetGlobal(true);
} else {
Settings::values.nvdec_emulation.SetGlobal(false);
Settings::values.nvdec_emulation.SetValue(GetCurrentNvdecEmulation());
}
if (ui->bg_combobox->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) { if (ui->bg_combobox->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) {
Settings::values.bg_red.SetGlobal(true); Settings::values.bg_red.SetGlobal(true);
Settings::values.bg_green.SetGlobal(true); Settings::values.bg_green.SetGlobal(true);
@ -292,20 +278,6 @@ Settings::RendererBackend ConfigureGraphics::GetCurrentGraphicsBackend() const {
ConfigurationShared::USE_GLOBAL_OFFSET); ConfigurationShared::USE_GLOBAL_OFFSET);
} }
Settings::NvdecEmulation ConfigureGraphics::GetCurrentNvdecEmulation() const {
if (Settings::IsConfiguringGlobal()) {
return static_cast<Settings::NvdecEmulation>(ui->nvdec_emulation->currentIndex());
}
if (ui->nvdec_emulation->currentIndex() == ConfigurationShared::USE_GLOBAL_INDEX) {
Settings::values.nvdec_emulation.SetGlobal(true);
return Settings::values.nvdec_emulation.GetValue();
}
Settings::values.nvdec_emulation.SetGlobal(false);
return static_cast<Settings::NvdecEmulation>(ui->nvdec_emulation->currentIndex() -
ConfigurationShared::USE_GLOBAL_OFFSET);
}
void ConfigureGraphics::SetupPerGameUI() { void ConfigureGraphics::SetupPerGameUI() {
if (Settings::IsConfiguringGlobal()) { if (Settings::IsConfiguringGlobal()) {
ui->api->setEnabled(Settings::values.renderer_backend.UsingGlobal()); ui->api->setEnabled(Settings::values.renderer_backend.UsingGlobal());
@ -314,7 +286,7 @@ void ConfigureGraphics::SetupPerGameUI() {
ui->aspect_ratio_combobox->setEnabled(Settings::values.aspect_ratio.UsingGlobal()); ui->aspect_ratio_combobox->setEnabled(Settings::values.aspect_ratio.UsingGlobal());
ui->use_asynchronous_gpu_emulation->setEnabled( ui->use_asynchronous_gpu_emulation->setEnabled(
Settings::values.use_asynchronous_gpu_emulation.UsingGlobal()); Settings::values.use_asynchronous_gpu_emulation.UsingGlobal());
ui->nvdec_emulation->setEnabled(Settings::values.nvdec_emulation.UsingGlobal()); ui->use_nvdec_emulation->setEnabled(Settings::values.use_nvdec_emulation.UsingGlobal());
ui->accelerate_astc->setEnabled(Settings::values.accelerate_astc.UsingGlobal()); ui->accelerate_astc->setEnabled(Settings::values.accelerate_astc.UsingGlobal());
ui->use_disk_shader_cache->setEnabled(Settings::values.use_disk_shader_cache.UsingGlobal()); ui->use_disk_shader_cache->setEnabled(Settings::values.use_disk_shader_cache.UsingGlobal());
ui->bg_button->setEnabled(Settings::values.bg_red.UsingGlobal()); ui->bg_button->setEnabled(Settings::values.bg_red.UsingGlobal());
@ -329,6 +301,8 @@ void ConfigureGraphics::SetupPerGameUI() {
ConfigurationShared::SetColoredTristate( ConfigurationShared::SetColoredTristate(
ui->use_disk_shader_cache, Settings::values.use_disk_shader_cache, use_disk_shader_cache); ui->use_disk_shader_cache, Settings::values.use_disk_shader_cache, use_disk_shader_cache);
ConfigurationShared::SetColoredTristate(
ui->use_nvdec_emulation, Settings::values.use_nvdec_emulation, use_nvdec_emulation);
ConfigurationShared::SetColoredTristate(ui->accelerate_astc, Settings::values.accelerate_astc, ConfigurationShared::SetColoredTristate(ui->accelerate_astc, Settings::values.accelerate_astc,
accelerate_astc); accelerate_astc);
ConfigurationShared::SetColoredTristate(ui->use_asynchronous_gpu_emulation, ConfigurationShared::SetColoredTristate(ui->use_asynchronous_gpu_emulation,
@ -342,6 +316,4 @@ void ConfigureGraphics::SetupPerGameUI() {
static_cast<int>(Settings::values.fullscreen_mode.GetValue(true))); static_cast<int>(Settings::values.fullscreen_mode.GetValue(true)));
ConfigurationShared::InsertGlobalItem( ConfigurationShared::InsertGlobalItem(
ui->api, static_cast<int>(Settings::values.renderer_backend.GetValue(true))); ui->api, static_cast<int>(Settings::values.renderer_backend.GetValue(true)));
ConfigurationShared::InsertGlobalItem(
ui->nvdec_emulation, static_cast<int>(Settings::values.nvdec_emulation.GetValue(true)));
} }

View file

@ -43,7 +43,6 @@ private:
void SetupPerGameUI(); void SetupPerGameUI();
Settings::RendererBackend GetCurrentGraphicsBackend() const; Settings::RendererBackend GetCurrentGraphicsBackend() const;
Settings::NvdecEmulation GetCurrentNvdecEmulation() const;
std::unique_ptr<Ui::ConfigureGraphics> ui; std::unique_ptr<Ui::ConfigureGraphics> ui;
QColor bg_color; QColor bg_color;

View file

@ -167,6 +167,13 @@
</property> </property>
</widget> </widget>
</item> </item>
<item>
<widget class="QCheckBox" name="use_nvdec_emulation">
<property name="text">
<string>Use NVDEC emulation</string>
</property>
</widget>
</item>
<item> <item>
<widget class="QCheckBox" name="accelerate_astc"> <widget class="QCheckBox" name="accelerate_astc">
<property name="text"> <property name="text">
@ -174,50 +181,6 @@
</property> </property>
</widget> </widget>
</item> </item>
<item>
<widget class="QWidget" name="nvdec_emulation_widget" native="true">
<layout class="QHBoxLayout" name="nvdec_emulation_layout">
<property name="leftMargin">
<number>0</number>
</property>
<property name="topMargin">
<number>0</number>
</property>
<property name="rightMargin">
<number>0</number>
</property>
<property name="bottomMargin">
<number>0</number>
</property>
<item>
<widget class="QLabel" name="nvdec_emulation_label">
<property name="text">
<string>NVDEC emulation:</string>
</property>
</widget>
</item>
<item>
<widget class="QComboBox" name="nvdec_emulation">
<item>
<property name="text">
<string>Disabled</string>
</property>
</item>
<item>
<property name="text">
<string>CPU Decoding</string>
</property>
</item>
<item>
<property name="text">
<string>GPU Decoding</string>
</property>
</item>
</widget>
</item>
</layout>
</widget>
</item>
<item> <item>
<widget class="QWidget" name="fullscreen_mode_layout" native="true"> <widget class="QWidget" name="fullscreen_mode_layout" native="true">
<layout class="QHBoxLayout" name="horizontalLayout_1"> <layout class="QHBoxLayout" name="horizontalLayout_1">

View file

@ -2859,8 +2859,6 @@ void GMainWindow::OnToggleFilterBar() {
} }
void GMainWindow::OnCaptureScreenshot() { void GMainWindow::OnCaptureScreenshot() {
OnPauseGame();
const u64 title_id = Core::System::GetInstance().CurrentProcess()->GetTitleID(); const u64 title_id = Core::System::GetInstance().CurrentProcess()->GetTitleID();
const auto screenshot_path = const auto screenshot_path =
QString::fromStdString(Common::FS::GetYuzuPathString(Common::FS::YuzuPath::ScreenshotsDir)); QString::fromStdString(Common::FS::GetYuzuPathString(Common::FS::YuzuPath::ScreenshotsDir));
@ -2872,23 +2870,22 @@ void GMainWindow::OnCaptureScreenshot() {
.arg(date); .arg(date);
if (!Common::FS::CreateDir(screenshot_path.toStdString())) { if (!Common::FS::CreateDir(screenshot_path.toStdString())) {
OnStartGame();
return; return;
} }
#ifdef _WIN32 #ifdef _WIN32
if (UISettings::values.enable_screenshot_save_as) { if (UISettings::values.enable_screenshot_save_as) {
OnPauseGame();
filename = QFileDialog::getSaveFileName(this, tr("Capture Screenshot"), filename, filename = QFileDialog::getSaveFileName(this, tr("Capture Screenshot"), filename,
tr("PNG Image (*.png)")); tr("PNG Image (*.png)"));
if (filename.isEmpty()) {
OnStartGame(); OnStartGame();
if (filename.isEmpty()) {
return; return;
} }
} }
#endif #endif
render_window->CaptureScreenshot(UISettings::values.screenshot_resolution_factor.GetValue(), render_window->CaptureScreenshot(UISettings::values.screenshot_resolution_factor.GetValue(),
filename); filename);
OnStartGame();
} }
// TODO: Written 2020-10-01: Remove per-game config migration code when it is irrelevant // TODO: Written 2020-10-01: Remove per-game config migration code when it is irrelevant

View file

@ -465,7 +465,7 @@ void Config::ReadValues() {
ReadSetting("Renderer", Settings::values.disable_fps_limit); ReadSetting("Renderer", Settings::values.disable_fps_limit);
ReadSetting("Renderer", Settings::values.shader_backend); ReadSetting("Renderer", Settings::values.shader_backend);
ReadSetting("Renderer", Settings::values.use_asynchronous_shaders); ReadSetting("Renderer", Settings::values.use_asynchronous_shaders);
ReadSetting("Renderer", Settings::values.nvdec_emulation); ReadSetting("Renderer", Settings::values.use_nvdec_emulation);
ReadSetting("Renderer", Settings::values.accelerate_astc); ReadSetting("Renderer", Settings::values.accelerate_astc);
ReadSetting("Renderer", Settings::values.use_fast_gpu_time); ReadSetting("Renderer", Settings::values.use_fast_gpu_time);
ReadSetting("Renderer", Settings::values.use_caches_gc); ReadSetting("Renderer", Settings::values.use_caches_gc);

View file

@ -261,9 +261,9 @@ shader_backend =
# 0 (default): Off, 1: On # 0 (default): Off, 1: On
use_asynchronous_shaders = use_asynchronous_shaders =
# NVDEC emulation. # Enable NVDEC emulation.
# 0: Disabled, 1: CPU Decoding, 2 (default): GPU Decoding # 0: Off, 1 (default): On
nvdec_emulation = use_nvdec_emulation =
# Accelerate ASTC texture decoding. # Accelerate ASTC texture decoding.
# 0: Off, 1 (default): On # 0: Off, 1 (default): On