early-access version 1829
This commit is contained in:
parent
230a0aa491
commit
d2ffa9ae83
10 changed files with 93 additions and 67 deletions
|
@ -1,7 +1,7 @@
|
||||||
yuzu emulator early access
|
yuzu emulator early access
|
||||||
=============
|
=============
|
||||||
|
|
||||||
This is the source code for early-access 1826.
|
This is the source code for early-access 1829.
|
||||||
|
|
||||||
## Legal Notice
|
## Legal Notice
|
||||||
|
|
||||||
|
|
|
@ -19,29 +19,26 @@
|
||||||
namespace Service::Nvidia::Devices {
|
namespace Service::Nvidia::Devices {
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
// Copies count amount of type T from the input vector into the dst vector.
|
// Splice vectors will copy count amount of type T from the input vector into the dst vector.
|
||||||
// Returns the number of bytes written into dst.
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
std::size_t SpliceVectors(const std::vector<u8>& input, std::vector<T>& dst, std::size_t count,
|
std::size_t SpliceVectors(const std::vector<u8>& input, std::vector<T>& dst, std::size_t count,
|
||||||
std::size_t offset) {
|
std::size_t offset) {
|
||||||
if (dst.empty()) {
|
if (!dst.empty()) {
|
||||||
return 0;
|
std::memcpy(dst.data(), input.data() + offset, count * sizeof(T));
|
||||||
}
|
}
|
||||||
const size_t bytes_copied = count * sizeof(T);
|
return 0;
|
||||||
std::memcpy(dst.data(), input.data() + offset, bytes_copied);
|
|
||||||
return bytes_copied;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Writes the data in src to an offset into the dst vector. The offset is specified in bytes
|
// Write vectors will write data to the output buffer
|
||||||
// Returns the number of bytes written into dst.
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
std::size_t WriteVectors(std::vector<u8>& dst, const std::vector<T>& src, std::size_t offset) {
|
std::size_t WriteVectors(std::vector<u8>& dst, const std::vector<T>& src, std::size_t offset) {
|
||||||
if (src.empty()) {
|
if (src.empty()) {
|
||||||
return 0;
|
return 0;
|
||||||
|
} else {
|
||||||
|
std::memcpy(dst.data() + offset, src.data(), src.size() * sizeof(T));
|
||||||
|
offset += src.size() * sizeof(T);
|
||||||
|
return offset;
|
||||||
}
|
}
|
||||||
const size_t bytes_copied = src.size() * sizeof(T);
|
|
||||||
std::memcpy(dst.data() + offset, src.data(), bytes_copied);
|
|
||||||
return bytes_copied;
|
|
||||||
}
|
}
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
||||||
|
@ -65,6 +62,7 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u
|
||||||
LOG_DEBUG(Service_NVDRV, "called NVDEC Submit, cmd_buffer_count={}", params.cmd_buffer_count);
|
LOG_DEBUG(Service_NVDRV, "called NVDEC Submit, cmd_buffer_count={}", params.cmd_buffer_count);
|
||||||
|
|
||||||
// Instantiate param buffers
|
// Instantiate param buffers
|
||||||
|
std::size_t offset = sizeof(IoctlSubmit);
|
||||||
std::vector<CommandBuffer> command_buffers(params.cmd_buffer_count);
|
std::vector<CommandBuffer> command_buffers(params.cmd_buffer_count);
|
||||||
std::vector<Reloc> relocs(params.relocation_count);
|
std::vector<Reloc> relocs(params.relocation_count);
|
||||||
std::vector<u32> reloc_shifts(params.relocation_count);
|
std::vector<u32> reloc_shifts(params.relocation_count);
|
||||||
|
@ -73,13 +71,12 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u
|
||||||
std::vector<Fence> fences(params.fence_count);
|
std::vector<Fence> fences(params.fence_count);
|
||||||
|
|
||||||
// Splice input into their respective buffers
|
// Splice input into their respective buffers
|
||||||
std::size_t offset = sizeof(IoctlSubmit);
|
offset = SpliceVectors(input, command_buffers, params.cmd_buffer_count, offset);
|
||||||
offset += SpliceVectors(input, command_buffers, params.cmd_buffer_count, offset);
|
offset = SpliceVectors(input, relocs, params.relocation_count, offset);
|
||||||
offset += SpliceVectors(input, relocs, params.relocation_count, offset);
|
offset = SpliceVectors(input, reloc_shifts, params.relocation_count, offset);
|
||||||
offset += SpliceVectors(input, reloc_shifts, params.relocation_count, offset);
|
offset = SpliceVectors(input, syncpt_increments, params.syncpoint_count, offset);
|
||||||
offset += SpliceVectors(input, syncpt_increments, params.syncpoint_count, offset);
|
offset = SpliceVectors(input, wait_checks, params.syncpoint_count, offset);
|
||||||
offset += SpliceVectors(input, wait_checks, params.syncpoint_count, offset);
|
offset = SpliceVectors(input, fences, params.fence_count, offset);
|
||||||
offset += SpliceVectors(input, fences, params.fence_count, offset);
|
|
||||||
|
|
||||||
auto& gpu = system.GPU();
|
auto& gpu = system.GPU();
|
||||||
if (gpu.UseNvdec()) {
|
if (gpu.UseNvdec()) {
|
||||||
|
@ -91,7 +88,7 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (const auto& cmd_buffer : command_buffers) {
|
for (const auto& cmd_buffer : command_buffers) {
|
||||||
const auto object = nvmap_dev->GetObject(cmd_buffer.memory_id);
|
auto object = nvmap_dev->GetObject(cmd_buffer.memory_id);
|
||||||
ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;);
|
ASSERT_OR_EXECUTE(object, return NvResult::InvalidState;);
|
||||||
const auto map = FindBufferMap(object->dma_map_addr);
|
const auto map = FindBufferMap(object->dma_map_addr);
|
||||||
if (!map) {
|
if (!map) {
|
||||||
|
@ -105,19 +102,21 @@ NvResult nvhost_nvdec_common::Submit(const std::vector<u8>& input, std::vector<u
|
||||||
gpu.PushCommandBuffer(cmdlist);
|
gpu.PushCommandBuffer(cmdlist);
|
||||||
}
|
}
|
||||||
if (gpu.UseNvdec()) {
|
if (gpu.UseNvdec()) {
|
||||||
|
|
||||||
fences[0].value = syncpoint_manager.IncreaseSyncpoint(fences[0].id, 1);
|
fences[0].value = syncpoint_manager.IncreaseSyncpoint(fences[0].id, 1);
|
||||||
|
|
||||||
Tegra::ChCommandHeaderList cmdlist{{(4 << 28) | fences[0].id}};
|
Tegra::ChCommandHeaderList cmdlist{{(4 << 28) | fences[0].id}};
|
||||||
gpu.PushCommandBuffer(cmdlist);
|
gpu.PushCommandBuffer(cmdlist);
|
||||||
}
|
}
|
||||||
std::memcpy(output.data(), ¶ms, sizeof(IoctlSubmit));
|
std::memcpy(output.data(), ¶ms, sizeof(IoctlSubmit));
|
||||||
// Some games expect command_buffers to be written back
|
// Some games expect command_buffers to be written back
|
||||||
offset = sizeof(IoctlSubmit);
|
offset = sizeof(IoctlSubmit);
|
||||||
offset += WriteVectors(output, command_buffers, offset);
|
offset = WriteVectors(output, command_buffers, offset);
|
||||||
offset += WriteVectors(output, relocs, offset);
|
offset = WriteVectors(output, relocs, offset);
|
||||||
offset += WriteVectors(output, reloc_shifts, offset);
|
offset = WriteVectors(output, reloc_shifts, offset);
|
||||||
offset += WriteVectors(output, syncpt_increments, offset);
|
offset = WriteVectors(output, syncpt_increments, offset);
|
||||||
offset += WriteVectors(output, wait_checks, offset);
|
offset = WriteVectors(output, wait_checks, offset);
|
||||||
offset += WriteVectors(output, fences, offset);
|
offset = WriteVectors(output, fences, offset);
|
||||||
|
|
||||||
return NvResult::Success;
|
return NvResult::Success;
|
||||||
}
|
}
|
||||||
|
@ -153,10 +152,10 @@ NvResult nvhost_nvdec_common::MapBuffer(const std::vector<u8>& input, std::vecto
|
||||||
|
|
||||||
auto& gpu = system.GPU();
|
auto& gpu = system.GPU();
|
||||||
|
|
||||||
for (auto& cmd_buffer : cmd_buffer_handles) {
|
for (auto& cmf_buff : cmd_buffer_handles) {
|
||||||
auto object{nvmap_dev->GetObject(cmd_buffer.map_handle)};
|
auto object{nvmap_dev->GetObject(cmf_buff.map_handle)};
|
||||||
if (!object) {
|
if (!object) {
|
||||||
LOG_ERROR(Service_NVDRV, "invalid cmd_buffer nvmap_handle={:X}", cmd_buffer.map_handle);
|
LOG_ERROR(Service_NVDRV, "invalid cmd_buffer nvmap_handle={:X}", cmf_buff.map_handle);
|
||||||
std::memcpy(output.data(), ¶ms, output.size());
|
std::memcpy(output.data(), ¶ms, output.size());
|
||||||
return NvResult::InvalidState;
|
return NvResult::InvalidState;
|
||||||
}
|
}
|
||||||
|
@ -171,7 +170,7 @@ NvResult nvhost_nvdec_common::MapBuffer(const std::vector<u8>& input, std::vecto
|
||||||
if (!object->dma_map_addr) {
|
if (!object->dma_map_addr) {
|
||||||
LOG_ERROR(Service_NVDRV, "failed to map size={}", object->size);
|
LOG_ERROR(Service_NVDRV, "failed to map size={}", object->size);
|
||||||
} else {
|
} else {
|
||||||
cmd_buffer.map_address = object->dma_map_addr;
|
cmf_buff.map_address = object->dma_map_addr;
|
||||||
AddBufferMap(object->dma_map_addr, object->size, object->addr,
|
AddBufferMap(object->dma_map_addr, object->size, object->addr,
|
||||||
object->status == nvmap::Object::Status::Allocated);
|
object->status == nvmap::Object::Status::Allocated);
|
||||||
}
|
}
|
||||||
|
@ -191,10 +190,10 @@ NvResult nvhost_nvdec_common::UnmapBuffer(const std::vector<u8>& input, std::vec
|
||||||
|
|
||||||
auto& gpu = system.GPU();
|
auto& gpu = system.GPU();
|
||||||
|
|
||||||
for (auto& cmd_buffer : cmd_buffer_handles) {
|
for (auto& cmf_buff : cmd_buffer_handles) {
|
||||||
const auto object{nvmap_dev->GetObject(cmd_buffer.map_handle)};
|
const auto object{nvmap_dev->GetObject(cmf_buff.map_handle)};
|
||||||
if (!object) {
|
if (!object) {
|
||||||
LOG_ERROR(Service_NVDRV, "invalid cmd_buffer nvmap_handle={:X}", cmd_buffer.map_handle);
|
LOG_ERROR(Service_NVDRV, "invalid cmd_buffer nvmap_handle={:X}", cmf_buff.map_handle);
|
||||||
std::memcpy(output.data(), ¶ms, output.size());
|
std::memcpy(output.data(), ¶ms, output.size());
|
||||||
return NvResult::InvalidState;
|
return NvResult::InvalidState;
|
||||||
}
|
}
|
||||||
|
|
|
@ -293,6 +293,7 @@ endif()
|
||||||
if (MSVC)
|
if (MSVC)
|
||||||
target_compile_options(video_core PRIVATE
|
target_compile_options(video_core PRIVATE
|
||||||
/we4267 # 'var' : conversion from 'size_t' to 'type', possible loss of data
|
/we4267 # 'var' : conversion from 'size_t' to 'type', possible loss of data
|
||||||
|
/we4244 # 'var' : conversion from integer to 'type', possible loss of data
|
||||||
/we4456 # Declaration of 'identifier' hides previous local declaration
|
/we4456 # Declaration of 'identifier' hides previous local declaration
|
||||||
/we4457 # Declaration of 'identifier' hides function parameter
|
/we4457 # Declaration of 'identifier' hides function parameter
|
||||||
/we4458 # Declaration of 'identifier' hides class member
|
/we4458 # Declaration of 'identifier' hides class member
|
||||||
|
|
|
@ -99,7 +99,7 @@ class BufferCache {
|
||||||
};
|
};
|
||||||
|
|
||||||
public:
|
public:
|
||||||
static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = 4_KiB;
|
static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB);
|
||||||
|
|
||||||
explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
|
explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
|
||||||
Tegra::Engines::Maxwell3D& maxwell3d_,
|
Tegra::Engines::Maxwell3D& maxwell3d_,
|
||||||
|
@ -109,8 +109,6 @@ public:
|
||||||
|
|
||||||
void TickFrame();
|
void TickFrame();
|
||||||
|
|
||||||
void RunGarbageCollector();
|
|
||||||
|
|
||||||
void WriteMemory(VAddr cpu_addr, u64 size);
|
void WriteMemory(VAddr cpu_addr, u64 size);
|
||||||
|
|
||||||
void CachedWriteMemory(VAddr cpu_addr, u64 size);
|
void CachedWriteMemory(VAddr cpu_addr, u64 size);
|
||||||
|
@ -197,6 +195,8 @@ private:
|
||||||
((cpu_addr + size) & ~Core::Memory::PAGE_MASK);
|
((cpu_addr + size) & ~Core::Memory::PAGE_MASK);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void RunGarbageCollector();
|
||||||
|
|
||||||
void BindHostIndexBuffer();
|
void BindHostIndexBuffer();
|
||||||
|
|
||||||
void BindHostVertexBuffers();
|
void BindHostVertexBuffers();
|
||||||
|
@ -416,8 +416,9 @@ void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) {
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) {
|
void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) {
|
||||||
ForEachBufferInRange(cpu_addr, size,
|
ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) {
|
||||||
[&](BufferId, Buffer& buffer) { DownloadBufferMemory(buffer); });
|
DownloadBufferMemory(buffer, cpu_addr, size);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
|
|
|
@ -14,10 +14,18 @@ extern "C" {
|
||||||
#pragma GCC diagnostic push
|
#pragma GCC diagnostic push
|
||||||
#pragma GCC diagnostic ignored "-Wconversion"
|
#pragma GCC diagnostic ignored "-Wconversion"
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
#pragma warning(push)
|
||||||
|
#pragma warning(disable : 4242) // conversion from 'type' to 'type', possible loss of data
|
||||||
|
#pragma warning(disable : 4244) // conversion from 'type' to 'type', possible loss of data
|
||||||
|
#endif
|
||||||
#include <libavcodec/avcodec.h>
|
#include <libavcodec/avcodec.h>
|
||||||
#if defined(__GNUC__) || defined(__clang__)
|
#if defined(__GNUC__) || defined(__clang__)
|
||||||
#pragma GCC diagnostic pop
|
#pragma GCC diagnostic pop
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
#pragma warning(pop)
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace Tegra {
|
namespace Tegra {
|
||||||
|
|
|
@ -3,7 +3,28 @@
|
||||||
// Refer to the license.txt file included.
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
#include <array>
|
#include <array>
|
||||||
|
|
||||||
|
extern "C" {
|
||||||
|
#if defined(__GNUC__) || defined(__clang__)
|
||||||
|
#pragma GCC diagnostic push
|
||||||
|
#pragma GCC diagnostic ignored "-Wconversion"
|
||||||
|
#endif
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
#pragma warning(disable : 4244) // conversion from 'type' to 'type', possible loss of data
|
||||||
|
#pragma warning(push)
|
||||||
|
#endif
|
||||||
|
#include <libswscale/swscale.h>
|
||||||
|
#if defined(__GNUC__) || defined(__clang__)
|
||||||
|
#pragma GCC diagnostic pop
|
||||||
|
#endif
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
#pragma warning(pop)
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
|
#include "common/logging/log.h"
|
||||||
|
|
||||||
#include "video_core/command_classes/nvdec.h"
|
#include "video_core/command_classes/nvdec.h"
|
||||||
#include "video_core/command_classes/vic.h"
|
#include "video_core/command_classes/vic.h"
|
||||||
#include "video_core/engines/maxwell_3d.h"
|
#include "video_core/engines/maxwell_3d.h"
|
||||||
|
@ -11,10 +32,6 @@
|
||||||
#include "video_core/memory_manager.h"
|
#include "video_core/memory_manager.h"
|
||||||
#include "video_core/textures/decoders.h"
|
#include "video_core/textures/decoders.h"
|
||||||
|
|
||||||
extern "C" {
|
|
||||||
#include <libswscale/swscale.h>
|
|
||||||
}
|
|
||||||
|
|
||||||
namespace Tegra {
|
namespace Tegra {
|
||||||
|
|
||||||
Vic::Vic(GPU& gpu_, std::shared_ptr<Nvdec> nvdec_processor_)
|
Vic::Vic(GPU& gpu_, std::shared_ptr<Nvdec> nvdec_processor_)
|
||||||
|
|
|
@ -55,8 +55,9 @@ size_t BytesPerIndex(VkIndexType index_type) {
|
||||||
template <typename T>
|
template <typename T>
|
||||||
std::array<T, 6> MakeQuadIndices(u32 quad, u32 first) {
|
std::array<T, 6> MakeQuadIndices(u32 quad, u32 first) {
|
||||||
std::array<T, 6> indices{0, 1, 2, 0, 2, 3};
|
std::array<T, 6> indices{0, 1, 2, 0, 2, 3};
|
||||||
std::ranges::transform(indices, indices.begin(),
|
for (T& index : indices) {
|
||||||
[quad, first](u32 index) { return first + index + quad * 4; });
|
index = static_cast<T>(first + index + quad * 4);
|
||||||
|
}
|
||||||
return indices;
|
return indices;
|
||||||
}
|
}
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
|
@ -111,9 +111,6 @@ public:
|
||||||
/// Notify the cache that a new frame has been queued
|
/// Notify the cache that a new frame has been queued
|
||||||
void TickFrame();
|
void TickFrame();
|
||||||
|
|
||||||
/// Runs the Garbage Collector.
|
|
||||||
void RunGarbageCollector();
|
|
||||||
|
|
||||||
/// Return a constant reference to the given image view id
|
/// Return a constant reference to the given image view id
|
||||||
[[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept;
|
[[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept;
|
||||||
|
|
||||||
|
@ -226,6 +223,9 @@ private:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Runs the Garbage Collector.
|
||||||
|
void RunGarbageCollector();
|
||||||
|
|
||||||
/// Fills image_view_ids in the image views in indices
|
/// Fills image_view_ids in the image views in indices
|
||||||
void FillImageViews(DescriptorTable<TICEntry>& table,
|
void FillImageViews(DescriptorTable<TICEntry>& table,
|
||||||
std::span<ImageViewId> cached_image_view_ids, std::span<const u32> indices,
|
std::span<ImageViewId> cached_image_view_ids, std::span<const u32> indices,
|
||||||
|
|
|
@ -842,9 +842,9 @@ void Device::CollectTelemetryParameters() {
|
||||||
|
|
||||||
void Device::CollectPhysicalMemoryInfo() {
|
void Device::CollectPhysicalMemoryInfo() {
|
||||||
const auto mem_properties = physical.GetMemoryProperties();
|
const auto mem_properties = physical.GetMemoryProperties();
|
||||||
const std::size_t num_properties = mem_properties.memoryHeapCount;
|
const size_t num_properties = mem_properties.memoryHeapCount;
|
||||||
device_access_memory = 0;
|
device_access_memory = 0;
|
||||||
for (std::size_t element = 0; element < num_properties; element++) {
|
for (size_t element = 0; element < num_properties; ++element) {
|
||||||
if ((mem_properties.memoryHeaps[element].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) != 0) {
|
if ((mem_properties.memoryHeaps[element].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) != 0) {
|
||||||
device_access_memory += mem_properties.memoryHeaps[element].size;
|
device_access_memory += mem_properties.memoryHeaps[element].size;
|
||||||
}
|
}
|
||||||
|
|
|
@ -278,7 +278,8 @@ private:
|
||||||
u32 graphics_family{}; ///< Main graphics queue family index.
|
u32 graphics_family{}; ///< Main graphics queue family index.
|
||||||
u32 present_family{}; ///< Main present queue family index.
|
u32 present_family{}; ///< Main present queue family index.
|
||||||
VkDriverIdKHR driver_id{}; ///< Driver ID.
|
VkDriverIdKHR driver_id{}; ///< Driver ID.
|
||||||
VkShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced.ed
|
VkShaderStageFlags guest_warp_stages{}; ///< Stages where the guest warp size can be forced.
|
||||||
|
u64 device_access_memory{}; ///< Total size of device local memory in bytes.
|
||||||
bool is_optimal_astc_supported{}; ///< Support for native ASTC.
|
bool is_optimal_astc_supported{}; ///< Support for native ASTC.
|
||||||
bool is_float16_supported{}; ///< Support for float16 arithmetics.
|
bool is_float16_supported{}; ///< Support for float16 arithmetics.
|
||||||
bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest.
|
bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest.
|
||||||
|
@ -312,8 +313,6 @@ private:
|
||||||
|
|
||||||
/// Nsight Aftermath GPU crash tracker
|
/// Nsight Aftermath GPU crash tracker
|
||||||
std::unique_ptr<NsightAftermathTracker> nsight_aftermath_tracker;
|
std::unique_ptr<NsightAftermathTracker> nsight_aftermath_tracker;
|
||||||
|
|
||||||
u64 device_access_memory;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace Vulkan
|
} // namespace Vulkan
|
||||||
|
|
Loading…
Reference in a new issue