early-access version 1495
This commit is contained in:
parent
4a76db903f
commit
697bdb26cb
5 changed files with 39 additions and 26 deletions
|
@ -1,7 +1,7 @@
|
||||||
yuzu emulator early access
|
yuzu emulator early access
|
||||||
=============
|
=============
|
||||||
|
|
||||||
This is the source code for early-access 1494.
|
This is the source code for early-access 1495.
|
||||||
|
|
||||||
## Legal Notice
|
## Legal Notice
|
||||||
|
|
||||||
|
|
|
@ -299,28 +299,17 @@ struct System::Impl {
|
||||||
gpu_core->WaitIdle();
|
gpu_core->WaitIdle();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Shutdown emulation session
|
|
||||||
services.reset();
|
services.reset();
|
||||||
service_manager.reset();
|
service_manager.reset();
|
||||||
cheat_engine.reset();
|
cheat_engine.reset();
|
||||||
telemetry_session.reset();
|
telemetry_session.reset();
|
||||||
|
|
||||||
// Close all CPU/threading state
|
|
||||||
cpu_manager.Shutdown();
|
cpu_manager.Shutdown();
|
||||||
|
|
||||||
// Release the Time Manager's resources
|
|
||||||
time_manager.Shutdown();
|
time_manager.Shutdown();
|
||||||
|
|
||||||
// Shutdown kernel and core timing
|
|
||||||
core_timing.Shutdown();
|
core_timing.Shutdown();
|
||||||
kernel.Shutdown();
|
|
||||||
|
|
||||||
// Close app loader
|
|
||||||
app_loader.reset();
|
app_loader.reset();
|
||||||
gpu_core.reset();
|
gpu_core.reset();
|
||||||
perf_stats.reset();
|
perf_stats.reset();
|
||||||
|
kernel.Shutdown();
|
||||||
// Clear all applets
|
|
||||||
applet_manager.ClearAll();
|
applet_manager.ClearAll();
|
||||||
|
|
||||||
LOG_DEBUG(Core, "Shutdown OK");
|
LOG_DEBUG(Core, "Shutdown OK");
|
||||||
|
|
|
@ -9,6 +9,7 @@
|
||||||
#include <deque>
|
#include <deque>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <mutex>
|
#include <mutex>
|
||||||
|
#include <numeric>
|
||||||
#include <span>
|
#include <span>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
@ -91,7 +92,7 @@ class BufferCache {
|
||||||
};
|
};
|
||||||
|
|
||||||
public:
|
public:
|
||||||
static constexpr u32 SKIP_CACHE_SIZE = 4096;
|
static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = 4096;
|
||||||
|
|
||||||
explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
|
explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
|
||||||
Tegra::Engines::Maxwell3D& maxwell3d_,
|
Tegra::Engines::Maxwell3D& maxwell3d_,
|
||||||
|
@ -240,9 +241,9 @@ private:
|
||||||
template <bool insert>
|
template <bool insert>
|
||||||
void ChangeRegister(BufferId buffer_id);
|
void ChangeRegister(BufferId buffer_id);
|
||||||
|
|
||||||
void SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size);
|
bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size);
|
||||||
|
|
||||||
void SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size);
|
bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size);
|
||||||
|
|
||||||
void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
|
void UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
|
||||||
std::span<BufferCopy> copies);
|
std::span<BufferCopy> copies);
|
||||||
|
@ -297,6 +298,11 @@ private:
|
||||||
|
|
||||||
std::array<u32, NUM_STAGES> fast_bound_uniform_buffers{};
|
std::array<u32, NUM_STAGES> fast_bound_uniform_buffers{};
|
||||||
|
|
||||||
|
std::array<u32, 16> uniform_cache_hits{};
|
||||||
|
std::array<u32, 16> uniform_cache_shots{};
|
||||||
|
|
||||||
|
u32 uniform_buffer_skip_cache_size = DEFAULT_SKIP_CACHE_SIZE;
|
||||||
|
|
||||||
bool has_deleted_buffers = false;
|
bool has_deleted_buffers = false;
|
||||||
|
|
||||||
std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, std::array<u32, NUM_STAGES>, Empty>
|
std::conditional_t<HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS, std::array<u32, NUM_STAGES>, Empty>
|
||||||
|
@ -328,6 +334,19 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
void BufferCache<P>::TickFrame() {
|
void BufferCache<P>::TickFrame() {
|
||||||
|
// Calculate hits and shots and move hit bits to the right
|
||||||
|
const u32 hits = std::reduce(uniform_cache_hits.begin(), uniform_cache_hits.end());
|
||||||
|
const u32 shots = std::reduce(uniform_cache_shots.begin(), uniform_cache_shots.end());
|
||||||
|
std::copy_n(uniform_cache_hits.begin(), uniform_cache_hits.size() - 1,
|
||||||
|
uniform_cache_hits.begin() + 1);
|
||||||
|
std::copy_n(uniform_cache_shots.begin(), uniform_cache_shots.size() - 1,
|
||||||
|
uniform_cache_shots.begin() + 1);
|
||||||
|
uniform_cache_hits[0] = 0;
|
||||||
|
uniform_cache_shots[0] = 0;
|
||||||
|
|
||||||
|
const bool skip_preferred = hits * 256 < shots * 251;
|
||||||
|
uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0;
|
||||||
|
|
||||||
delayed_destruction_ring.Tick();
|
delayed_destruction_ring.Tick();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -671,7 +690,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
|
||||||
const VAddr cpu_addr = binding.cpu_addr;
|
const VAddr cpu_addr = binding.cpu_addr;
|
||||||
const u32 size = binding.size;
|
const u32 size = binding.size;
|
||||||
Buffer& buffer = slot_buffers[binding.buffer_id];
|
Buffer& buffer = slot_buffers[binding.buffer_id];
|
||||||
if (size <= SKIP_CACHE_SIZE && !buffer.IsRegionGpuModified(cpu_addr, size)) {
|
if (size <= uniform_buffer_skip_cache_size && !buffer.IsRegionGpuModified(cpu_addr, size)) {
|
||||||
if constexpr (IS_OPENGL) {
|
if constexpr (IS_OPENGL) {
|
||||||
if (runtime.HasFastBufferSubData()) {
|
if (runtime.HasFastBufferSubData()) {
|
||||||
// Fast path for Nvidia
|
// Fast path for Nvidia
|
||||||
|
@ -692,7 +711,12 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
// Classic cached path
|
// Classic cached path
|
||||||
SynchronizeBuffer(buffer, cpu_addr, size);
|
const bool sync_cached = SynchronizeBuffer(buffer, cpu_addr, size);
|
||||||
|
if (sync_cached) {
|
||||||
|
++uniform_cache_hits[0];
|
||||||
|
}
|
||||||
|
++uniform_cache_shots[0];
|
||||||
|
|
||||||
if (!needs_bind && !HasFastUniformBufferBound(stage, binding_index)) {
|
if (!needs_bind && !HasFastUniformBufferBound(stage, binding_index)) {
|
||||||
// Skip binding if it's not needed and if the bound buffer is not the fast version
|
// Skip binding if it's not needed and if the bound buffer is not the fast version
|
||||||
// This exists to avoid instances where the fast buffer is bound and a GPU write happens
|
// This exists to avoid instances where the fast buffer is bound and a GPU write happens
|
||||||
|
@ -1106,15 +1130,15 @@ void BufferCache<P>::ChangeRegister(BufferId buffer_id) {
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
void BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) {
|
bool BufferCache<P>::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) {
|
||||||
if (buffer.CpuAddr() == 0) {
|
if (buffer.CpuAddr() == 0) {
|
||||||
return;
|
return true;
|
||||||
}
|
}
|
||||||
SynchronizeBufferImpl(buffer, cpu_addr, size);
|
return SynchronizeBufferImpl(buffer, cpu_addr, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
void BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size) {
|
bool BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size) {
|
||||||
boost::container::small_vector<BufferCopy, 4> copies;
|
boost::container::small_vector<BufferCopy, 4> copies;
|
||||||
u64 total_size_bytes = 0;
|
u64 total_size_bytes = 0;
|
||||||
u64 largest_copy = 0;
|
u64 largest_copy = 0;
|
||||||
|
@ -1128,10 +1152,11 @@ void BufferCache<P>::SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 s
|
||||||
largest_copy = std::max(largest_copy, range_size);
|
largest_copy = std::max(largest_copy, range_size);
|
||||||
});
|
});
|
||||||
if (total_size_bytes == 0) {
|
if (total_size_bytes == 0) {
|
||||||
return;
|
return true;
|
||||||
}
|
}
|
||||||
const std::span<BufferCopy> copies_span(copies.data(), copies.size());
|
const std::span<BufferCopy> copies_span(copies.data(), copies.size());
|
||||||
UploadMemory(buffer, total_size_bytes, largest_copy, copies_span);
|
UploadMemory(buffer, total_size_bytes, largest_copy, copies_span);
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
|
|
|
@ -73,7 +73,8 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_)
|
||||||
for (auto& stage_uniforms : fast_uniforms) {
|
for (auto& stage_uniforms : fast_uniforms) {
|
||||||
for (OGLBuffer& buffer : stage_uniforms) {
|
for (OGLBuffer& buffer : stage_uniforms) {
|
||||||
buffer.Create();
|
buffer.Create();
|
||||||
glNamedBufferData(buffer.handle, BufferCache::SKIP_CACHE_SIZE, nullptr, GL_STREAM_DRAW);
|
glNamedBufferData(buffer.handle, BufferCache::DEFAULT_SKIP_CACHE_SIZE, nullptr,
|
||||||
|
GL_STREAM_DRAW);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (auto& stage_uniforms : copy_uniforms) {
|
for (auto& stage_uniforms : copy_uniforms) {
|
||||||
|
|
|
@ -501,10 +501,8 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
|
||||||
const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 32U);
|
const u32 num_dispatches_x = Common::DivCeil(swizzle.num_tiles.width, 32U);
|
||||||
const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 32U);
|
const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 32U);
|
||||||
const u32 num_dispatches_z = image.info.resources.layers;
|
const u32 num_dispatches_z = image.info.resources.layers;
|
||||||
const std::array num_image_blocks{swizzle.num_tiles.width, swizzle.num_tiles.height};
|
|
||||||
|
|
||||||
update_descriptor_queue.Acquire();
|
update_descriptor_queue.Acquire();
|
||||||
|
|
||||||
update_descriptor_queue.AddBuffer(map.buffer, input_offset,
|
update_descriptor_queue.AddBuffer(map.buffer, input_offset,
|
||||||
image.guest_size_bytes - swizzle.buffer_offset);
|
image.guest_size_bytes - swizzle.buffer_offset);
|
||||||
update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, encoding_values),
|
update_descriptor_queue.AddBuffer(*data_buffer, offsetof(AstcBufferData, encoding_values),
|
||||||
|
|
Loading…
Reference in a new issue