Merge pull request #6897 from FernandoS27/pineapple-does-not-belong-in-pizza
Project <tentative title>: Rework Garbage Collection.
This commit is contained in:
commit
956171f024
13 changed files with 220 additions and 126 deletions
140
src/common/lru_cache.h
Normal file
140
src/common/lru_cache.h
Normal file
|
@ -0,0 +1,140 @@
|
||||||
|
// Copyright 2021 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2+ or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <cstddef>
#include <cstdint>
#include <deque>
#include <memory>
#include <type_traits>

#include "common/common_types.h"
|
||||||
|
|
||||||
|
namespace Common {
|
||||||
|
|
||||||
|
/// Least-recently-used ordering over a pool of reusable slots.
///
/// Entries live in a doubly linked list that runs from the least recently
/// attached entry (head) to the most recently attached one (tail). Every entry
/// is addressed by the slot index returned from Insert(); freed slots are
/// recycled by later insertions.
///
/// The list links are raw pointers into item_pool, so a memberwise copy of the
/// cache would still point at the source's items; treat instances as
/// non-copyable.
///
/// @tparam Traits Provides `ObjectType` (the stored value) and `TickType`
///                (the timestamp attached to each entry).
template <class Traits>
class LeastRecentlyUsedCache {
    using ObjectType = typename Traits::ObjectType;
    using TickType = typename Traits::TickType;

    /// One list node: the payload, its last-use tick and the list links.
    struct Item {
        ObjectType obj;
        TickType tick;
        Item* next{};
        Item* prev{};
    };

public:
    LeastRecentlyUsedCache() = default;
    ~LeastRecentlyUsedCache() = default;

    /// Stores `obj` stamped with `tick` at the tail (most recent end) of the list.
    /// @param obj  Value handed to the callbacks of ForEachItemBelow().
    /// @param tick Initial timestamp of the entry.
    /// @returns Slot index to pass to Touch() and Free().
    size_t Insert(ObjectType obj, TickType tick) {
        const size_t new_id = Build();
        Item& item = item_pool[new_id];
        item.obj = obj;
        item.tick = tick;
        Attach(item);
        return new_id;
    }

    /// Refreshes the entry in slot `id` and moves it to the tail of the list.
    /// Does nothing when the stored tick is already equal to or newer than `tick`.
    /// @param id   Slot index previously returned by Insert().
    /// @param tick New timestamp.
    void Touch(size_t id, TickType tick) {
        Item& item = item_pool[id];
        if (item.tick >= tick) {
            return;
        }
        item.tick = tick;
        if (&item == last_item) {
            // Already the most recent entry, the list order is unchanged.
            return;
        }
        Detach(item);
        Attach(item);
    }

    /// Unlinks the entry in slot `id` and queues the slot for reuse.
    /// @param id Slot index previously returned by Insert().
    void Free(size_t id) {
        Item& item = item_pool[id];
        Detach(item);
        item.prev = nullptr;
        item.next = nullptr;
        free_items.push_back(id);
    }

    /// Walks the list from the head and invokes `func(obj)` for each entry,
    /// stopping at the first entry whose tick is newer than `tick`.
    ///
    /// When `func` returns exactly `bool`, a `true` result ends the walk early;
    /// any other return type is ignored. `func` may Free() the entry it is
    /// currently visiting.
    ///
    /// @param tick Newest timestamp that is still visited.
    /// @param func Callable taking the stored object.
    template <typename Func>
    void ForEachItemBelow(TickType tick, Func&& func) {
        // Evaluate the trait on the same expression that is invoked below: an lvalue
        // callable applied to an lvalue object. The `_t` alias is required here; the
        // bare std::invoke_result struct is never the same type as bool.
        static constexpr bool RETURNS_BOOL =
            std::is_same_v<std::invoke_result_t<Func&, ObjectType&>, bool>;
        Item* iterator = first_item;
        while (iterator) {
            // Signed comparison so that a threshold which wrapped around below zero
            // (e.g. `frame_tick - N` early on) is treated as older than every entry.
            if (static_cast<std::int64_t>(tick) - static_cast<std::int64_t>(iterator->tick) < 0) {
                return;
            }
            // Read the successor first: the callback is allowed to free this entry,
            // which clears its links.
            Item* const next = iterator->next;
            if constexpr (RETURNS_BOOL) {
                if (func(iterator->obj)) {
                    return;
                }
            } else {
                func(iterator->obj);
            }
            iterator = next;
        }
    }

private:
    /// Obtains an unlinked slot, recycling a freed one when available and
    /// growing the pool otherwise.
    /// @returns Index of the slot inside item_pool.
    size_t Build() {
        size_t item_id;
        if (free_items.empty()) {
            item_id = item_pool.size();
            item_pool.emplace_back();
        } else {
            item_id = free_items.front();
            free_items.pop_front();
        }
        Item& item = item_pool[item_id];
        item.next = nullptr;
        item.prev = nullptr;
        return item_id;
    }

    /// Appends `item` at the tail of the list.
    void Attach(Item& item) {
        item.prev = last_item;
        item.next = nullptr;
        if (last_item) {
            last_item->next = &item;
        } else {
            // Empty list: the new item is both head and tail.
            first_item = &item;
        }
        last_item = &item;
    }

    /// Unlinks `item` from the list, fixing up its neighbours and the head/tail
    /// pointers. The links stored in `item` itself are left untouched.
    void Detach(Item& item) {
        if (item.prev) {
            item.prev->next = item.next;
        }
        if (item.next) {
            item.next->prev = item.prev;
        }
        if (&item == first_item) {
            first_item = item.next;
            if (first_item) {
                first_item->prev = nullptr;
            }
        }
        if (&item == last_item) {
            last_item = item.prev;
            if (last_item) {
                last_item->next = nullptr;
            }
        }
    }

    // std::deque keeps references to existing elements valid when it grows at the
    // end, so the Item* links stay usable as the pool expands.
    std::deque<Item> item_pool;
    std::deque<size_t> free_items;
    Item* first_item{};
    Item* last_item{};
};
|
||||||
|
|
||||||
|
} // namespace Common
|
|
@ -59,7 +59,6 @@ void LogSettings() {
|
||||||
log_setting("Renderer_UseVsync", values.use_vsync.GetValue());
|
log_setting("Renderer_UseVsync", values.use_vsync.GetValue());
|
||||||
log_setting("Renderer_ShaderBackend", values.shader_backend.GetValue());
|
log_setting("Renderer_ShaderBackend", values.shader_backend.GetValue());
|
||||||
log_setting("Renderer_UseAsynchronousShaders", values.use_asynchronous_shaders.GetValue());
|
log_setting("Renderer_UseAsynchronousShaders", values.use_asynchronous_shaders.GetValue());
|
||||||
log_setting("Renderer_UseGarbageCollection", values.use_caches_gc.GetValue());
|
|
||||||
log_setting("Renderer_AnisotropicFilteringLevel", values.max_anisotropy.GetValue());
|
log_setting("Renderer_AnisotropicFilteringLevel", values.max_anisotropy.GetValue());
|
||||||
log_setting("Audio_OutputEngine", values.sink_id.GetValue());
|
log_setting("Audio_OutputEngine", values.sink_id.GetValue());
|
||||||
log_setting("Audio_EnableAudioStretching", values.enable_audio_stretching.GetValue());
|
log_setting("Audio_EnableAudioStretching", values.enable_audio_stretching.GetValue());
|
||||||
|
@ -143,7 +142,6 @@ void RestoreGlobalState(bool is_powered_on) {
|
||||||
values.shader_backend.SetGlobal(true);
|
values.shader_backend.SetGlobal(true);
|
||||||
values.use_asynchronous_shaders.SetGlobal(true);
|
values.use_asynchronous_shaders.SetGlobal(true);
|
||||||
values.use_fast_gpu_time.SetGlobal(true);
|
values.use_fast_gpu_time.SetGlobal(true);
|
||||||
values.use_caches_gc.SetGlobal(true);
|
|
||||||
values.bg_red.SetGlobal(true);
|
values.bg_red.SetGlobal(true);
|
||||||
values.bg_green.SetGlobal(true);
|
values.bg_green.SetGlobal(true);
|
||||||
values.bg_blue.SetGlobal(true);
|
values.bg_blue.SetGlobal(true);
|
||||||
|
|
|
@ -475,7 +475,6 @@ struct Values {
|
||||||
ShaderBackend::SPIRV, "shader_backend"};
|
ShaderBackend::SPIRV, "shader_backend"};
|
||||||
Setting<bool> use_asynchronous_shaders{false, "use_asynchronous_shaders"};
|
Setting<bool> use_asynchronous_shaders{false, "use_asynchronous_shaders"};
|
||||||
Setting<bool> use_fast_gpu_time{true, "use_fast_gpu_time"};
|
Setting<bool> use_fast_gpu_time{true, "use_fast_gpu_time"};
|
||||||
Setting<bool> use_caches_gc{false, "use_caches_gc"};
|
|
||||||
|
|
||||||
Setting<u8> bg_red{0, "bg_red"};
|
Setting<u8> bg_red{0, "bg_red"};
|
||||||
Setting<u8> bg_green{0, "bg_green"};
|
Setting<u8> bg_green{0, "bg_green"};
|
||||||
|
|
|
@ -261,16 +261,6 @@ public:
|
||||||
stream_score += score;
|
stream_score += score;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Sets the new frame tick
|
|
||||||
void SetFrameTick(u64 new_frame_tick) noexcept {
|
|
||||||
frame_tick = new_frame_tick;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns the new frame tick
|
|
||||||
[[nodiscard]] u64 FrameTick() const noexcept {
|
|
||||||
return frame_tick;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns the likeliness of this being a stream buffer
|
/// Returns the likeliness of this being a stream buffer
|
||||||
[[nodiscard]] int StreamScore() const noexcept {
|
[[nodiscard]] int StreamScore() const noexcept {
|
||||||
return stream_score;
|
return stream_score;
|
||||||
|
@ -307,6 +297,14 @@ public:
|
||||||
return words.size_bytes;
|
return words.size_bytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
size_t getLRUID() const noexcept {
|
||||||
|
return lru_id;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Stores the slot index of this buffer's entry in the LRU cache
void setLRUID(size_t lru_id_) noexcept {
    lru_id = lru_id_;
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
template <Type type>
|
template <Type type>
|
||||||
u64* Array() noexcept {
|
u64* Array() noexcept {
|
||||||
|
@ -603,9 +601,9 @@ private:
|
||||||
RasterizerInterface* rasterizer = nullptr;
|
RasterizerInterface* rasterizer = nullptr;
|
||||||
VAddr cpu_addr = 0;
|
VAddr cpu_addr = 0;
|
||||||
Words words;
|
Words words;
|
||||||
u64 frame_tick = 0;
|
|
||||||
BufferFlagBits flags{};
|
BufferFlagBits flags{};
|
||||||
int stream_score = 0;
|
int stream_score = 0;
|
||||||
|
size_t lru_id = SIZE_MAX;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace VideoCommon
|
} // namespace VideoCommon
|
||||||
|
|
|
@ -20,6 +20,7 @@
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "common/div_ceil.h"
|
#include "common/div_ceil.h"
|
||||||
#include "common/literals.h"
|
#include "common/literals.h"
|
||||||
|
#include "common/lru_cache.h"
|
||||||
#include "common/microprofile.h"
|
#include "common/microprofile.h"
|
||||||
#include "common/scope_exit.h"
|
#include "common/scope_exit.h"
|
||||||
#include "common/settings.h"
|
#include "common/settings.h"
|
||||||
|
@ -330,7 +331,7 @@ private:
|
||||||
template <bool insert>
|
template <bool insert>
|
||||||
void ChangeRegister(BufferId buffer_id);
|
void ChangeRegister(BufferId buffer_id);
|
||||||
|
|
||||||
void TouchBuffer(Buffer& buffer) const noexcept;
|
void TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept;
|
||||||
|
|
||||||
bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size);
|
bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size);
|
||||||
|
|
||||||
|
@ -428,7 +429,11 @@ private:
|
||||||
size_t immediate_buffer_capacity = 0;
|
size_t immediate_buffer_capacity = 0;
|
||||||
std::unique_ptr<u8[]> immediate_buffer_alloc;
|
std::unique_ptr<u8[]> immediate_buffer_alloc;
|
||||||
|
|
||||||
typename SlotVector<Buffer>::Iterator deletion_iterator;
|
/// Traits for the buffer LRU cache: entries hold a BufferId and are stamped with a u64 tick.
struct LRUItemParams {
|
||||||
|
using ObjectType = BufferId;
|
||||||
|
using TickType = u64;
|
||||||
|
};
|
||||||
|
Common::LeastRecentlyUsedCache<LRUItemParams> lru_cache;
|
||||||
u64 frame_tick = 0;
|
u64 frame_tick = 0;
|
||||||
u64 total_used_memory = 0;
|
u64 total_used_memory = 0;
|
||||||
|
|
||||||
|
@ -445,7 +450,6 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
|
||||||
kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} {
|
kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} {
|
||||||
// Ensure the first slot is used for the null buffer
|
// Ensure the first slot is used for the null buffer
|
||||||
void(slot_buffers.insert(runtime, NullBufferParams{}));
|
void(slot_buffers.insert(runtime, NullBufferParams{}));
|
||||||
deletion_iterator = slot_buffers.end();
|
|
||||||
common_ranges.clear();
|
common_ranges.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -454,20 +458,17 @@ void BufferCache<P>::RunGarbageCollector() {
|
||||||
const bool aggressive_gc = total_used_memory >= CRITICAL_MEMORY;
|
const bool aggressive_gc = total_used_memory >= CRITICAL_MEMORY;
|
||||||
const u64 ticks_to_destroy = aggressive_gc ? 60 : 120;
|
const u64 ticks_to_destroy = aggressive_gc ? 60 : 120;
|
||||||
int num_iterations = aggressive_gc ? 64 : 32;
|
int num_iterations = aggressive_gc ? 64 : 32;
|
||||||
for (; num_iterations > 0; --num_iterations) {
|
const auto clean_up = [this, &num_iterations](BufferId buffer_id) {
|
||||||
if (deletion_iterator == slot_buffers.end()) {
|
if (num_iterations == 0) {
|
||||||
deletion_iterator = slot_buffers.begin();
|
return true;
|
||||||
}
|
}
|
||||||
++deletion_iterator;
|
--num_iterations;
|
||||||
if (deletion_iterator == slot_buffers.end()) {
|
auto& buffer = slot_buffers[buffer_id];
|
||||||
break;
|
DownloadBufferMemory(buffer);
|
||||||
}
|
DeleteBuffer(buffer_id);
|
||||||
const auto [buffer_id, buffer] = *deletion_iterator;
|
return false;
|
||||||
if (buffer->FrameTick() + ticks_to_destroy < frame_tick) {
|
};
|
||||||
DownloadBufferMemory(*buffer);
|
lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up);
|
||||||
DeleteBuffer(buffer_id);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
|
@ -485,7 +486,7 @@ void BufferCache<P>::TickFrame() {
|
||||||
const bool skip_preferred = hits * 256 < shots * 251;
|
const bool skip_preferred = hits * 256 < shots * 251;
|
||||||
uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0;
|
uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0;
|
||||||
|
|
||||||
if (Settings::values.use_caches_gc.GetValue() && total_used_memory >= EXPECTED_MEMORY) {
|
if (total_used_memory >= EXPECTED_MEMORY) {
|
||||||
RunGarbageCollector();
|
RunGarbageCollector();
|
||||||
}
|
}
|
||||||
++frame_tick;
|
++frame_tick;
|
||||||
|
@ -954,7 +955,7 @@ bool BufferCache<P>::IsRegionCpuModified(VAddr addr, size_t size) {
|
||||||
template <class P>
|
template <class P>
|
||||||
void BufferCache<P>::BindHostIndexBuffer() {
|
void BufferCache<P>::BindHostIndexBuffer() {
|
||||||
Buffer& buffer = slot_buffers[index_buffer.buffer_id];
|
Buffer& buffer = slot_buffers[index_buffer.buffer_id];
|
||||||
TouchBuffer(buffer);
|
TouchBuffer(buffer, index_buffer.buffer_id);
|
||||||
const u32 offset = buffer.Offset(index_buffer.cpu_addr);
|
const u32 offset = buffer.Offset(index_buffer.cpu_addr);
|
||||||
const u32 size = index_buffer.size;
|
const u32 size = index_buffer.size;
|
||||||
SynchronizeBuffer(buffer, index_buffer.cpu_addr, size);
|
SynchronizeBuffer(buffer, index_buffer.cpu_addr, size);
|
||||||
|
@ -975,7 +976,7 @@ void BufferCache<P>::BindHostVertexBuffers() {
|
||||||
for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
|
for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
|
||||||
const Binding& binding = vertex_buffers[index];
|
const Binding& binding = vertex_buffers[index];
|
||||||
Buffer& buffer = slot_buffers[binding.buffer_id];
|
Buffer& buffer = slot_buffers[binding.buffer_id];
|
||||||
TouchBuffer(buffer);
|
TouchBuffer(buffer, binding.buffer_id);
|
||||||
SynchronizeBuffer(buffer, binding.cpu_addr, binding.size);
|
SynchronizeBuffer(buffer, binding.cpu_addr, binding.size);
|
||||||
if (!flags[Dirty::VertexBuffer0 + index]) {
|
if (!flags[Dirty::VertexBuffer0 + index]) {
|
||||||
continue;
|
continue;
|
||||||
|
@ -1011,7 +1012,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
|
||||||
const VAddr cpu_addr = binding.cpu_addr;
|
const VAddr cpu_addr = binding.cpu_addr;
|
||||||
const u32 size = std::min(binding.size, (*uniform_buffer_sizes)[stage][index]);
|
const u32 size = std::min(binding.size, (*uniform_buffer_sizes)[stage][index]);
|
||||||
Buffer& buffer = slot_buffers[binding.buffer_id];
|
Buffer& buffer = slot_buffers[binding.buffer_id];
|
||||||
TouchBuffer(buffer);
|
TouchBuffer(buffer, binding.buffer_id);
|
||||||
const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID &&
|
const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID &&
|
||||||
size <= uniform_buffer_skip_cache_size &&
|
size <= uniform_buffer_skip_cache_size &&
|
||||||
!buffer.IsRegionGpuModified(cpu_addr, size);
|
!buffer.IsRegionGpuModified(cpu_addr, size);
|
||||||
|
@ -1083,7 +1084,7 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
|
||||||
ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) {
|
ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) {
|
||||||
const Binding& binding = storage_buffers[stage][index];
|
const Binding& binding = storage_buffers[stage][index];
|
||||||
Buffer& buffer = slot_buffers[binding.buffer_id];
|
Buffer& buffer = slot_buffers[binding.buffer_id];
|
||||||
TouchBuffer(buffer);
|
TouchBuffer(buffer, binding.buffer_id);
|
||||||
const u32 size = binding.size;
|
const u32 size = binding.size;
|
||||||
SynchronizeBuffer(buffer, binding.cpu_addr, size);
|
SynchronizeBuffer(buffer, binding.cpu_addr, size);
|
||||||
|
|
||||||
|
@ -1128,7 +1129,7 @@ void BufferCache<P>::BindHostTransformFeedbackBuffers() {
|
||||||
for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
|
for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
|
||||||
const Binding& binding = transform_feedback_buffers[index];
|
const Binding& binding = transform_feedback_buffers[index];
|
||||||
Buffer& buffer = slot_buffers[binding.buffer_id];
|
Buffer& buffer = slot_buffers[binding.buffer_id];
|
||||||
TouchBuffer(buffer);
|
TouchBuffer(buffer, binding.buffer_id);
|
||||||
const u32 size = binding.size;
|
const u32 size = binding.size;
|
||||||
SynchronizeBuffer(buffer, binding.cpu_addr, size);
|
SynchronizeBuffer(buffer, binding.cpu_addr, size);
|
||||||
|
|
||||||
|
@ -1148,7 +1149,7 @@ void BufferCache<P>::BindHostComputeUniformBuffers() {
|
||||||
ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) {
|
ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) {
|
||||||
const Binding& binding = compute_uniform_buffers[index];
|
const Binding& binding = compute_uniform_buffers[index];
|
||||||
Buffer& buffer = slot_buffers[binding.buffer_id];
|
Buffer& buffer = slot_buffers[binding.buffer_id];
|
||||||
TouchBuffer(buffer);
|
TouchBuffer(buffer, binding.buffer_id);
|
||||||
const u32 size = std::min(binding.size, (*compute_uniform_buffer_sizes)[index]);
|
const u32 size = std::min(binding.size, (*compute_uniform_buffer_sizes)[index]);
|
||||||
SynchronizeBuffer(buffer, binding.cpu_addr, size);
|
SynchronizeBuffer(buffer, binding.cpu_addr, size);
|
||||||
|
|
||||||
|
@ -1168,7 +1169,7 @@ void BufferCache<P>::BindHostComputeStorageBuffers() {
|
||||||
ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) {
|
ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) {
|
||||||
const Binding& binding = compute_storage_buffers[index];
|
const Binding& binding = compute_storage_buffers[index];
|
||||||
Buffer& buffer = slot_buffers[binding.buffer_id];
|
Buffer& buffer = slot_buffers[binding.buffer_id];
|
||||||
TouchBuffer(buffer);
|
TouchBuffer(buffer, binding.buffer_id);
|
||||||
const u32 size = binding.size;
|
const u32 size = binding.size;
|
||||||
SynchronizeBuffer(buffer, binding.cpu_addr, size);
|
SynchronizeBuffer(buffer, binding.cpu_addr, size);
|
||||||
|
|
||||||
|
@ -1513,11 +1514,11 @@ BufferId BufferCache<P>::CreateBuffer(VAddr cpu_addr, u32 wanted_size) {
|
||||||
const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size);
|
const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size);
|
||||||
const u32 size = static_cast<u32>(overlap.end - overlap.begin);
|
const u32 size = static_cast<u32>(overlap.end - overlap.begin);
|
||||||
const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size);
|
const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size);
|
||||||
TouchBuffer(slot_buffers[new_buffer_id]);
|
|
||||||
for (const BufferId overlap_id : overlap.ids) {
|
for (const BufferId overlap_id : overlap.ids) {
|
||||||
JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap);
|
JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap);
|
||||||
}
|
}
|
||||||
Register(new_buffer_id);
|
Register(new_buffer_id);
|
||||||
|
TouchBuffer(slot_buffers[new_buffer_id], new_buffer_id);
|
||||||
return new_buffer_id;
|
return new_buffer_id;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1534,12 +1535,14 @@ void BufferCache<P>::Unregister(BufferId buffer_id) {
|
||||||
template <class P>
|
template <class P>
|
||||||
template <bool insert>
|
template <bool insert>
|
||||||
void BufferCache<P>::ChangeRegister(BufferId buffer_id) {
|
void BufferCache<P>::ChangeRegister(BufferId buffer_id) {
|
||||||
const Buffer& buffer = slot_buffers[buffer_id];
|
Buffer& buffer = slot_buffers[buffer_id];
|
||||||
const auto size = buffer.SizeBytes();
|
const auto size = buffer.SizeBytes();
|
||||||
if (insert) {
|
if (insert) {
|
||||||
total_used_memory += Common::AlignUp(size, 1024);
|
total_used_memory += Common::AlignUp(size, 1024);
|
||||||
|
buffer.setLRUID(lru_cache.Insert(buffer_id, frame_tick));
|
||||||
} else {
|
} else {
|
||||||
total_used_memory -= Common::AlignUp(size, 1024);
|
total_used_memory -= Common::AlignUp(size, 1024);
|
||||||
|
lru_cache.Free(buffer.getLRUID());
|
||||||
}
|
}
|
||||||
const VAddr cpu_addr_begin = buffer.CpuAddr();
|
const VAddr cpu_addr_begin = buffer.CpuAddr();
|
||||||
const VAddr cpu_addr_end = cpu_addr_begin + size;
|
const VAddr cpu_addr_end = cpu_addr_begin + size;
|
||||||
|
@ -1555,8 +1558,10 @@ void BufferCache<P>::ChangeRegister(BufferId buffer_id) {
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
void BufferCache<P>::TouchBuffer(Buffer& buffer) const noexcept {
|
void BufferCache<P>::TouchBuffer(Buffer& buffer, BufferId buffer_id) noexcept {
|
||||||
buffer.SetFrameTick(frame_tick);
|
if (buffer_id != NULL_BUFFER_ID) {
|
||||||
|
lru_cache.Touch(buffer.getLRUID(), frame_tick);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
|
|
|
@ -80,7 +80,7 @@ struct ImageBase {
|
||||||
VAddr cpu_addr_end = 0;
|
VAddr cpu_addr_end = 0;
|
||||||
|
|
||||||
u64 modification_tick = 0;
|
u64 modification_tick = 0;
|
||||||
u64 frame_tick = 0;
|
size_t lru_index = SIZE_MAX;
|
||||||
|
|
||||||
std::array<u32, MAX_MIP_LEVELS> mip_level_offsets{};
|
std::array<u32, MAX_MIP_LEVELS> mip_level_offsets{};
|
||||||
|
|
||||||
|
|
|
@ -5,7 +5,6 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "common/alignment.h"
|
#include "common/alignment.h"
|
||||||
#include "common/settings.h"
|
|
||||||
#include "video_core/dirty_flags.h"
|
#include "video_core/dirty_flags.h"
|
||||||
#include "video_core/texture_cache/samples_helper.h"
|
#include "video_core/texture_cache/samples_helper.h"
|
||||||
#include "video_core/texture_cache/texture_cache_base.h"
|
#include "video_core/texture_cache/texture_cache_base.h"
|
||||||
|
@ -43,8 +42,6 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface&
|
||||||
void(slot_image_views.insert(runtime, NullImageParams{}));
|
void(slot_image_views.insert(runtime, NullImageParams{}));
|
||||||
void(slot_samplers.insert(runtime, sampler_descriptor));
|
void(slot_samplers.insert(runtime, sampler_descriptor));
|
||||||
|
|
||||||
deletion_iterator = slot_images.begin();
|
|
||||||
|
|
||||||
if constexpr (HAS_DEVICE_MEMORY_INFO) {
|
if constexpr (HAS_DEVICE_MEMORY_INFO) {
|
||||||
const auto device_memory = runtime.GetDeviceLocalMemory();
|
const auto device_memory = runtime.GetDeviceLocalMemory();
|
||||||
const u64 possible_expected_memory = (device_memory * 3) / 10;
|
const u64 possible_expected_memory = (device_memory * 3) / 10;
|
||||||
|
@ -64,70 +61,38 @@ template <class P>
|
||||||
void TextureCache<P>::RunGarbageCollector() {
|
void TextureCache<P>::RunGarbageCollector() {
|
||||||
const bool high_priority_mode = total_used_memory >= expected_memory;
|
const bool high_priority_mode = total_used_memory >= expected_memory;
|
||||||
const bool aggressive_mode = total_used_memory >= critical_memory;
|
const bool aggressive_mode = total_used_memory >= critical_memory;
|
||||||
const u64 ticks_to_destroy = high_priority_mode ? 60 : 100;
|
const u64 ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 100ULL;
|
||||||
int num_iterations = aggressive_mode ? 256 : (high_priority_mode ? 128 : 64);
|
size_t num_iterations = aggressive_mode ? 10000 : (high_priority_mode ? 100 : 5);
|
||||||
for (; num_iterations > 0; --num_iterations) {
|
const auto clean_up = [this, &num_iterations, high_priority_mode](ImageId image_id) {
|
||||||
if (deletion_iterator == slot_images.end()) {
|
if (num_iterations == 0) {
|
||||||
deletion_iterator = slot_images.begin();
|
return true;
|
||||||
if (deletion_iterator == slot_images.end()) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
auto [image_id, image_tmp] = *deletion_iterator;
|
--num_iterations;
|
||||||
Image* image = image_tmp; // fix clang error.
|
auto& image = slot_images[image_id];
|
||||||
const bool is_alias = True(image->flags & ImageFlagBits::Alias);
|
const bool must_download = image.IsSafeDownload();
|
||||||
const bool is_bad_overlap = True(image->flags & ImageFlagBits::BadOverlap);
|
if (!high_priority_mode && must_download) {
|
||||||
const bool must_download = image->IsSafeDownload();
|
return false;
|
||||||
bool should_care = is_bad_overlap || is_alias || (high_priority_mode && !must_download);
|
|
||||||
const u64 ticks_needed =
|
|
||||||
is_bad_overlap
|
|
||||||
? ticks_to_destroy >> 4
|
|
||||||
: ((should_care && aggressive_mode) ? ticks_to_destroy >> 1 : ticks_to_destroy);
|
|
||||||
should_care |= aggressive_mode;
|
|
||||||
if (should_care && image->frame_tick + ticks_needed < frame_tick) {
|
|
||||||
if (is_bad_overlap) {
|
|
||||||
const bool overlap_check = std::ranges::all_of(
|
|
||||||
image->overlapping_images, [&, image](const ImageId& overlap_id) {
|
|
||||||
auto& overlap = slot_images[overlap_id];
|
|
||||||
return overlap.frame_tick >= image->frame_tick;
|
|
||||||
});
|
|
||||||
if (!overlap_check) {
|
|
||||||
++deletion_iterator;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (!is_bad_overlap && must_download) {
|
|
||||||
const bool alias_check = std::ranges::none_of(
|
|
||||||
image->aliased_images, [&, image](const AliasedImage& alias) {
|
|
||||||
auto& alias_image = slot_images[alias.id];
|
|
||||||
return (alias_image.frame_tick < image->frame_tick) ||
|
|
||||||
(alias_image.modification_tick < image->modification_tick);
|
|
||||||
});
|
|
||||||
|
|
||||||
if (alias_check) {
|
|
||||||
auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes);
|
|
||||||
const auto copies = FullDownloadCopies(image->info);
|
|
||||||
image->DownloadMemory(map, copies);
|
|
||||||
runtime.Finish();
|
|
||||||
SwizzleImage(gpu_memory, image->gpu_addr, image->info, copies, map.mapped_span);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (True(image->flags & ImageFlagBits::Tracked)) {
|
|
||||||
UntrackImage(*image, image_id);
|
|
||||||
}
|
|
||||||
UnregisterImage(image_id);
|
|
||||||
DeleteImage(image_id);
|
|
||||||
if (is_bad_overlap) {
|
|
||||||
++num_iterations;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
++deletion_iterator;
|
if (must_download) {
|
||||||
}
|
auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes);
|
||||||
|
const auto copies = FullDownloadCopies(image.info);
|
||||||
|
image.DownloadMemory(map, copies);
|
||||||
|
runtime.Finish();
|
||||||
|
SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
|
||||||
|
}
|
||||||
|
if (True(image.flags & ImageFlagBits::Tracked)) {
|
||||||
|
UntrackImage(image, image_id);
|
||||||
|
}
|
||||||
|
UnregisterImage(image_id);
|
||||||
|
DeleteImage(image_id);
|
||||||
|
return false;
|
||||||
|
};
|
||||||
|
lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
void TextureCache<P>::TickFrame() {
|
void TextureCache<P>::TickFrame() {
|
||||||
if (Settings::values.use_caches_gc.GetValue() && total_used_memory > minimum_memory) {
|
if (total_used_memory > minimum_memory) {
|
||||||
RunGarbageCollector();
|
RunGarbageCollector();
|
||||||
}
|
}
|
||||||
sentenced_images.Tick();
|
sentenced_images.Tick();
|
||||||
|
@ -1078,6 +1043,8 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
|
||||||
tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
|
tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
|
||||||
}
|
}
|
||||||
total_used_memory += Common::AlignUp(tentative_size, 1024);
|
total_used_memory += Common::AlignUp(tentative_size, 1024);
|
||||||
|
image.lru_index = lru_cache.Insert(image_id, frame_tick);
|
||||||
|
|
||||||
ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
|
ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
|
||||||
[this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); });
|
[this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); });
|
||||||
if (False(image.flags & ImageFlagBits::Sparse)) {
|
if (False(image.flags & ImageFlagBits::Sparse)) {
|
||||||
|
@ -1115,6 +1082,7 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
|
||||||
tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
|
tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
|
||||||
}
|
}
|
||||||
total_used_memory -= Common::AlignUp(tentative_size, 1024);
|
total_used_memory -= Common::AlignUp(tentative_size, 1024);
|
||||||
|
lru_cache.Free(image.lru_index);
|
||||||
const auto& clear_page_table =
|
const auto& clear_page_table =
|
||||||
[this, image_id](
|
[this, image_id](
|
||||||
u64 page,
|
u64 page,
|
||||||
|
@ -1384,7 +1352,7 @@ void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool
|
||||||
if (is_modification) {
|
if (is_modification) {
|
||||||
MarkModification(image);
|
MarkModification(image);
|
||||||
}
|
}
|
||||||
image.frame_tick = frame_tick;
|
lru_cache.Touch(image.lru_index, frame_tick);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
|
|
|
@ -14,6 +14,7 @@
|
||||||
|
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "common/literals.h"
|
#include "common/literals.h"
|
||||||
|
#include "common/lru_cache.h"
|
||||||
#include "video_core/compatible_formats.h"
|
#include "video_core/compatible_formats.h"
|
||||||
#include "video_core/delayed_destruction_ring.h"
|
#include "video_core/delayed_destruction_ring.h"
|
||||||
#include "video_core/engines/fermi_2d.h"
|
#include "video_core/engines/fermi_2d.h"
|
||||||
|
@ -370,6 +371,12 @@ private:
|
||||||
std::vector<ImageId> uncommitted_downloads;
|
std::vector<ImageId> uncommitted_downloads;
|
||||||
std::queue<std::vector<ImageId>> committed_downloads;
|
std::queue<std::vector<ImageId>> committed_downloads;
|
||||||
|
|
||||||
|
/// Traits for the image LRU cache: entries hold an ImageId and are stamped with a u64 tick.
struct LRUItemParams {
|
||||||
|
using ObjectType = ImageId;
|
||||||
|
using TickType = u64;
|
||||||
|
};
|
||||||
|
Common::LeastRecentlyUsedCache<LRUItemParams> lru_cache;
|
||||||
|
|
||||||
static constexpr size_t TICKS_TO_DESTROY = 6;
|
static constexpr size_t TICKS_TO_DESTROY = 6;
|
||||||
DelayedDestructionRing<Image, TICKS_TO_DESTROY> sentenced_images;
|
DelayedDestructionRing<Image, TICKS_TO_DESTROY> sentenced_images;
|
||||||
DelayedDestructionRing<ImageView, TICKS_TO_DESTROY> sentenced_image_view;
|
DelayedDestructionRing<ImageView, TICKS_TO_DESTROY> sentenced_image_view;
|
||||||
|
@ -379,7 +386,6 @@ private:
|
||||||
|
|
||||||
u64 modification_tick = 0;
|
u64 modification_tick = 0;
|
||||||
u64 frame_tick = 0;
|
u64 frame_tick = 0;
|
||||||
typename SlotVector<Image>::Iterator deletion_iterator;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace VideoCommon
|
} // namespace VideoCommon
|
||||||
|
|
|
@ -818,7 +818,6 @@ void Config::ReadRendererValues() {
|
||||||
ReadGlobalSetting(Settings::values.shader_backend);
|
ReadGlobalSetting(Settings::values.shader_backend);
|
||||||
ReadGlobalSetting(Settings::values.use_asynchronous_shaders);
|
ReadGlobalSetting(Settings::values.use_asynchronous_shaders);
|
||||||
ReadGlobalSetting(Settings::values.use_fast_gpu_time);
|
ReadGlobalSetting(Settings::values.use_fast_gpu_time);
|
||||||
ReadGlobalSetting(Settings::values.use_caches_gc);
|
|
||||||
ReadGlobalSetting(Settings::values.bg_red);
|
ReadGlobalSetting(Settings::values.bg_red);
|
||||||
ReadGlobalSetting(Settings::values.bg_green);
|
ReadGlobalSetting(Settings::values.bg_green);
|
||||||
ReadGlobalSetting(Settings::values.bg_blue);
|
ReadGlobalSetting(Settings::values.bg_blue);
|
||||||
|
@ -1359,7 +1358,6 @@ void Config::SaveRendererValues() {
|
||||||
Settings::values.shader_backend.UsingGlobal());
|
Settings::values.shader_backend.UsingGlobal());
|
||||||
WriteGlobalSetting(Settings::values.use_asynchronous_shaders);
|
WriteGlobalSetting(Settings::values.use_asynchronous_shaders);
|
||||||
WriteGlobalSetting(Settings::values.use_fast_gpu_time);
|
WriteGlobalSetting(Settings::values.use_fast_gpu_time);
|
||||||
WriteGlobalSetting(Settings::values.use_caches_gc);
|
|
||||||
WriteGlobalSetting(Settings::values.bg_red);
|
WriteGlobalSetting(Settings::values.bg_red);
|
||||||
WriteGlobalSetting(Settings::values.bg_green);
|
WriteGlobalSetting(Settings::values.bg_green);
|
||||||
WriteGlobalSetting(Settings::values.bg_blue);
|
WriteGlobalSetting(Settings::values.bg_blue);
|
||||||
|
|
|
@ -28,7 +28,6 @@ void ConfigureGraphicsAdvanced::SetConfiguration() {
|
||||||
|
|
||||||
ui->use_vsync->setChecked(Settings::values.use_vsync.GetValue());
|
ui->use_vsync->setChecked(Settings::values.use_vsync.GetValue());
|
||||||
ui->use_asynchronous_shaders->setChecked(Settings::values.use_asynchronous_shaders.GetValue());
|
ui->use_asynchronous_shaders->setChecked(Settings::values.use_asynchronous_shaders.GetValue());
|
||||||
ui->use_caches_gc->setChecked(Settings::values.use_caches_gc.GetValue());
|
|
||||||
ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time.GetValue());
|
ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time.GetValue());
|
||||||
|
|
||||||
if (Settings::IsConfiguringGlobal()) {
|
if (Settings::IsConfiguringGlobal()) {
|
||||||
|
@ -55,8 +54,6 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() {
|
||||||
ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_shaders,
|
ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_shaders,
|
||||||
ui->use_asynchronous_shaders,
|
ui->use_asynchronous_shaders,
|
||||||
use_asynchronous_shaders);
|
use_asynchronous_shaders);
|
||||||
ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_caches_gc, ui->use_caches_gc,
|
|
||||||
use_caches_gc);
|
|
||||||
ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_fast_gpu_time,
|
ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_fast_gpu_time,
|
||||||
ui->use_fast_gpu_time, use_fast_gpu_time);
|
ui->use_fast_gpu_time, use_fast_gpu_time);
|
||||||
}
|
}
|
||||||
|
@ -81,7 +78,6 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
|
||||||
ui->use_asynchronous_shaders->setEnabled(
|
ui->use_asynchronous_shaders->setEnabled(
|
||||||
Settings::values.use_asynchronous_shaders.UsingGlobal());
|
Settings::values.use_asynchronous_shaders.UsingGlobal());
|
||||||
ui->use_fast_gpu_time->setEnabled(Settings::values.use_fast_gpu_time.UsingGlobal());
|
ui->use_fast_gpu_time->setEnabled(Settings::values.use_fast_gpu_time.UsingGlobal());
|
||||||
ui->use_caches_gc->setEnabled(Settings::values.use_caches_gc.UsingGlobal());
|
|
||||||
ui->anisotropic_filtering_combobox->setEnabled(
|
ui->anisotropic_filtering_combobox->setEnabled(
|
||||||
Settings::values.max_anisotropy.UsingGlobal());
|
Settings::values.max_anisotropy.UsingGlobal());
|
||||||
|
|
||||||
|
@ -94,8 +90,6 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
|
||||||
use_asynchronous_shaders);
|
use_asynchronous_shaders);
|
||||||
ConfigurationShared::SetColoredTristate(ui->use_fast_gpu_time,
|
ConfigurationShared::SetColoredTristate(ui->use_fast_gpu_time,
|
||||||
Settings::values.use_fast_gpu_time, use_fast_gpu_time);
|
Settings::values.use_fast_gpu_time, use_fast_gpu_time);
|
||||||
ConfigurationShared::SetColoredTristate(ui->use_caches_gc, Settings::values.use_caches_gc,
|
|
||||||
use_caches_gc);
|
|
||||||
ConfigurationShared::SetColoredComboBox(
|
ConfigurationShared::SetColoredComboBox(
|
||||||
ui->gpu_accuracy, ui->label_gpu_accuracy,
|
ui->gpu_accuracy, ui->label_gpu_accuracy,
|
||||||
static_cast<int>(Settings::values.gpu_accuracy.GetValue(true)));
|
static_cast<int>(Settings::values.gpu_accuracy.GetValue(true)));
|
||||||
|
|
|
@ -37,5 +37,4 @@ private:
|
||||||
ConfigurationShared::CheckState use_vsync;
|
ConfigurationShared::CheckState use_vsync;
|
||||||
ConfigurationShared::CheckState use_asynchronous_shaders;
|
ConfigurationShared::CheckState use_asynchronous_shaders;
|
||||||
ConfigurationShared::CheckState use_fast_gpu_time;
|
ConfigurationShared::CheckState use_fast_gpu_time;
|
||||||
ConfigurationShared::CheckState use_caches_gc;
|
|
||||||
};
|
};
|
||||||
|
|
|
@ -96,16 +96,6 @@
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item>
|
|
||||||
<widget class="QCheckBox" name="use_caches_gc">
|
|
||||||
<property name="toolTip">
|
|
||||||
<string>Enables garbage collection for the GPU caches, this will try to keep VRAM within 3-4 GB by flushing the least used textures/buffers. May cause issues in a few games.</string>
|
|
||||||
</property>
|
|
||||||
<property name="text">
|
|
||||||
<string>Enable GPU cache garbage collection (experimental)</string>
|
|
||||||
</property>
|
|
||||||
</widget>
|
|
||||||
</item>
|
|
||||||
<item>
|
<item>
|
||||||
<widget class="QWidget" name="af_layout" native="true">
|
<widget class="QWidget" name="af_layout" native="true">
|
||||||
<layout class="QHBoxLayout" name="horizontalLayout_1">
|
<layout class="QHBoxLayout" name="horizontalLayout_1">
|
||||||
|
|
|
@ -468,7 +468,6 @@ void Config::ReadValues() {
|
||||||
ReadSetting("Renderer", Settings::values.use_nvdec_emulation);
|
ReadSetting("Renderer", Settings::values.use_nvdec_emulation);
|
||||||
ReadSetting("Renderer", Settings::values.accelerate_astc);
|
ReadSetting("Renderer", Settings::values.accelerate_astc);
|
||||||
ReadSetting("Renderer", Settings::values.use_fast_gpu_time);
|
ReadSetting("Renderer", Settings::values.use_fast_gpu_time);
|
||||||
ReadSetting("Renderer", Settings::values.use_caches_gc);
|
|
||||||
|
|
||||||
ReadSetting("Renderer", Settings::values.bg_red);
|
ReadSetting("Renderer", Settings::values.bg_red);
|
||||||
ReadSetting("Renderer", Settings::values.bg_green);
|
ReadSetting("Renderer", Settings::values.bg_green);
|
||||||
|
|
Loading…
Reference in a new issue