forked from etc/pineapple-src
early-access version 3592
This commit is contained in:
parent
d18469456b
commit
165c5bc7d0
11 changed files with 127 additions and 227 deletions
|
@ -1,7 +1,7 @@
|
||||||
yuzu emulator early access
|
yuzu emulator early access
|
||||||
=============
|
=============
|
||||||
|
|
||||||
This is the source code for early-access 3591.
|
This is the source code for early-access 3592.
|
||||||
|
|
||||||
## Legal Notice
|
## Legal Notice
|
||||||
|
|
||||||
|
|
|
@ -979,8 +979,8 @@ void Controller_NPad::VibrateController(
|
||||||
}
|
}
|
||||||
|
|
||||||
void Controller_NPad::VibrateControllers(
|
void Controller_NPad::VibrateControllers(
|
||||||
const std::vector<Core::HID::VibrationDeviceHandle>& vibration_device_handles,
|
std::span<const Core::HID::VibrationDeviceHandle> vibration_device_handles,
|
||||||
const std::vector<Core::HID::VibrationValue>& vibration_values) {
|
std::span<const Core::HID::VibrationValue> vibration_values) {
|
||||||
if (!Settings::values.vibration_enabled.GetValue() && !permit_vibration_session_enabled) {
|
if (!Settings::values.vibration_enabled.GetValue() && !permit_vibration_session_enabled) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
|
@ -112,8 +112,8 @@ public:
|
||||||
const Core::HID::VibrationValue& vibration_value);
|
const Core::HID::VibrationValue& vibration_value);
|
||||||
|
|
||||||
void VibrateControllers(
|
void VibrateControllers(
|
||||||
const std::vector<Core::HID::VibrationDeviceHandle>& vibration_device_handles,
|
std::span<const Core::HID::VibrationDeviceHandle> vibration_device_handles,
|
||||||
const std::vector<Core::HID::VibrationValue>& vibration_values);
|
std::span<const Core::HID::VibrationValue> vibration_values);
|
||||||
|
|
||||||
Core::HID::VibrationValue GetLastVibration(
|
Core::HID::VibrationValue GetLastVibration(
|
||||||
const Core::HID::VibrationDeviceHandle& vibration_device_handle) const;
|
const Core::HID::VibrationDeviceHandle& vibration_device_handle) const;
|
||||||
|
|
|
@ -1601,16 +1601,16 @@ void Hid::SendVibrationValues(HLERequestContext& ctx) {
|
||||||
IPC::RequestParser rp{ctx};
|
IPC::RequestParser rp{ctx};
|
||||||
const auto applet_resource_user_id{rp.Pop<u64>()};
|
const auto applet_resource_user_id{rp.Pop<u64>()};
|
||||||
|
|
||||||
const auto handles = ctx.ReadBuffer(0);
|
const auto handle_data = ctx.ReadBuffer(0);
|
||||||
const auto vibrations = ctx.ReadBuffer(1);
|
const auto handle_count = ctx.GetReadBufferNumElements<Core::HID::VibrationDeviceHandle>(0);
|
||||||
|
const auto vibration_data = ctx.ReadBuffer(1);
|
||||||
|
const auto vibration_count = ctx.GetReadBufferNumElements<Core::HID::VibrationValue>(1);
|
||||||
|
|
||||||
std::vector<Core::HID::VibrationDeviceHandle> vibration_device_handles(
|
auto vibration_device_handles =
|
||||||
handles.size() / sizeof(Core::HID::VibrationDeviceHandle));
|
std::span(reinterpret_cast<const Core::HID::VibrationDeviceHandle*>(handle_data.data()),
|
||||||
std::vector<Core::HID::VibrationValue> vibration_values(vibrations.size() /
|
handle_count);
|
||||||
sizeof(Core::HID::VibrationValue));
|
auto vibration_values = std::span(
|
||||||
|
reinterpret_cast<const Core::HID::VibrationValue*>(vibration_data.data()), vibration_count);
|
||||||
std::memcpy(vibration_device_handles.data(), handles.data(), handles.size());
|
|
||||||
std::memcpy(vibration_values.data(), vibrations.data(), vibrations.size());
|
|
||||||
|
|
||||||
applet_resource->GetController<Controller_NPad>(HidController::NPad)
|
applet_resource->GetController<Controller_NPad>(HidController::NPad)
|
||||||
.VibrateControllers(vibration_device_handles, vibration_values);
|
.VibrateControllers(vibration_device_handles, vibration_values);
|
||||||
|
|
|
@ -23,94 +23,42 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
|
||||||
common_ranges.clear();
|
common_ranges.clear();
|
||||||
inline_buffer_id = NULL_BUFFER_ID;
|
inline_buffer_id = NULL_BUFFER_ID;
|
||||||
|
|
||||||
|
if (!runtime.CanReportMemoryUsage()) {
|
||||||
|
minimum_memory = DEFAULT_EXPECTED_MEMORY;
|
||||||
|
critical_memory = DEFAULT_CRITICAL_MEMORY;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
const s64 device_memory = static_cast<s64>(runtime.GetDeviceLocalMemory());
|
const s64 device_memory = static_cast<s64>(runtime.GetDeviceLocalMemory());
|
||||||
const u64 device_mem_per = device_memory / 100;
|
const s64 min_spacing_expected = device_memory - 1_GiB - 512_MiB;
|
||||||
minimum_memory = device_mem_per * 25;
|
const s64 min_spacing_critical = device_memory - 1_GiB;
|
||||||
expected_memory = device_mem_per * 50;
|
const s64 mem_threshold = std::min(device_memory, TARGET_THRESHOLD);
|
||||||
critical_memory = device_mem_per * 80;
|
const s64 min_vacancy_expected = (6 * mem_threshold) / 10;
|
||||||
LOG_INFO(HW_GPU, "Buffer cache device memory limits: min {} expected {} critical {}",
|
const s64 min_vacancy_critical = (3 * mem_threshold) / 10;
|
||||||
minimum_memory, expected_memory, critical_memory);
|
minimum_memory = static_cast<u64>(
|
||||||
|
std::max(std::min(device_memory - min_vacancy_expected, min_spacing_expected),
|
||||||
|
DEFAULT_EXPECTED_MEMORY));
|
||||||
|
critical_memory = static_cast<u64>(
|
||||||
|
std::max(std::min(device_memory - min_vacancy_critical, min_spacing_critical),
|
||||||
|
DEFAULT_CRITICAL_MEMORY));
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
void BufferCache<P>::RunGarbageCollector() {
|
void BufferCache<P>::RunGarbageCollector() {
|
||||||
if (total_used_memory < minimum_memory) {
|
const bool aggressive_gc = total_used_memory >= critical_memory;
|
||||||
return;
|
const u64 ticks_to_destroy = aggressive_gc ? 60 : 120;
|
||||||
}
|
int num_iterations = aggressive_gc ? 64 : 32;
|
||||||
bool is_expected = total_used_memory >= expected_memory;
|
const auto clean_up = [this, &num_iterations](BufferId buffer_id) {
|
||||||
bool is_critical = total_used_memory >= critical_memory;
|
|
||||||
const u64 ticks_to_destroy = is_critical ? 60ULL : is_expected ? 120ULL : 240ULL;
|
|
||||||
size_t num_iterations = is_critical ? 40 : (is_expected ? 20 : 10);
|
|
||||||
boost::container::small_vector<std::pair<BufferId, VideoCommon::BufferCopies>, 40> to_delete;
|
|
||||||
u64 total_size{0};
|
|
||||||
|
|
||||||
const auto clean_up = [&](BufferId buffer_id) {
|
|
||||||
if (num_iterations == 0) {
|
if (num_iterations == 0) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
--num_iterations;
|
--num_iterations;
|
||||||
auto& buffer = slot_buffers[buffer_id];
|
auto& buffer = slot_buffers[buffer_id];
|
||||||
auto buffer_copies = FullDownloadCopies(buffer, buffer.CpuAddr(), buffer.SizeBytes());
|
DownloadBufferMemory(buffer);
|
||||||
total_size += buffer_copies.total_size;
|
DeleteBuffer(buffer_id);
|
||||||
to_delete.push_back({buffer_id, std::move(buffer_copies)});
|
|
||||||
return false;
|
return false;
|
||||||
};
|
};
|
||||||
lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up);
|
lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up);
|
||||||
|
|
||||||
if (total_size > 0) {
|
|
||||||
if constexpr (USE_MEMORY_MAPS) {
|
|
||||||
auto map = runtime.DownloadStagingBuffer(Common::AlignUp(total_size, 1024));
|
|
||||||
auto base_offset = map.offset;
|
|
||||||
|
|
||||||
for (auto& [buffer_id, buffer_copies] : to_delete) {
|
|
||||||
if (buffer_copies.total_size == 0) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (auto& copy : buffer_copies.copies) {
|
|
||||||
copy.dst_offset += map.offset;
|
|
||||||
}
|
|
||||||
|
|
||||||
auto& buffer = slot_buffers[buffer_id];
|
|
||||||
runtime.CopyBuffer(map.buffer, buffer, buffer_copies.copies);
|
|
||||||
map.offset += buffer_copies.total_size;
|
|
||||||
}
|
|
||||||
|
|
||||||
runtime.Finish();
|
|
||||||
|
|
||||||
for (auto& [buffer_id, buffer_copies] : to_delete) {
|
|
||||||
if (buffer_copies.total_size > 0) {
|
|
||||||
auto& buffer = slot_buffers[buffer_id];
|
|
||||||
for (const auto& copy : buffer_copies.copies) {
|
|
||||||
const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
|
|
||||||
const u8* copy_mapped_memory =
|
|
||||||
map.mapped_span.data() + copy.dst_offset - base_offset;
|
|
||||||
cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
DeleteBuffer(buffer_id);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
for (auto& [buffer_id, buffer_copies] : to_delete) {
|
|
||||||
if (buffer_copies.total_size == 0) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
const std::span<u8> immediate_buffer = ImmediateBuffer(buffer_copies.total_size);
|
|
||||||
auto& buffer = slot_buffers[buffer_id];
|
|
||||||
for (const BufferCopy& copy : buffer_copies.copies) {
|
|
||||||
buffer.ImmediateDownload(copy.src_offset,
|
|
||||||
immediate_buffer.subspan(0, copy.size));
|
|
||||||
const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
|
|
||||||
cpu_memory.WriteBlockUnsafe(copy_cpu_addr, immediate_buffer.data(), copy.size);
|
|
||||||
}
|
|
||||||
DeleteBuffer(buffer_id);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
for (auto& [buffer_id, buffer_copies] : to_delete) {
|
|
||||||
DeleteBuffer(buffer_id);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
|
@ -129,10 +77,12 @@ void BufferCache<P>::TickFrame() {
|
||||||
uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0;
|
uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0;
|
||||||
|
|
||||||
// If we can obtain the memory info, use it instead of the estimate.
|
// If we can obtain the memory info, use it instead of the estimate.
|
||||||
if (runtime.CanReportMemoryUsage() && frame_tick % 60 == 0) {
|
if (runtime.CanReportMemoryUsage()) {
|
||||||
total_used_memory = runtime.GetDeviceMemoryUsage();
|
total_used_memory = runtime.GetDeviceMemoryUsage();
|
||||||
}
|
}
|
||||||
RunGarbageCollector();
|
if (total_used_memory >= minimum_memory) {
|
||||||
|
RunGarbageCollector();
|
||||||
|
}
|
||||||
++frame_tick;
|
++frame_tick;
|
||||||
delayed_destruction_ring.Tick();
|
delayed_destruction_ring.Tick();
|
||||||
|
|
||||||
|
@ -1586,13 +1536,17 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
VideoCommon::BufferCopies BufferCache<P>::FullDownloadCopies(Buffer& buffer, VAddr cpu_addr,
|
void BufferCache<P>::DownloadBufferMemory(Buffer& buffer) {
|
||||||
u64 size, bool clear) {
|
DownloadBufferMemory(buffer, buffer.CpuAddr(), buffer.SizeBytes());
|
||||||
boost::container::small_vector<BufferCopy, 16> copies;
|
}
|
||||||
|
|
||||||
|
template <class P>
|
||||||
|
void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 size) {
|
||||||
|
boost::container::small_vector<BufferCopy, 1> copies;
|
||||||
u64 total_size_bytes = 0;
|
u64 total_size_bytes = 0;
|
||||||
u64 largest_copy = 0;
|
u64 largest_copy = 0;
|
||||||
memory_tracker.ForEachDownloadRange(
|
memory_tracker.ForEachDownloadRangeAndClear(
|
||||||
cpu_addr, size, clear, [&](u64 cpu_addr_out, u64 range_size) {
|
cpu_addr, size, [&](u64 cpu_addr_out, u64 range_size) {
|
||||||
const VAddr buffer_addr = buffer.CpuAddr();
|
const VAddr buffer_addr = buffer.CpuAddr();
|
||||||
const auto add_download = [&](VAddr start, VAddr end) {
|
const auto add_download = [&](VAddr start, VAddr end) {
|
||||||
const u64 new_offset = start - buffer_addr;
|
const u64 new_offset = start - buffer_addr;
|
||||||
|
@ -1616,35 +1570,22 @@ VideoCommon::BufferCopies BufferCache<P>::FullDownloadCopies(Buffer& buffer, VAd
|
||||||
ClearDownload(subtract_interval);
|
ClearDownload(subtract_interval);
|
||||||
common_ranges.subtract(subtract_interval);
|
common_ranges.subtract(subtract_interval);
|
||||||
});
|
});
|
||||||
return {total_size_bytes, largest_copy, std::move(copies)};
|
if (total_size_bytes == 0) {
|
||||||
}
|
|
||||||
|
|
||||||
template <class P>
|
|
||||||
void BufferCache<P>::DownloadBufferMemory(Buffer& buffer) {
|
|
||||||
DownloadBufferMemory(buffer, buffer.CpuAddr(), buffer.SizeBytes());
|
|
||||||
}
|
|
||||||
|
|
||||||
template <class P>
|
|
||||||
void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 size) {
|
|
||||||
auto buffer_copies = FullDownloadCopies(buffer, cpu_addr, size);
|
|
||||||
if (buffer_copies.total_size == 0) {
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
MICROPROFILE_SCOPE(GPU_DownloadMemory);
|
MICROPROFILE_SCOPE(GPU_DownloadMemory);
|
||||||
|
|
||||||
if constexpr (USE_MEMORY_MAPS) {
|
if constexpr (USE_MEMORY_MAPS) {
|
||||||
auto download_staging = runtime.DownloadStagingBuffer(buffer_copies.total_size);
|
auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes);
|
||||||
const u8* const mapped_memory = download_staging.mapped_span.data();
|
const u8* const mapped_memory = download_staging.mapped_span.data();
|
||||||
const std::span<BufferCopy> copies_span(buffer_copies.copies.data(),
|
const std::span<BufferCopy> copies_span(copies.data(), copies.data() + copies.size());
|
||||||
buffer_copies.copies.size());
|
for (BufferCopy& copy : copies) {
|
||||||
for (BufferCopy& copy : buffer_copies.copies) {
|
|
||||||
// Modify copies to have the staging offset in mind
|
// Modify copies to have the staging offset in mind
|
||||||
copy.dst_offset += download_staging.offset;
|
copy.dst_offset += download_staging.offset;
|
||||||
}
|
}
|
||||||
runtime.CopyBuffer(download_staging.buffer, buffer, copies_span);
|
runtime.CopyBuffer(download_staging.buffer, buffer, copies_span);
|
||||||
runtime.Finish();
|
runtime.Finish();
|
||||||
for (const BufferCopy& copy : buffer_copies.copies) {
|
for (const BufferCopy& copy : copies) {
|
||||||
const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
|
const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
|
||||||
// Undo the modified offset
|
// Undo the modified offset
|
||||||
const u64 dst_offset = copy.dst_offset - download_staging.offset;
|
const u64 dst_offset = copy.dst_offset - download_staging.offset;
|
||||||
|
@ -1652,8 +1593,8 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si
|
||||||
cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size);
|
cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
const std::span<u8> immediate_buffer = ImmediateBuffer(buffer_copies.largest_copy);
|
const std::span<u8> immediate_buffer = ImmediateBuffer(largest_copy);
|
||||||
for (const BufferCopy& copy : buffer_copies.copies) {
|
for (const BufferCopy& copy : copies) {
|
||||||
buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size));
|
buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size));
|
||||||
const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
|
const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset;
|
||||||
cpu_memory.WriteBlockUnsafe(copy_cpu_addr, immediate_buffer.data(), copy.size);
|
cpu_memory.WriteBlockUnsafe(copy_cpu_addr, immediate_buffer.data(), copy.size);
|
||||||
|
|
|
@ -57,6 +57,8 @@ MICROPROFILE_DECLARE(GPU_PrepareBuffers);
|
||||||
MICROPROFILE_DECLARE(GPU_BindUploadBuffers);
|
MICROPROFILE_DECLARE(GPU_BindUploadBuffers);
|
||||||
MICROPROFILE_DECLARE(GPU_DownloadMemory);
|
MICROPROFILE_DECLARE(GPU_DownloadMemory);
|
||||||
|
|
||||||
|
using BufferId = SlotId;
|
||||||
|
|
||||||
using VideoCore::Surface::PixelFormat;
|
using VideoCore::Surface::PixelFormat;
|
||||||
using namespace Common::Literals;
|
using namespace Common::Literals;
|
||||||
|
|
||||||
|
@ -462,9 +464,6 @@ private:
|
||||||
|
|
||||||
void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, std::span<BufferCopy> copies);
|
void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, std::span<BufferCopy> copies);
|
||||||
|
|
||||||
[[nodiscard]] VideoCommon::BufferCopies FullDownloadCopies(Buffer& buffer, VAddr cpu_addr,
|
|
||||||
u64 size, bool clear = true);
|
|
||||||
|
|
||||||
void DownloadBufferMemory(Buffer& buffer_id);
|
void DownloadBufferMemory(Buffer& buffer_id);
|
||||||
|
|
||||||
void DownloadBufferMemory(Buffer& buffer_id, VAddr cpu_addr, u64 size);
|
void DownloadBufferMemory(Buffer& buffer_id, VAddr cpu_addr, u64 size);
|
||||||
|
@ -567,7 +566,6 @@ private:
|
||||||
u64 frame_tick = 0;
|
u64 frame_tick = 0;
|
||||||
u64 total_used_memory = 0;
|
u64 total_used_memory = 0;
|
||||||
u64 minimum_memory = 0;
|
u64 minimum_memory = 0;
|
||||||
u64 expected_memory = 0;
|
|
||||||
u64 critical_memory = 0;
|
u64 critical_memory = 0;
|
||||||
BufferId inline_buffer_id;
|
BufferId inline_buffer_id;
|
||||||
|
|
||||||
|
|
|
@ -348,25 +348,12 @@ void RasterizerVulkan::Clear(u32 layer_count) {
|
||||||
|
|
||||||
const u32 color_attachment = regs.clear_surface.RT;
|
const u32 color_attachment = regs.clear_surface.RT;
|
||||||
if (use_color && framebuffer->HasAspectColorBit(color_attachment)) {
|
if (use_color && framebuffer->HasAspectColorBit(color_attachment)) {
|
||||||
VkClearValue clear_value;
|
const auto format =
|
||||||
bool is_integer = false;
|
VideoCore::Surface::PixelFormatFromRenderTargetFormat(regs.rt[color_attachment].format);
|
||||||
bool is_signed = false;
|
bool is_integer = IsPixelFormatInteger(format);
|
||||||
size_t int_size = 8;
|
bool is_signed = IsPixelFormatSignedInteger(format);
|
||||||
for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; ++i) {
|
size_t int_size = PixelComponentSizeBitsInteger(format);
|
||||||
const auto& this_rt = regs.rt[i];
|
VkClearValue clear_value{};
|
||||||
if (this_rt.Address() == 0) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (this_rt.format == Tegra::RenderTargetFormat::NONE) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
const auto format =
|
|
||||||
VideoCore::Surface::PixelFormatFromRenderTargetFormat(this_rt.format);
|
|
||||||
is_integer = IsPixelFormatInteger(format);
|
|
||||||
is_signed = IsPixelFormatSignedInteger(format);
|
|
||||||
int_size = PixelComponentSizeBitsInteger(format);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (!is_integer) {
|
if (!is_integer) {
|
||||||
std::memcpy(clear_value.color.float32, regs.clear_color.data(),
|
std::memcpy(clear_value.color.float32, regs.clear_color.data(),
|
||||||
regs.clear_color.size() * sizeof(f32));
|
regs.clear_color.size() * sizeof(f32));
|
||||||
|
|
|
@ -47,31 +47,35 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface&
|
||||||
void(slot_image_views.insert(runtime, NullImageViewParams{}));
|
void(slot_image_views.insert(runtime, NullImageViewParams{}));
|
||||||
void(slot_samplers.insert(runtime, sampler_descriptor));
|
void(slot_samplers.insert(runtime, sampler_descriptor));
|
||||||
|
|
||||||
const s64 device_memory = static_cast<s64>(runtime.GetDeviceLocalMemory());
|
if constexpr (HAS_DEVICE_MEMORY_INFO) {
|
||||||
const u64 device_mem_per = device_memory / 100;
|
const s64 device_memory = static_cast<s64>(runtime.GetDeviceLocalMemory());
|
||||||
minimum_memory = device_mem_per * 25;
|
const s64 min_spacing_expected = device_memory - 1_GiB - 512_MiB;
|
||||||
expected_memory = device_mem_per * 50;
|
const s64 min_spacing_critical = device_memory - 1_GiB;
|
||||||
critical_memory = device_mem_per * 80;
|
const s64 mem_threshold = std::min(device_memory, TARGET_THRESHOLD);
|
||||||
LOG_INFO(HW_GPU, "Texture cache device memory limits: min {} expected {} critical {}",
|
const s64 min_vacancy_expected = (6 * mem_threshold) / 10;
|
||||||
minimum_memory, expected_memory, critical_memory);
|
const s64 min_vacancy_critical = (3 * mem_threshold) / 10;
|
||||||
|
expected_memory = static_cast<u64>(
|
||||||
|
std::max(std::min(device_memory - min_vacancy_expected, min_spacing_expected),
|
||||||
|
DEFAULT_EXPECTED_MEMORY));
|
||||||
|
critical_memory = static_cast<u64>(
|
||||||
|
std::max(std::min(device_memory - min_vacancy_critical, min_spacing_critical),
|
||||||
|
DEFAULT_CRITICAL_MEMORY));
|
||||||
|
minimum_memory = static_cast<u64>((device_memory - mem_threshold) / 2);
|
||||||
|
} else {
|
||||||
|
expected_memory = DEFAULT_EXPECTED_MEMORY + 512_MiB;
|
||||||
|
critical_memory = DEFAULT_CRITICAL_MEMORY + 1_GiB;
|
||||||
|
minimum_memory = 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
void TextureCache<P>::RunGarbageCollector() {
|
void TextureCache<P>::RunGarbageCollector() {
|
||||||
if (total_used_memory < minimum_memory) {
|
bool high_priority_mode = total_used_memory >= expected_memory;
|
||||||
return;
|
bool aggressive_mode = total_used_memory >= critical_memory;
|
||||||
}
|
const u64 ticks_to_destroy = aggressive_mode ? 10ULL : high_priority_mode ? 25ULL : 50ULL;
|
||||||
bool is_expected = total_used_memory >= expected_memory;
|
size_t num_iterations = aggressive_mode ? 40 : (high_priority_mode ? 20 : 10);
|
||||||
bool is_critical = total_used_memory >= critical_memory;
|
const auto clean_up = [this, &num_iterations, &high_priority_mode,
|
||||||
const u64 ticks_to_destroy = is_critical ? 10ULL : is_expected ? 25ULL : 50ULL;
|
&aggressive_mode](ImageId image_id) {
|
||||||
size_t num_iterations = is_critical ? 40 : (is_expected ? 20 : 10);
|
|
||||||
boost::container::small_vector<
|
|
||||||
std::tuple<ImageId, bool, boost::container::small_vector<BufferImageCopy, 16>>, 40>
|
|
||||||
to_delete;
|
|
||||||
u64 total_download_size{0};
|
|
||||||
u32 largest_download_size{0};
|
|
||||||
|
|
||||||
const auto clean_up = [&](ImageId image_id) {
|
|
||||||
if (num_iterations == 0) {
|
if (num_iterations == 0) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -82,70 +86,51 @@ void TextureCache<P>::RunGarbageCollector() {
|
||||||
// used by the async decoder thread.
|
// used by the async decoder thread.
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
const bool must_download =
|
||||||
const bool do_download = image.IsSafeDownload() &&
|
image.IsSafeDownload() && False(image.flags & ImageFlagBits::BadOverlap);
|
||||||
False(image.flags & ImageFlagBits::BadOverlap) &&
|
if (!high_priority_mode &&
|
||||||
(False(image.flags & ImageFlagBits::CostlyLoad) || is_critical);
|
(must_download || True(image.flags & ImageFlagBits::CostlyLoad))) {
|
||||||
if (do_download) {
|
return false;
|
||||||
total_download_size += image.unswizzled_size_bytes;
|
}
|
||||||
largest_download_size = std::max(largest_download_size, image.unswizzled_size_bytes);
|
if (must_download) {
|
||||||
|
auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes);
|
||||||
|
const auto copies = FullDownloadCopies(image.info);
|
||||||
|
image.DownloadMemory(map, copies);
|
||||||
|
runtime.Finish();
|
||||||
|
SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span,
|
||||||
|
swizzle_data_buffer);
|
||||||
|
}
|
||||||
|
if (True(image.flags & ImageFlagBits::Tracked)) {
|
||||||
|
UntrackImage(image, image_id);
|
||||||
|
}
|
||||||
|
UnregisterImage(image_id);
|
||||||
|
DeleteImage(image_id, image.scale_tick > frame_tick + 5);
|
||||||
|
if (total_used_memory < critical_memory) {
|
||||||
|
if (aggressive_mode) {
|
||||||
|
// Sink the aggressiveness.
|
||||||
|
num_iterations >>= 2;
|
||||||
|
aggressive_mode = false;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (high_priority_mode && total_used_memory < expected_memory) {
|
||||||
|
num_iterations >>= 1;
|
||||||
|
high_priority_mode = false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
to_delete.push_back({image_id, do_download, {}});
|
|
||||||
return false;
|
return false;
|
||||||
};
|
};
|
||||||
lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up);
|
lru_cache.ForEachItemBelow(frame_tick - ticks_to_destroy, clean_up);
|
||||||
|
|
||||||
if (total_download_size > 0) {
|
|
||||||
auto map = runtime.DownloadStagingBuffer(total_download_size);
|
|
||||||
for (auto& [image_id, do_download, copies] : to_delete) {
|
|
||||||
if (!do_download) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
Image& image = slot_images[image_id];
|
|
||||||
copies = FullDownloadCopies(image.info);
|
|
||||||
image.DownloadMemory(map, copies);
|
|
||||||
map.offset += Common::AlignUp(image.unswizzled_size_bytes, 64);
|
|
||||||
}
|
|
||||||
|
|
||||||
runtime.Finish();
|
|
||||||
swizzle_data_buffer.resize_destructive(Common::AlignUp(largest_download_size, 1024));
|
|
||||||
|
|
||||||
u64 offset{0};
|
|
||||||
for (auto& [image_id, do_download, copies] : to_delete) {
|
|
||||||
Image& image = slot_images[image_id];
|
|
||||||
if (do_download) {
|
|
||||||
for (auto& copy : copies) {
|
|
||||||
copy.buffer_offset += offset;
|
|
||||||
}
|
|
||||||
SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span,
|
|
||||||
swizzle_data_buffer);
|
|
||||||
offset += Common::AlignUp(image.unswizzled_size_bytes, 64);
|
|
||||||
}
|
|
||||||
if (True(image.flags & ImageFlagBits::Tracked)) {
|
|
||||||
UntrackImage(image, image_id);
|
|
||||||
}
|
|
||||||
UnregisterImage(image_id);
|
|
||||||
DeleteImage(image_id, image.scale_tick > frame_tick + 5);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
for (auto& [image_id, do_download, copies] : to_delete) {
|
|
||||||
Image& image = slot_images[image_id];
|
|
||||||
if (True(image.flags & ImageFlagBits::Tracked)) {
|
|
||||||
UntrackImage(image, image_id);
|
|
||||||
}
|
|
||||||
UnregisterImage(image_id);
|
|
||||||
DeleteImage(image_id, image.scale_tick > frame_tick + 5);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
void TextureCache<P>::TickFrame() {
|
void TextureCache<P>::TickFrame() {
|
||||||
// If we can obtain the memory info, use it instead of the estimate.
|
// If we can obtain the memory info, use it instead of the estimate.
|
||||||
if (runtime.CanReportMemoryUsage() && frame_tick % 60 == 0) {
|
if (runtime.CanReportMemoryUsage()) {
|
||||||
total_used_memory = runtime.GetDeviceMemoryUsage();
|
total_used_memory = runtime.GetDeviceMemoryUsage();
|
||||||
}
|
}
|
||||||
RunGarbageCollector();
|
if (total_used_memory > minimum_memory) {
|
||||||
|
RunGarbageCollector();
|
||||||
|
}
|
||||||
sentenced_images.Tick();
|
sentenced_images.Tick();
|
||||||
sentenced_framebuffers.Tick();
|
sentenced_framebuffers.Tick();
|
||||||
sentenced_image_view.Tick();
|
sentenced_image_view.Tick();
|
||||||
|
|
|
@ -3,8 +3,6 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <boost/container/small_vector.hpp>
|
|
||||||
|
|
||||||
#include "common/common_funcs.h"
|
#include "common/common_funcs.h"
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "video_core/texture_cache/slot_vector.h"
|
#include "video_core/texture_cache/slot_vector.h"
|
||||||
|
@ -16,7 +14,6 @@ constexpr size_t MAX_MIP_LEVELS = 14;
|
||||||
|
|
||||||
constexpr SlotId CORRUPT_ID{0xfffffffe};
|
constexpr SlotId CORRUPT_ID{0xfffffffe};
|
||||||
|
|
||||||
using BufferId = SlotId;
|
|
||||||
using ImageId = SlotId;
|
using ImageId = SlotId;
|
||||||
using ImageMapId = SlotId;
|
using ImageMapId = SlotId;
|
||||||
using ImageViewId = SlotId;
|
using ImageViewId = SlotId;
|
||||||
|
@ -149,12 +146,6 @@ struct BufferCopy {
|
||||||
size_t size;
|
size_t size;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct BufferCopies {
|
|
||||||
u64 total_size;
|
|
||||||
u64 largest_copy;
|
|
||||||
boost::container::small_vector<BufferCopy, 16> copies;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct SwizzleParameters {
|
struct SwizzleParameters {
|
||||||
Extent3D num_tiles;
|
Extent3D num_tiles;
|
||||||
Extent3D block;
|
Extent3D block;
|
||||||
|
|
|
@ -914,7 +914,7 @@ void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
boost::container::small_vector<BufferImageCopy, 16> FullDownloadCopies(const ImageInfo& info) {
|
std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info) {
|
||||||
const Extent3D size = info.size;
|
const Extent3D size = info.size;
|
||||||
const u32 bytes_per_block = BytesPerBlock(info.format);
|
const u32 bytes_per_block = BytesPerBlock(info.format);
|
||||||
if (info.type == ImageType::Linear) {
|
if (info.type == ImageType::Linear) {
|
||||||
|
@ -942,7 +942,7 @@ boost::container::small_vector<BufferImageCopy, 16> FullDownloadCopies(const Ima
|
||||||
|
|
||||||
u32 host_offset = 0;
|
u32 host_offset = 0;
|
||||||
|
|
||||||
boost::container::small_vector<BufferImageCopy, 16> copies(num_levels);
|
std::vector<BufferImageCopy> copies(num_levels);
|
||||||
for (s32 level = 0; level < num_levels; ++level) {
|
for (s32 level = 0; level < num_levels; ++level) {
|
||||||
const Extent3D level_size = AdjustMipSize(size, level);
|
const Extent3D level_size = AdjustMipSize(size, level);
|
||||||
const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size);
|
const u32 num_blocks_per_layer = NumBlocks(level_size, tile_size);
|
||||||
|
|
|
@ -5,7 +5,6 @@
|
||||||
|
|
||||||
#include <optional>
|
#include <optional>
|
||||||
#include <span>
|
#include <span>
|
||||||
#include <boost/container/small_vector.hpp>
|
|
||||||
|
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "common/scratch_buffer.h"
|
#include "common/scratch_buffer.h"
|
||||||
|
@ -74,8 +73,7 @@ struct OverlapResult {
|
||||||
void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output,
|
void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output,
|
||||||
std::span<BufferImageCopy> copies);
|
std::span<BufferImageCopy> copies);
|
||||||
|
|
||||||
[[nodiscard]] boost::container::small_vector<BufferImageCopy, 16> FullDownloadCopies(
|
[[nodiscard]] std::vector<BufferImageCopy> FullDownloadCopies(const ImageInfo& info);
|
||||||
const ImageInfo& info);
|
|
||||||
|
|
||||||
[[nodiscard]] Extent3D MipSize(Extent3D size, u32 level);
|
[[nodiscard]] Extent3D MipSize(Extent3D size, u32 level);
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue