early-access version 1843
This commit is contained in:
parent
9e85972340
commit
f97ec12f78
24 changed files with 739 additions and 166 deletions
|
@ -1,7 +1,7 @@
|
||||||
yuzu emulator early access
|
yuzu emulator early access
|
||||||
=============
|
=============
|
||||||
|
|
||||||
This is the source code for early-access 1842.
|
This is the source code for early-access 1843.
|
||||||
|
|
||||||
## Legal Notice
|
## Legal Notice
|
||||||
|
|
||||||
|
|
|
@ -41,7 +41,6 @@ void InstallInterfaces(SM::ServiceManager& service_manager, NVFlinger::NVFlinger
|
||||||
|
|
||||||
Module::Module(Core::System& system)
|
Module::Module(Core::System& system)
|
||||||
: syncpoint_manager{system.GPU()}, service_context{system, "nvdrv"} {
|
: syncpoint_manager{system.GPU()}, service_context{system, "nvdrv"} {
|
||||||
auto& kernel = system.Kernel();
|
|
||||||
for (u32 i = 0; i < MaxNvEvents; i++) {
|
for (u32 i = 0; i < MaxNvEvents; i++) {
|
||||||
events_interface.events[i].event =
|
events_interface.events[i].event =
|
||||||
service_context.CreateEvent(fmt::format("NVDRV::NvEvent_{}", i));
|
service_context.CreateEvent(fmt::format("NVDRV::NvEvent_{}", i));
|
||||||
|
|
|
@ -536,7 +536,7 @@ TEST_CASE("BufferBase: Cached write downloads") {
|
||||||
REQUIRE(rasterizer.Count() == 63);
|
REQUIRE(rasterizer.Count() == 63);
|
||||||
buffer.MarkRegionAsGpuModified(c + PAGE, PAGE);
|
buffer.MarkRegionAsGpuModified(c + PAGE, PAGE);
|
||||||
int num = 0;
|
int num = 0;
|
||||||
buffer.ForEachDownloadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
|
buffer.ForEachDownloadRange(c, WORD, true, [&](u64 offset, u64 size) { ++num; });
|
||||||
buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
|
buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
|
||||||
REQUIRE(num == 0);
|
REQUIRE(num == 0);
|
||||||
REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
|
REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
|
||||||
|
|
|
@ -226,19 +226,19 @@ public:
|
||||||
/// Call 'func' for each CPU modified range and unmark those pages as CPU modified
|
/// Call 'func' for each CPU modified range and unmark those pages as CPU modified
|
||||||
template <typename Func>
|
template <typename Func>
|
||||||
void ForEachUploadRange(VAddr query_cpu_range, u64 size, Func&& func) {
|
void ForEachUploadRange(VAddr query_cpu_range, u64 size, Func&& func) {
|
||||||
ForEachModifiedRange<Type::CPU>(query_cpu_range, size, func);
|
ForEachModifiedRange<Type::CPU>(query_cpu_range, size, true, func);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Call 'func' for each GPU modified range and unmark those pages as GPU modified
|
/// Call 'func' for each GPU modified range and, when 'clear' is true, unmark those pages as GPU modified
|
||||||
template <typename Func>
|
template <typename Func>
|
||||||
void ForEachDownloadRange(VAddr query_cpu_range, u64 size, Func&& func) {
|
void ForEachDownloadRange(VAddr query_cpu_range, u64 size, bool clear, Func&& func) {
|
||||||
ForEachModifiedRange<Type::GPU>(query_cpu_range, size, func);
|
ForEachModifiedRange<Type::GPU>(query_cpu_range, size, clear, func);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Call 'func' for each GPU modified range and unmark those pages as GPU modified
|
/// Call 'func' for each GPU modified range and unmark those pages as GPU modified
|
||||||
template <typename Func>
|
template <typename Func>
|
||||||
void ForEachDownloadRange(Func&& func) {
|
void ForEachDownloadRange(Func&& func) {
|
||||||
ForEachModifiedRange<Type::GPU>(cpu_addr, SizeBytes(), func);
|
ForEachModifiedRange<Type::GPU>(cpu_addr, SizeBytes(), true, func);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Mark buffer as picked
|
/// Mark buffer as picked
|
||||||
|
@ -415,7 +415,7 @@ private:
|
||||||
* @param func Function to call for each turned off region
|
* @param func Function to call for each turned off region
|
||||||
*/
|
*/
|
||||||
template <Type type, typename Func>
|
template <Type type, typename Func>
|
||||||
void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) {
|
void ForEachModifiedRange(VAddr query_cpu_range, s64 size, bool clear, Func&& func) {
|
||||||
static_assert(type != Type::Untracked);
|
static_assert(type != Type::Untracked);
|
||||||
|
|
||||||
const s64 difference = query_cpu_range - cpu_addr;
|
const s64 difference = query_cpu_range - cpu_addr;
|
||||||
|
@ -467,7 +467,9 @@ private:
|
||||||
bits = (bits << left_offset) >> left_offset;
|
bits = (bits << left_offset) >> left_offset;
|
||||||
|
|
||||||
const u64 current_word = state_words[word_index] & bits;
|
const u64 current_word = state_words[word_index] & bits;
|
||||||
|
if (clear) {
|
||||||
state_words[word_index] &= ~bits;
|
state_words[word_index] &= ~bits;
|
||||||
|
}
|
||||||
|
|
||||||
if constexpr (type == Type::CPU) {
|
if constexpr (type == Type::CPU) {
|
||||||
const u64 current_bits = untracked_words[word_index] & bits;
|
const u64 current_bits = untracked_words[word_index] & bits;
|
||||||
|
|
|
@ -15,6 +15,7 @@
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include <boost/container/small_vector.hpp>
|
#include <boost/container/small_vector.hpp>
|
||||||
|
#include <boost/icl/interval_set.hpp>
|
||||||
|
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "common/div_ceil.h"
|
#include "common/div_ceil.h"
|
||||||
|
@ -77,6 +78,9 @@ class BufferCache {
|
||||||
using Runtime = typename P::Runtime;
|
using Runtime = typename P::Runtime;
|
||||||
using Buffer = typename P::Buffer;
|
using Buffer = typename P::Buffer;
|
||||||
|
|
||||||
|
using IntervalSet = boost::icl::interval_set<VAddr>;
|
||||||
|
using IntervalType = typename IntervalSet::interval_type;
|
||||||
|
|
||||||
struct Empty {};
|
struct Empty {};
|
||||||
|
|
||||||
struct OverlapResult {
|
struct OverlapResult {
|
||||||
|
@ -153,6 +157,7 @@ public:
|
||||||
|
|
||||||
/// Commit asynchronous downloads
|
/// Commit asynchronous downloads
|
||||||
void CommitAsyncFlushes();
|
void CommitAsyncFlushes();
|
||||||
|
void CommitAsyncFlushesHigh();
|
||||||
|
|
||||||
/// Pop asynchronous downloads
|
/// Pop asynchronous downloads
|
||||||
void PopAsyncFlushes();
|
void PopAsyncFlushes();
|
||||||
|
@ -160,6 +165,9 @@ public:
|
||||||
/// Return true when a CPU region is modified from the GPU
|
/// Return true when a CPU region is modified from the GPU
|
||||||
[[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
|
[[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
|
||||||
|
|
||||||
|
/// Return true when a CPU region is modified from the CPU
|
||||||
|
[[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size);
|
||||||
|
|
||||||
std::mutex mutex;
|
std::mutex mutex;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -272,8 +280,6 @@ private:
|
||||||
|
|
||||||
void DeleteBuffer(BufferId buffer_id);
|
void DeleteBuffer(BufferId buffer_id);
|
||||||
|
|
||||||
void ReplaceBufferDownloads(BufferId old_buffer_id, BufferId new_buffer_id);
|
|
||||||
|
|
||||||
void NotifyBufferDeletion();
|
void NotifyBufferDeletion();
|
||||||
|
|
||||||
[[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr) const;
|
[[nodiscard]] Binding StorageBufferBinding(GPUVAddr ssbo_addr) const;
|
||||||
|
@ -327,9 +333,7 @@ private:
|
||||||
|
|
||||||
std::vector<BufferId> cached_write_buffer_ids;
|
std::vector<BufferId> cached_write_buffer_ids;
|
||||||
|
|
||||||
// TODO: This data structure is not optimal and it should be reworked
|
IntervalSet uncommitted_ranges;
|
||||||
std::vector<BufferId> uncommitted_downloads;
|
|
||||||
std::deque<std::vector<BufferId>> committed_downloads;
|
|
||||||
|
|
||||||
size_t immediate_buffer_capacity = 0;
|
size_t immediate_buffer_capacity = 0;
|
||||||
std::unique_ptr<u8[]> immediate_buffer_alloc;
|
std::unique_ptr<u8[]> immediate_buffer_alloc;
|
||||||
|
@ -547,29 +551,18 @@ void BufferCache<P>::FlushCachedWrites() {
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
bool BufferCache<P>::HasUncommittedFlushes() const noexcept {
|
bool BufferCache<P>::HasUncommittedFlushes() const noexcept {
|
||||||
return !uncommitted_downloads.empty();
|
return !uncommitted_ranges.empty();
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
bool BufferCache<P>::ShouldWaitAsyncFlushes() const noexcept {
|
bool BufferCache<P>::ShouldWaitAsyncFlushes() const noexcept {
|
||||||
return !committed_downloads.empty() && !committed_downloads.front().empty();
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
void BufferCache<P>::CommitAsyncFlushes() {
|
void BufferCache<P>::CommitAsyncFlushesHigh() {
|
||||||
// This is intentionally passing the value by copy
|
const IntervalSet& intervals = uncommitted_ranges;
|
||||||
committed_downloads.push_front(uncommitted_downloads);
|
if (intervals.empty()) {
|
||||||
uncommitted_downloads.clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
template <class P>
|
|
||||||
void BufferCache<P>::PopAsyncFlushes() {
|
|
||||||
if (committed_downloads.empty()) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
auto scope_exit_pop_download = detail::ScopeExit([this] { committed_downloads.pop_back(); });
|
|
||||||
const std::span<const BufferId> download_ids = committed_downloads.back();
|
|
||||||
if (download_ids.empty()) {
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
MICROPROFILE_SCOPE(GPU_DownloadMemory);
|
MICROPROFILE_SCOPE(GPU_DownloadMemory);
|
||||||
|
@ -577,18 +570,35 @@ void BufferCache<P>::PopAsyncFlushes() {
|
||||||
boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads;
|
boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads;
|
||||||
u64 total_size_bytes = 0;
|
u64 total_size_bytes = 0;
|
||||||
u64 largest_copy = 0;
|
u64 largest_copy = 0;
|
||||||
for (const BufferId buffer_id : download_ids) {
|
for (auto& interval : intervals) {
|
||||||
slot_buffers[buffer_id].ForEachDownloadRange([&](u64 range_offset, u64 range_size) {
|
const std::size_t size = interval.upper() - interval.lower();
|
||||||
|
const VAddr cpu_addr = interval.lower();
|
||||||
|
const VAddr cpu_addr_end = interval.upper();
|
||||||
|
ForEachBufferInRange(cpu_addr, size, [&](BufferId buffer_id, Buffer& buffer) {
|
||||||
|
boost::container::small_vector<BufferCopy, 1> copies;
|
||||||
|
buffer.ForEachDownloadRange(
|
||||||
|
cpu_addr, size, false, [&](u64 range_offset, u64 range_size) {
|
||||||
|
VAddr cpu_addr_base = buffer.CpuAddr() + range_offset;
|
||||||
|
VAddr cpu_addr_end2 = cpu_addr_base + range_size;
|
||||||
|
const s64 difference = s64(cpu_addr_end2 - cpu_addr_end);
|
||||||
|
cpu_addr_end2 -= u64(std::max<s64>(difference, 0));
|
||||||
|
const s64 difference2 = s64(cpu_addr - cpu_addr_base);
|
||||||
|
cpu_addr_base += u64(std::max<s64>(difference2, 0));
|
||||||
|
const u64 new_size = cpu_addr_end2 - cpu_addr_base;
|
||||||
|
const u64 new_offset = cpu_addr_base - buffer.CpuAddr();
|
||||||
|
ASSERT(!IsRegionCpuModified(cpu_addr_base, new_size));
|
||||||
downloads.push_back({
|
downloads.push_back({
|
||||||
BufferCopy{
|
BufferCopy{
|
||||||
.src_offset = range_offset,
|
.src_offset = new_offset,
|
||||||
.dst_offset = total_size_bytes,
|
.dst_offset = total_size_bytes,
|
||||||
.size = range_size,
|
.size = new_size,
|
||||||
},
|
},
|
||||||
buffer_id,
|
buffer_id,
|
||||||
});
|
});
|
||||||
total_size_bytes += range_size;
|
total_size_bytes += new_size;
|
||||||
largest_copy = std::max(largest_copy, range_size);
|
buffer.UnmarkRegionAsGpuModified(cpu_addr_base, new_size);
|
||||||
|
largest_copy = std::max(largest_copy, new_size);
|
||||||
|
});
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
if (downloads.empty()) {
|
if (downloads.empty()) {
|
||||||
|
@ -622,6 +632,18 @@ void BufferCache<P>::PopAsyncFlushes() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class P>
|
||||||
|
void BufferCache<P>::CommitAsyncFlushes() {
|
||||||
|
if (Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High) {
|
||||||
|
CommitAsyncFlushesHigh();
|
||||||
|
} else {
|
||||||
|
uncommitted_ranges.clear();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class P>
|
||||||
|
void BufferCache<P>::PopAsyncFlushes() {}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
|
bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
|
||||||
const u64 page_end = Common::DivCeil(addr + size, PAGE_SIZE);
|
const u64 page_end = Common::DivCeil(addr + size, PAGE_SIZE);
|
||||||
|
@ -641,6 +663,25 @@ bool BufferCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class P>
|
||||||
|
bool BufferCache<P>::IsRegionCpuModified(VAddr addr, size_t size) {
|
||||||
|
const u64 page_end = Common::DivCeil(addr + size, PAGE_SIZE);
|
||||||
|
for (u64 page = addr >> PAGE_BITS; page < page_end;) {
|
||||||
|
const BufferId image_id = page_table[page];
|
||||||
|
if (!image_id) {
|
||||||
|
++page;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
Buffer& buffer = slot_buffers[image_id];
|
||||||
|
if (buffer.IsRegionCpuModified(addr, size)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
const VAddr end_addr = buffer.CpuAddr() + buffer.SizeBytes();
|
||||||
|
page = Common::DivCeil(end_addr, PAGE_SIZE);
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
void BufferCache<P>::BindHostIndexBuffer() {
|
void BufferCache<P>::BindHostIndexBuffer() {
|
||||||
Buffer& buffer = slot_buffers[index_buffer.buffer_id];
|
Buffer& buffer = slot_buffers[index_buffer.buffer_id];
|
||||||
|
@ -1010,16 +1051,14 @@ void BufferCache<P>::MarkWrittenBuffer(BufferId buffer_id, VAddr cpu_addr, u32 s
|
||||||
Buffer& buffer = slot_buffers[buffer_id];
|
Buffer& buffer = slot_buffers[buffer_id];
|
||||||
buffer.MarkRegionAsGpuModified(cpu_addr, size);
|
buffer.MarkRegionAsGpuModified(cpu_addr, size);
|
||||||
|
|
||||||
const bool is_accuracy_high = Settings::IsGPULevelHigh();
|
const bool is_accuracy_high =
|
||||||
|
Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::High;
|
||||||
const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue();
|
const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue();
|
||||||
if (!is_accuracy_high || !is_async) {
|
if (!is_async && !is_accuracy_high) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (std::ranges::find(uncommitted_downloads, buffer_id) != uncommitted_downloads.end()) {
|
const IntervalType base_interval{cpu_addr, cpu_addr + size};
|
||||||
// Already inserted
|
uncommitted_ranges.add(base_interval);
|
||||||
return;
|
|
||||||
}
|
|
||||||
uncommitted_downloads.push_back(buffer_id);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
|
@ -1103,7 +1142,6 @@ void BufferCache<P>::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id,
|
||||||
if (!copies.empty()) {
|
if (!copies.empty()) {
|
||||||
runtime.CopyBuffer(slot_buffers[new_buffer_id], overlap, copies);
|
runtime.CopyBuffer(slot_buffers[new_buffer_id], overlap, copies);
|
||||||
}
|
}
|
||||||
ReplaceBufferDownloads(overlap_id, new_buffer_id);
|
|
||||||
DeleteBuffer(overlap_id);
|
DeleteBuffer(overlap_id);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1244,7 +1282,7 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 si
|
||||||
boost::container::small_vector<BufferCopy, 1> copies;
|
boost::container::small_vector<BufferCopy, 1> copies;
|
||||||
u64 total_size_bytes = 0;
|
u64 total_size_bytes = 0;
|
||||||
u64 largest_copy = 0;
|
u64 largest_copy = 0;
|
||||||
buffer.ForEachDownloadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) {
|
buffer.ForEachDownloadRange(cpu_addr, size, true, [&](u64 range_offset, u64 range_size) {
|
||||||
copies.push_back(BufferCopy{
|
copies.push_back(BufferCopy{
|
||||||
.src_offset = range_offset,
|
.src_offset = range_offset,
|
||||||
.dst_offset = total_size_bytes,
|
.dst_offset = total_size_bytes,
|
||||||
|
@ -1315,18 +1353,6 @@ void BufferCache<P>::DeleteBuffer(BufferId buffer_id) {
|
||||||
NotifyBufferDeletion();
|
NotifyBufferDeletion();
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
|
||||||
void BufferCache<P>::ReplaceBufferDownloads(BufferId old_buffer_id, BufferId new_buffer_id) {
|
|
||||||
const auto replace = [old_buffer_id, new_buffer_id](std::vector<BufferId>& buffers) {
|
|
||||||
std::ranges::replace(buffers, old_buffer_id, new_buffer_id);
|
|
||||||
if (auto it = std::ranges::find(buffers, new_buffer_id); it != buffers.end()) {
|
|
||||||
buffers.erase(std::remove(it + 1, buffers.end(), new_buffer_id), buffers.end());
|
|
||||||
}
|
|
||||||
};
|
|
||||||
replace(uncommitted_downloads);
|
|
||||||
std::ranges::for_each(committed_downloads, replace);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
void BufferCache<P>::NotifyBufferDeletion() {
|
void BufferCache<P>::NotifyBufferDeletion() {
|
||||||
if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
|
if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
|
||||||
|
|
|
@ -65,19 +65,20 @@ void Fermi2D::Blit() {
|
||||||
.src_x1 = static_cast<s32>((args.du_dx * args.dst_width + args.src_x0) >> 32),
|
.src_x1 = static_cast<s32>((args.du_dx * args.dst_width + args.src_x0) >> 32),
|
||||||
.src_y1 = static_cast<s32>((args.dv_dy * args.dst_height + args.src_y0) >> 32),
|
.src_y1 = static_cast<s32>((args.dv_dy * args.dst_height + args.src_y0) >> 32),
|
||||||
};
|
};
|
||||||
|
|
||||||
Surface src = regs.src;
|
Surface src = regs.src;
|
||||||
s32 src_address_offset = 0;
|
|
||||||
const auto bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format));
|
const auto bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format));
|
||||||
if (src.linear == Tegra::Engines::Fermi2D::MemoryLayout::Pitch && src.width == config.src_x1 &&
|
const auto is_copy_out_of_bound =
|
||||||
config.src_x1 > static_cast<s32>(src.pitch / bytes_per_pixel) && config.src_x0 > 0) {
|
src.linear == Tegra::Engines::Fermi2D::MemoryLayout::Pitch && src.width == config.src_x1 &&
|
||||||
src_address_offset = config.src_x0 * bytes_per_pixel;
|
config.src_x1 > static_cast<s32>(src.pitch / bytes_per_pixel) && config.src_x0 > 0;
|
||||||
|
if (is_copy_out_of_bound) {
|
||||||
|
auto address = src.Address() + config.src_x0 * bytes_per_pixel;
|
||||||
|
src.addr_upper = static_cast<u32>(address >> 32);
|
||||||
|
src.addr_lower = static_cast<u32>(address);
|
||||||
src.width -= config.src_x0;
|
src.width -= config.src_x0;
|
||||||
config.src_x1 -= config.src_x0;
|
config.src_x1 -= config.src_x0;
|
||||||
config.src_x0 = 0;
|
config.src_x0 = 0;
|
||||||
}
|
}
|
||||||
|
if (!rasterizer->AccelerateSurfaceCopy(src, regs.dst, config)) {
|
||||||
if (!rasterizer->AccelerateSurfaceCopy(src, src_address_offset, regs.dst, config)) {
|
|
||||||
UNIMPLEMENTED();
|
UNIMPLEMENTED();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -96,6 +96,23 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void TryReleasePendingFences() {
|
||||||
|
while (!fences.empty()) {
|
||||||
|
TFence& current_fence = fences.front();
|
||||||
|
if (ShouldWait() && !IsFenceSignaled(current_fence)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
PopAsyncFlushes();
|
||||||
|
if (current_fence->IsSemaphore()) {
|
||||||
|
gpu_memory.template Write<u32>(current_fence->GetAddress(),
|
||||||
|
current_fence->GetPayload());
|
||||||
|
} else {
|
||||||
|
gpu.IncrementSyncPoint(current_fence->GetPayload());
|
||||||
|
}
|
||||||
|
PopFence();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
explicit FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
|
explicit FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
|
||||||
TTextureCache& texture_cache_, TTBufferCache& buffer_cache_,
|
TTextureCache& texture_cache_, TTBufferCache& buffer_cache_,
|
||||||
|
@ -125,23 +142,6 @@ protected:
|
||||||
TQueryCache& query_cache;
|
TQueryCache& query_cache;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void TryReleasePendingFences() {
|
|
||||||
while (!fences.empty()) {
|
|
||||||
TFence& current_fence = fences.front();
|
|
||||||
if (ShouldWait() && !IsFenceSignaled(current_fence)) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
PopAsyncFlushes();
|
|
||||||
if (current_fence->IsSemaphore()) {
|
|
||||||
gpu_memory.template Write<u32>(current_fence->GetAddress(),
|
|
||||||
current_fence->GetPayload());
|
|
||||||
} else {
|
|
||||||
gpu.IncrementSyncPoint(current_fence->GetPayload());
|
|
||||||
}
|
|
||||||
PopFence();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool ShouldWait() const {
|
bool ShouldWait() const {
|
||||||
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
|
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
|
||||||
return texture_cache.ShouldWaitAsyncFlushes() || buffer_cache.ShouldWaitAsyncFlushes() ||
|
return texture_cache.ShouldWaitAsyncFlushes() || buffer_cache.ShouldWaitAsyncFlushes() ||
|
||||||
|
|
|
@ -8,6 +8,7 @@
|
||||||
#include "common/settings.h"
|
#include "common/settings.h"
|
||||||
#include "common/thread.h"
|
#include "common/thread.h"
|
||||||
#include "core/core.h"
|
#include "core/core.h"
|
||||||
|
#include "core/core_timing.h"
|
||||||
#include "core/frontend/emu_window.h"
|
#include "core/frontend/emu_window.h"
|
||||||
#include "video_core/dma_pusher.h"
|
#include "video_core/dma_pusher.h"
|
||||||
#include "video_core/gpu.h"
|
#include "video_core/gpu.h"
|
||||||
|
@ -83,6 +84,17 @@ void ThreadManager::StartThread(VideoCore::RendererBase& renderer,
|
||||||
rasterizer = renderer.ReadRasterizer();
|
rasterizer = renderer.ReadRasterizer();
|
||||||
thread = std::thread(RunThread, std::ref(system), std::ref(renderer), std::ref(context),
|
thread = std::thread(RunThread, std::ref(system), std::ref(renderer), std::ref(context),
|
||||||
std::ref(dma_pusher), std::ref(state));
|
std::ref(dma_pusher), std::ref(state));
|
||||||
|
gpu_sync_event = Core::Timing::CreateEvent(
|
||||||
|
"GPUHostSyncCallback", [this](std::uintptr_t, std::chrono::nanoseconds) {
|
||||||
|
if (!state.is_running) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
OnCommandListEnd();
|
||||||
|
const auto time_interval = std::chrono::nanoseconds{500 * 1000};
|
||||||
|
system.CoreTiming().ScheduleEvent(time_interval, gpu_sync_event);
|
||||||
|
});
|
||||||
|
system.CoreTiming().ScheduleEvent(std::chrono::nanoseconds{500 * 1000}, gpu_sync_event);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
|
void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
|
||||||
|
@ -128,6 +140,9 @@ void ThreadManager::ShutDown() {
|
||||||
state.cv.notify_all();
|
state.cv.notify_all();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
system.CoreTiming().UnscheduleEvent(gpu_sync_event, 0);
|
||||||
|
system.CoreTiming().RemoveEvent(gpu_sync_event);
|
||||||
|
|
||||||
if (!thread.joinable()) {
|
if (!thread.joinable()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,6 +20,10 @@ class DmaPusher;
|
||||||
} // namespace Tegra
|
} // namespace Tegra
|
||||||
|
|
||||||
namespace Core {
|
namespace Core {
|
||||||
|
namespace Timing {
|
||||||
|
class CoreTiming;
|
||||||
|
struct EventType;
|
||||||
|
} // namespace Timing
|
||||||
namespace Frontend {
|
namespace Frontend {
|
||||||
class GraphicsContext;
|
class GraphicsContext;
|
||||||
}
|
}
|
||||||
|
@ -150,6 +154,7 @@ private:
|
||||||
|
|
||||||
SynchState state;
|
SynchState state;
|
||||||
std::thread thread;
|
std::thread thread;
|
||||||
|
std::shared_ptr<Core::Timing::EventType> gpu_sync_event;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace VideoCommon::GPUThread
|
} // namespace VideoCommon::GPUThread
|
||||||
|
|
|
@ -69,11 +69,16 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
|
||||||
} else {
|
} else {
|
||||||
UNREACHABLE_MSG("Unmapping non-existent GPU address=0x{:x}", gpu_addr);
|
UNREACHABLE_MSG("Unmapping non-existent GPU address=0x{:x}", gpu_addr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const auto submapped_ranges = GetSubmappedRange(gpu_addr, size);
|
||||||
|
|
||||||
|
for (const auto& map : submapped_ranges) {
|
||||||
// Flush and invalidate through the GPU interface, to be asynchronous if possible.
|
// Flush and invalidate through the GPU interface, to be asynchronous if possible.
|
||||||
const std::optional<VAddr> cpu_addr = GpuToCpuAddress(gpu_addr);
|
const std::optional<VAddr> cpu_addr = GpuToCpuAddress(map.first);
|
||||||
ASSERT(cpu_addr);
|
ASSERT(cpu_addr);
|
||||||
|
|
||||||
rasterizer->UnmapMemory(*cpu_addr, size);
|
rasterizer->UnmapMemory(*cpu_addr, map.second);
|
||||||
|
}
|
||||||
|
|
||||||
UpdateRange(gpu_addr, PageEntry::State::Unmapped, size);
|
UpdateRange(gpu_addr, PageEntry::State::Unmapped, size);
|
||||||
}
|
}
|
||||||
|
@ -146,8 +151,14 @@ void MemoryManager::SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::s
|
||||||
|
|
||||||
//// Lock the new page
|
//// Lock the new page
|
||||||
// TryLockPage(page_entry, size);
|
// TryLockPage(page_entry, size);
|
||||||
|
auto& current_page = page_table[PageEntryIndex(gpu_addr)];
|
||||||
|
|
||||||
page_table[PageEntryIndex(gpu_addr)] = page_entry;
|
if ((!current_page.IsValid() && page_entry.IsValid()) ||
|
||||||
|
current_page.ToAddress() != page_entry.ToAddress()) {
|
||||||
|
rasterizer->ModifyGPUMemory(gpu_addr, size);
|
||||||
|
}
|
||||||
|
|
||||||
|
current_page = page_entry;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::optional<GPUVAddr> MemoryManager::FindFreeRange(std::size_t size, std::size_t align,
|
std::optional<GPUVAddr> MemoryManager::FindFreeRange(std::size_t size, std::size_t align,
|
||||||
|
@ -193,6 +204,19 @@ std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const {
|
||||||
return page_entry.ToAddress() + (gpu_addr & page_mask);
|
return page_entry.ToAddress() + (gpu_addr & page_mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::optional<VAddr> MemoryManager::GpuToCpuAddress(GPUVAddr addr, std::size_t size) const {
|
||||||
|
size_t page_index{addr >> page_bits};
|
||||||
|
const size_t page_last{(addr + size + page_size - 1) >> page_bits};
|
||||||
|
while (page_index < page_last) {
|
||||||
|
const auto page_addr{GpuToCpuAddress(page_index << page_bits)};
|
||||||
|
if (page_addr && *page_addr != 0) {
|
||||||
|
return page_addr;
|
||||||
|
}
|
||||||
|
++page_index;
|
||||||
|
}
|
||||||
|
return std::nullopt;
|
||||||
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
T MemoryManager::Read(GPUVAddr addr) const {
|
T MemoryManager::Read(GPUVAddr addr) const {
|
||||||
if (auto page_pointer{GetPointer(addr)}; page_pointer) {
|
if (auto page_pointer{GetPointer(addr)}; page_pointer) {
|
||||||
|
@ -389,4 +413,79 @@ bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const {
|
||||||
return page <= Core::Memory::PAGE_SIZE;
|
return page <= Core::Memory::PAGE_SIZE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool MemoryManager::IsContinousRange(GPUVAddr gpu_addr, std::size_t size) const {
|
||||||
|
size_t page_index{gpu_addr >> page_bits};
|
||||||
|
const size_t page_last{(gpu_addr + size + page_size - 1) >> page_bits};
|
||||||
|
std::optional<VAddr> old_page_addr{};
|
||||||
|
while (page_index != page_last) {
|
||||||
|
const auto page_addr{GpuToCpuAddress(page_index << page_bits)};
|
||||||
|
if (!page_addr || *page_addr == 0) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (old_page_addr) {
|
||||||
|
if (*old_page_addr + page_size != *page_addr) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
old_page_addr = page_addr;
|
||||||
|
++page_index;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) const {
|
||||||
|
size_t page_index{gpu_addr >> page_bits};
|
||||||
|
const size_t page_last{(gpu_addr + size + page_size - 1) >> page_bits};
|
||||||
|
while (page_index < page_last) {
|
||||||
|
if (!page_table[page_index].IsValid() || page_table[page_index].ToAddress() == 0) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
++page_index;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<std::pair<GPUVAddr, std::size_t>> MemoryManager::GetSubmappedRange(
|
||||||
|
GPUVAddr gpu_addr, std::size_t size) const {
|
||||||
|
std::vector<std::pair<GPUVAddr, std::size_t>> result{};
|
||||||
|
size_t page_index{gpu_addr >> page_bits};
|
||||||
|
size_t remaining_size{size};
|
||||||
|
size_t page_offset{gpu_addr & page_mask};
|
||||||
|
std::optional<std::pair<GPUVAddr, std::size_t>> last_segment{};
|
||||||
|
std::optional<VAddr> old_page_addr{};
|
||||||
|
const auto extend_size = [this, &last_segment, &page_index](std::size_t bytes) {
|
||||||
|
if (!last_segment) {
|
||||||
|
GPUVAddr new_base_addr = page_index << page_bits;
|
||||||
|
last_segment = {new_base_addr, bytes};
|
||||||
|
} else {
|
||||||
|
last_segment->second += bytes;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
const auto split = [this, &last_segment, &result] {
|
||||||
|
if (last_segment) {
|
||||||
|
result.push_back(*last_segment);
|
||||||
|
last_segment = std::nullopt;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
while (remaining_size > 0) {
|
||||||
|
const size_t num_bytes{std::min(page_size - page_offset, remaining_size)};
|
||||||
|
const auto page_addr{GpuToCpuAddress(page_index << page_bits)};
|
||||||
|
if (!page_addr) {
|
||||||
|
split();
|
||||||
|
} else if (old_page_addr) {
|
||||||
|
if (*old_page_addr + page_size != *page_addr) {
|
||||||
|
split();
|
||||||
|
}
|
||||||
|
extend_size(num_bytes);
|
||||||
|
} else {
|
||||||
|
extend_size(num_bytes);
|
||||||
|
}
|
||||||
|
++page_index;
|
||||||
|
page_offset = 0;
|
||||||
|
remaining_size -= num_bytes;
|
||||||
|
}
|
||||||
|
split();
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace Tegra
|
} // namespace Tegra
|
||||||
|
|
|
@ -76,6 +76,8 @@ public:
|
||||||
|
|
||||||
[[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const;
|
[[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr) const;
|
||||||
|
|
||||||
|
[[nodiscard]] std::optional<VAddr> GpuToCpuAddress(GPUVAddr addr, std::size_t size) const;
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
[[nodiscard]] T Read(GPUVAddr addr) const;
|
[[nodiscard]] T Read(GPUVAddr addr) const;
|
||||||
|
|
||||||
|
@ -112,10 +114,28 @@ public:
|
||||||
void WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
|
void WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* IsGranularRange checks if a gpu region can be simply read with a pointer.
|
* Checks if a gpu region can be simply read with a pointer.
|
||||||
*/
|
*/
|
||||||
[[nodiscard]] bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const;
|
[[nodiscard]] bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks if a gpu region is mapped by a single range of cpu addresses.
|
||||||
|
*/
|
||||||
|
[[nodiscard]] bool IsContinousRange(GPUVAddr gpu_addr, std::size_t size) const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks if a gpu region is mapped entirely.
|
||||||
|
*/
|
||||||
|
[[nodiscard]] bool IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a vector with all the subranges of cpu addresses mapped beneath.
|
||||||
|
* If the region is continuous, a single pair will be returned. If it's unmapped, an empty vector
|
||||||
|
* will be returned.
|
||||||
|
*/
|
||||||
|
std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr,
|
||||||
|
std::size_t size) const;
|
||||||
|
|
||||||
[[nodiscard]] GPUVAddr Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size);
|
[[nodiscard]] GPUVAddr Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size);
|
||||||
[[nodiscard]] GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align);
|
[[nodiscard]] GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align);
|
||||||
[[nodiscard]] GPUVAddr MapAllocate32(VAddr cpu_addr, std::size_t size);
|
[[nodiscard]] GPUVAddr MapAllocate32(VAddr cpu_addr, std::size_t size);
|
||||||
|
|
|
@ -93,6 +93,9 @@ public:
|
||||||
/// Unmap memory range
|
/// Unmap memory range
|
||||||
virtual void UnmapMemory(VAddr addr, u64 size) = 0;
|
virtual void UnmapMemory(VAddr addr, u64 size) = 0;
|
||||||
|
|
||||||
|
/// Remap GPU memory range. This means the underlying backing memory changed.
|
||||||
|
virtual void ModifyGPUMemory(GPUVAddr addr, u64 size) = 0;
|
||||||
|
|
||||||
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
|
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
|
||||||
/// and invalidated
|
/// and invalidated
|
||||||
virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
|
virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
|
||||||
|
@ -114,8 +117,7 @@ public:
|
||||||
|
|
||||||
/// Attempt to use a faster method to perform a surface copy
|
/// Attempt to use a faster method to perform a surface copy
|
||||||
[[nodiscard]] virtual bool AccelerateSurfaceCopy(
|
[[nodiscard]] virtual bool AccelerateSurfaceCopy(
|
||||||
const Tegra::Engines::Fermi2D::Surface& src, s32 src_address_offset,
|
const Tegra::Engines::Fermi2D::Surface& src, const Tegra::Engines::Fermi2D::Surface& dst,
|
||||||
const Tegra::Engines::Fermi2D::Surface& dst,
|
|
||||||
const Tegra::Engines::Fermi2D::Config& copy_config) {
|
const Tegra::Engines::Fermi2D::Config& copy_config) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
|
@ -631,6 +631,13 @@ void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {
|
||||||
shader_cache.OnCPUWrite(addr, size);
|
shader_cache.OnCPUWrite(addr, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void RasterizerOpenGL::ModifyGPUMemory(GPUVAddr addr, u64 size) {
|
||||||
|
{
|
||||||
|
std::scoped_lock lock{texture_cache.mutex};
|
||||||
|
texture_cache.UnmapGPUMemory(addr, size);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
|
void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
|
||||||
if (!gpu.IsAsync()) {
|
if (!gpu.IsAsync()) {
|
||||||
gpu_memory.Write<u32>(addr, value);
|
gpu_memory.Write<u32>(addr, value);
|
||||||
|
@ -698,12 +705,11 @@ void RasterizerOpenGL::TickFrame() {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
|
bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
|
||||||
s32 src_address_offset,
|
|
||||||
const Tegra::Engines::Fermi2D::Surface& dst,
|
const Tegra::Engines::Fermi2D::Surface& dst,
|
||||||
const Tegra::Engines::Fermi2D::Config& copy_config) {
|
const Tegra::Engines::Fermi2D::Config& copy_config) {
|
||||||
MICROPROFILE_SCOPE(OpenGL_Blits);
|
MICROPROFILE_SCOPE(OpenGL_Blits);
|
||||||
std::scoped_lock lock{texture_cache.mutex};
|
std::scoped_lock lock{texture_cache.mutex};
|
||||||
texture_cache.BlitImage(dst, src, src_address_offset, copy_config);
|
texture_cache.BlitImage(dst, src, copy_config);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -82,6 +82,7 @@ public:
|
||||||
void OnCPUWrite(VAddr addr, u64 size) override;
|
void OnCPUWrite(VAddr addr, u64 size) override;
|
||||||
void SyncGuestHost() override;
|
void SyncGuestHost() override;
|
||||||
void UnmapMemory(VAddr addr, u64 size) override;
|
void UnmapMemory(VAddr addr, u64 size) override;
|
||||||
|
void ModifyGPUMemory(GPUVAddr addr, u64 size) override;
|
||||||
void SignalSemaphore(GPUVAddr addr, u32 value) override;
|
void SignalSemaphore(GPUVAddr addr, u32 value) override;
|
||||||
void SignalSyncPoint(u32 value) override;
|
void SignalSyncPoint(u32 value) override;
|
||||||
void ReleaseFences() override;
|
void ReleaseFences() override;
|
||||||
|
@ -91,7 +92,7 @@ public:
|
||||||
void TiledCacheBarrier() override;
|
void TiledCacheBarrier() override;
|
||||||
void FlushCommands() override;
|
void FlushCommands() override;
|
||||||
void TickFrame() override;
|
void TickFrame() override;
|
||||||
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, s32 src_address_offset,
|
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
|
||||||
const Tegra::Engines::Fermi2D::Surface& dst,
|
const Tegra::Engines::Fermi2D::Surface& dst,
|
||||||
const Tegra::Engines::Fermi2D::Config& copy_config) override;
|
const Tegra::Engines::Fermi2D::Config& copy_config) override;
|
||||||
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
|
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
|
||||||
|
|
|
@ -34,6 +34,10 @@ bool InnerFence::IsSignaled() const {
|
||||||
if (is_stubbed) {
|
if (is_stubbed) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
if (scheduler.IsFree(wait_tick)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
scheduler.Refresh();
|
||||||
return scheduler.IsFree(wait_tick);
|
return scheduler.IsFree(wait_tick);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -577,6 +577,13 @@ void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) {
|
||||||
pipeline_cache.OnCPUWrite(addr, size);
|
pipeline_cache.OnCPUWrite(addr, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void RasterizerVulkan::ModifyGPUMemory(GPUVAddr addr, u64 size) {
|
||||||
|
{
|
||||||
|
std::scoped_lock lock{texture_cache.mutex};
|
||||||
|
texture_cache.UnmapGPUMemory(addr, size);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) {
|
void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) {
|
||||||
if (!gpu.IsAsync()) {
|
if (!gpu.IsAsync()) {
|
||||||
gpu_memory.Write<u32>(addr, value);
|
gpu_memory.Write<u32>(addr, value);
|
||||||
|
@ -597,7 +604,7 @@ void RasterizerVulkan::ReleaseFences() {
|
||||||
if (!gpu.IsAsync()) {
|
if (!gpu.IsAsync()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
fence_manager.WaitPendingFences();
|
fence_manager.TryReleasePendingFences();
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) {
|
void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) {
|
||||||
|
@ -658,11 +665,10 @@ void RasterizerVulkan::TickFrame() {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
|
bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
|
||||||
s32 src_address_offset,
|
|
||||||
const Tegra::Engines::Fermi2D::Surface& dst,
|
const Tegra::Engines::Fermi2D::Surface& dst,
|
||||||
const Tegra::Engines::Fermi2D::Config& copy_config) {
|
const Tegra::Engines::Fermi2D::Config& copy_config) {
|
||||||
std::scoped_lock lock{texture_cache.mutex};
|
std::scoped_lock lock{texture_cache.mutex};
|
||||||
texture_cache.BlitImage(dst, src, src_address_offset, copy_config);
|
texture_cache.BlitImage(dst, src, copy_config);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -74,6 +74,7 @@ public:
|
||||||
void OnCPUWrite(VAddr addr, u64 size) override;
|
void OnCPUWrite(VAddr addr, u64 size) override;
|
||||||
void SyncGuestHost() override;
|
void SyncGuestHost() override;
|
||||||
void UnmapMemory(VAddr addr, u64 size) override;
|
void UnmapMemory(VAddr addr, u64 size) override;
|
||||||
|
void ModifyGPUMemory(GPUVAddr addr, u64 size) override;
|
||||||
void SignalSemaphore(GPUVAddr addr, u32 value) override;
|
void SignalSemaphore(GPUVAddr addr, u32 value) override;
|
||||||
void SignalSyncPoint(u32 value) override;
|
void SignalSyncPoint(u32 value) override;
|
||||||
void ReleaseFences() override;
|
void ReleaseFences() override;
|
||||||
|
@ -83,7 +84,7 @@ public:
|
||||||
void TiledCacheBarrier() override;
|
void TiledCacheBarrier() override;
|
||||||
void FlushCommands() override;
|
void FlushCommands() override;
|
||||||
void TickFrame() override;
|
void TickFrame() override;
|
||||||
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, s32 src_address_offset,
|
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
|
||||||
const Tegra::Engines::Fermi2D::Surface& dst,
|
const Tegra::Engines::Fermi2D::Surface& dst,
|
||||||
const Tegra::Engines::Fermi2D::Config& copy_config) override;
|
const Tegra::Engines::Fermi2D::Config& copy_config) override;
|
||||||
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
|
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
|
||||||
|
|
|
@ -83,6 +83,10 @@ public:
|
||||||
return master_semaphore->IsFree(tick);
|
return master_semaphore->IsFree(tick);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Refresh() const noexcept {
|
||||||
|
return master_semaphore->Refresh();
|
||||||
|
}
|
||||||
|
|
||||||
/// Waits for the given tick to trigger on the GPU.
|
/// Waits for the given tick to trigger on the GPU.
|
||||||
void Wait(u64 tick) {
|
void Wait(u64 tick) {
|
||||||
master_semaphore->Wait(tick);
|
master_semaphore->Wait(tick);
|
||||||
|
|
|
@ -69,6 +69,9 @@ ImageBase::ImageBase(const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ImageMapView::ImageMapView(GPUVAddr gpu_addr_, VAddr cpu_addr_, size_t size_, ImageId image_id_)
|
||||||
|
: gpu_addr{gpu_addr_}, cpu_addr{cpu_addr_}, size{size_}, image_id{image_id_} {}
|
||||||
|
|
||||||
std::optional<SubresourceBase> ImageBase::TryFindBase(GPUVAddr other_addr) const noexcept {
|
std::optional<SubresourceBase> ImageBase::TryFindBase(GPUVAddr other_addr) const noexcept {
|
||||||
if (other_addr < gpu_addr) {
|
if (other_addr < gpu_addr) {
|
||||||
// Subresource address can't be lower than the base
|
// Subresource address can't be lower than the base
|
||||||
|
|
|
@ -25,11 +25,13 @@ enum class ImageFlagBits : u32 {
|
||||||
Strong = 1 << 5, ///< Exists in the image table, the dimensions are can be trusted
|
Strong = 1 << 5, ///< Exists in the image table, the dimensions are can be trusted
|
||||||
Registered = 1 << 6, ///< True when the image is registered
|
Registered = 1 << 6, ///< True when the image is registered
|
||||||
Picked = 1 << 7, ///< Temporary flag to mark the image as picked
|
Picked = 1 << 7, ///< Temporary flag to mark the image as picked
|
||||||
|
Remapped = 1 << 8, ///< Image has been remapped.
|
||||||
|
Sparse = 1 << 9, ///< Image has non continous submemory.
|
||||||
|
|
||||||
// Garbage Collection Flags
|
// Garbage Collection Flags
|
||||||
BadOverlap = 1 << 8, ///< This image overlaps other but doesn't fit, has higher
|
BadOverlap = 1 << 10, ///< This image overlaps other but doesn't fit, has higher
|
||||||
///< garbage collection priority
|
///< garbage collection priority
|
||||||
Alias = 1 << 9, ///< This image has aliases and has priority on garbage
|
Alias = 1 << 11, ///< This image has aliases and has priority on garbage
|
||||||
///< collection
|
///< collection
|
||||||
};
|
};
|
||||||
DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
|
DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits)
|
||||||
|
@ -57,6 +59,12 @@ struct ImageBase {
|
||||||
return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end;
|
return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
[[nodiscard]] bool OverlapsGPU(GPUVAddr overlap_gpu_addr, size_t overlap_size) const noexcept {
|
||||||
|
const VAddr overlap_end = overlap_gpu_addr + overlap_size;
|
||||||
|
const GPUVAddr gpu_addr_end = gpu_addr + guest_size_bytes;
|
||||||
|
return gpu_addr < overlap_end && overlap_gpu_addr < gpu_addr_end;
|
||||||
|
}
|
||||||
|
|
||||||
void CheckBadOverlapState();
|
void CheckBadOverlapState();
|
||||||
void CheckAliasState();
|
void CheckAliasState();
|
||||||
|
|
||||||
|
@ -84,6 +92,29 @@ struct ImageBase {
|
||||||
|
|
||||||
std::vector<AliasedImage> aliased_images;
|
std::vector<AliasedImage> aliased_images;
|
||||||
std::vector<ImageId> overlapping_images;
|
std::vector<ImageId> overlapping_images;
|
||||||
|
ImageMapId map_view_id{};
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ImageMapView {
|
||||||
|
explicit ImageMapView(GPUVAddr gpu_addr, VAddr cpu_addr, size_t size, ImageId image_id);
|
||||||
|
|
||||||
|
[[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept {
|
||||||
|
const VAddr overlap_end = overlap_cpu_addr + overlap_size;
|
||||||
|
const VAddr cpu_addr_end = cpu_addr + size;
|
||||||
|
return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end;
|
||||||
|
}
|
||||||
|
|
||||||
|
[[nodiscard]] bool OverlapsGPU(GPUVAddr overlap_gpu_addr, size_t overlap_size) const noexcept {
|
||||||
|
const GPUVAddr overlap_end = overlap_gpu_addr + overlap_size;
|
||||||
|
const GPUVAddr gpu_addr_end = gpu_addr + size;
|
||||||
|
return gpu_addr < overlap_end && overlap_gpu_addr < gpu_addr_end;
|
||||||
|
}
|
||||||
|
|
||||||
|
GPUVAddr gpu_addr;
|
||||||
|
VAddr cpu_addr;
|
||||||
|
size_t size;
|
||||||
|
ImageId image_id;
|
||||||
|
bool picked{};
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ImageAllocBase {
|
struct ImageAllocBase {
|
||||||
|
|
|
@ -13,6 +13,7 @@
|
||||||
#include <span>
|
#include <span>
|
||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
|
#include <unordered_set>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
@ -152,9 +153,12 @@ public:
|
||||||
/// Remove images in a region
|
/// Remove images in a region
|
||||||
void UnmapMemory(VAddr cpu_addr, size_t size);
|
void UnmapMemory(VAddr cpu_addr, size_t size);
|
||||||
|
|
||||||
|
/// Flag images in a GPU region as remapped and stop tracking them
|
||||||
|
void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size);
|
||||||
|
|
||||||
/// Blit an image with the given parameters
|
/// Blit an image with the given parameters
|
||||||
void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
|
void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
|
||||||
const Tegra::Engines::Fermi2D::Surface& src, s32 src_address_offset,
|
const Tegra::Engines::Fermi2D::Surface& src,
|
||||||
const Tegra::Engines::Fermi2D::Config& copy);
|
const Tegra::Engines::Fermi2D::Config& copy);
|
||||||
|
|
||||||
/// Invalidate the contents of the color buffer index
|
/// Invalidate the contents of the color buffer index
|
||||||
|
@ -188,7 +192,22 @@ public:
|
||||||
private:
|
private:
|
||||||
/// Iterate over all page indices in a range
|
/// Iterate over all page indices in a range
|
||||||
template <typename Func>
|
template <typename Func>
|
||||||
static void ForEachPage(VAddr addr, size_t size, Func&& func) {
|
static void ForEachCPUPage(VAddr addr, size_t size, Func&& func) {
|
||||||
|
static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>;
|
||||||
|
const u64 page_end = (addr + size - 1) >> PAGE_BITS;
|
||||||
|
for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
|
||||||
|
if constexpr (RETURNS_BOOL) {
|
||||||
|
if (func(page)) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
func(page);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename Func>
|
||||||
|
static void ForEachGPUPage(GPUVAddr addr, size_t size, Func&& func) {
|
||||||
static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>;
|
static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>;
|
||||||
const u64 page_end = (addr + size - 1) >> PAGE_BITS;
|
const u64 page_end = (addr + size - 1) >> PAGE_BITS;
|
||||||
for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
|
for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) {
|
||||||
|
@ -218,7 +237,7 @@ private:
|
||||||
FramebufferId GetFramebufferId(const RenderTargets& key);
|
FramebufferId GetFramebufferId(const RenderTargets& key);
|
||||||
|
|
||||||
/// Refresh the contents (pixel data) of an image
|
/// Refresh the contents (pixel data) of an image
|
||||||
void RefreshContents(Image& image);
|
void RefreshContents(Image& image, ImageId image_id);
|
||||||
|
|
||||||
/// Upload data from guest to an image
|
/// Upload data from guest to an image
|
||||||
template <typename StagingBuffer>
|
template <typename StagingBuffer>
|
||||||
|
@ -248,8 +267,7 @@ private:
|
||||||
|
|
||||||
/// Return a blit image pair from the given guest blit parameters
|
/// Return a blit image pair from the given guest blit parameters
|
||||||
[[nodiscard]] BlitImages GetBlitImages(const Tegra::Engines::Fermi2D::Surface& dst,
|
[[nodiscard]] BlitImages GetBlitImages(const Tegra::Engines::Fermi2D::Surface& dst,
|
||||||
const Tegra::Engines::Fermi2D::Surface& src,
|
const Tegra::Engines::Fermi2D::Surface& src);
|
||||||
s32 src_address_offset);
|
|
||||||
|
|
||||||
/// Find or create a sampler from a guest descriptor sampler
|
/// Find or create a sampler from a guest descriptor sampler
|
||||||
[[nodiscard]] SamplerId FindSampler(const TSCEntry& config);
|
[[nodiscard]] SamplerId FindSampler(const TSCEntry& config);
|
||||||
|
@ -268,6 +286,16 @@ private:
|
||||||
template <typename Func>
|
template <typename Func>
|
||||||
void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func);
|
void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func);
|
||||||
|
|
||||||
|
template <typename Func>
|
||||||
|
void ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func);
|
||||||
|
|
||||||
|
template <typename Func>
|
||||||
|
void ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func);
|
||||||
|
|
||||||
|
/// Iterates over all the images in a region calling func
|
||||||
|
template <typename Func>
|
||||||
|
void ForEachSparseSegment(ImageBase& image, Func&& func);
|
||||||
|
|
||||||
/// Find or create an image view in the given image with the passed parameters
|
/// Find or create an image view in the given image with the passed parameters
|
||||||
[[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info);
|
[[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info);
|
||||||
|
|
||||||
|
@ -278,10 +306,10 @@ private:
|
||||||
void UnregisterImage(ImageId image);
|
void UnregisterImage(ImageId image);
|
||||||
|
|
||||||
/// Track CPU reads and writes for image
|
/// Track CPU reads and writes for image
|
||||||
void TrackImage(ImageBase& image);
|
void TrackImage(ImageBase& image, ImageId image_id);
|
||||||
|
|
||||||
/// Stop tracking CPU reads and writes for image
|
/// Stop tracking CPU reads and writes for image
|
||||||
void UntrackImage(ImageBase& image);
|
void UntrackImage(ImageBase& image, ImageId image_id);
|
||||||
|
|
||||||
/// Delete image from the cache
|
/// Delete image from the cache
|
||||||
void DeleteImage(ImageId image);
|
void DeleteImage(ImageId image);
|
||||||
|
@ -339,7 +367,13 @@ private:
|
||||||
std::unordered_map<TSCEntry, SamplerId> samplers;
|
std::unordered_map<TSCEntry, SamplerId> samplers;
|
||||||
std::unordered_map<RenderTargets, FramebufferId> framebuffers;
|
std::unordered_map<RenderTargets, FramebufferId> framebuffers;
|
||||||
|
|
||||||
std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> page_table;
|
std::unordered_map<u64, std::vector<ImageMapId>, IdentityHash<u64>> page_table;
|
||||||
|
std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table;
|
||||||
|
std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> sparse_page_table;
|
||||||
|
|
||||||
|
std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views;
|
||||||
|
|
||||||
|
VAddr virtual_invalid_space{};
|
||||||
|
|
||||||
bool has_deleted_images = false;
|
bool has_deleted_images = false;
|
||||||
u64 total_used_memory = 0;
|
u64 total_used_memory = 0;
|
||||||
|
@ -348,6 +382,7 @@ private:
|
||||||
u64 critical_memory;
|
u64 critical_memory;
|
||||||
|
|
||||||
SlotVector<Image> slot_images;
|
SlotVector<Image> slot_images;
|
||||||
|
SlotVector<ImageMapView> slot_map_views;
|
||||||
SlotVector<ImageView> slot_image_views;
|
SlotVector<ImageView> slot_image_views;
|
||||||
SlotVector<ImageAlloc> slot_image_allocs;
|
SlotVector<ImageAlloc> slot_image_allocs;
|
||||||
SlotVector<Sampler> slot_samplers;
|
SlotVector<Sampler> slot_samplers;
|
||||||
|
@ -458,7 +493,7 @@ void TextureCache<P>::RunGarbageCollector() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (True(image->flags & ImageFlagBits::Tracked)) {
|
if (True(image->flags & ImageFlagBits::Tracked)) {
|
||||||
UntrackImage(*image);
|
UntrackImage(*image, image_id);
|
||||||
}
|
}
|
||||||
UnregisterImage(image_id);
|
UnregisterImage(image_id);
|
||||||
DeleteImage(image_id);
|
DeleteImage(image_id);
|
||||||
|
@ -657,7 +692,9 @@ void TextureCache<P>::WriteMemory(VAddr cpu_addr, size_t size) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
image.flags |= ImageFlagBits::CpuModified;
|
image.flags |= ImageFlagBits::CpuModified;
|
||||||
UntrackImage(image);
|
if (True(image.flags & ImageFlagBits::Tracked)) {
|
||||||
|
UntrackImage(image, image_id);
|
||||||
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -694,18 +731,35 @@ void TextureCache<P>::UnmapMemory(VAddr cpu_addr, size_t size) {
|
||||||
for (const ImageId id : deleted_images) {
|
for (const ImageId id : deleted_images) {
|
||||||
Image& image = slot_images[id];
|
Image& image = slot_images[id];
|
||||||
if (True(image.flags & ImageFlagBits::Tracked)) {
|
if (True(image.flags & ImageFlagBits::Tracked)) {
|
||||||
UntrackImage(image);
|
UntrackImage(image, id);
|
||||||
}
|
}
|
||||||
UnregisterImage(id);
|
UnregisterImage(id);
|
||||||
DeleteImage(id);
|
DeleteImage(id);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class P>
|
||||||
|
void TextureCache<P>::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) {
|
||||||
|
std::vector<ImageId> deleted_images;
|
||||||
|
ForEachImageInRegionGPU(gpu_addr, size,
|
||||||
|
[&](ImageId id, Image&) { deleted_images.push_back(id); });
|
||||||
|
for (const ImageId id : deleted_images) {
|
||||||
|
Image& image = slot_images[id];
|
||||||
|
if (True(image.flags & ImageFlagBits::Remapped)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
image.flags |= ImageFlagBits::Remapped;
|
||||||
|
if (True(image.flags & ImageFlagBits::Tracked)) {
|
||||||
|
UntrackImage(image, id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
|
void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
|
||||||
const Tegra::Engines::Fermi2D::Surface& src, s32 src_address_offset,
|
const Tegra::Engines::Fermi2D::Surface& src,
|
||||||
const Tegra::Engines::Fermi2D::Config& copy) {
|
const Tegra::Engines::Fermi2D::Config& copy) {
|
||||||
const BlitImages images = GetBlitImages(dst, src, src_address_offset);
|
const BlitImages images = GetBlitImages(dst, src);
|
||||||
const ImageId dst_id = images.dst_id;
|
const ImageId dst_id = images.dst_id;
|
||||||
const ImageId src_id = images.src_id;
|
const ImageId src_id = images.src_id;
|
||||||
PrepareImage(src_id, false, false);
|
PrepareImage(src_id, false, false);
|
||||||
|
@ -725,7 +779,7 @@ void TextureCache<P>::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
|
||||||
Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y},
|
Offset2D{.x = copy.src_x1 >> src_samples_x, .y = copy.src_y1 >> src_samples_y},
|
||||||
};
|
};
|
||||||
|
|
||||||
const std::optional src_base = src_image.TryFindBase(src.Address() + src_address_offset);
|
const std::optional src_base = src_image.TryFindBase(src.Address());
|
||||||
const SubresourceRange src_range{.base = src_base.value(), .extent = {1, 1}};
|
const SubresourceRange src_range{.base = src_base.value(), .extent = {1, 1}};
|
||||||
const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range);
|
const ImageViewInfo src_view_info(ImageViewType::e2D, images.src_format, src_range);
|
||||||
const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info);
|
const auto [src_framebuffer_id, src_view_id] = RenderTargetFromImage(src_id, src_view_info);
|
||||||
|
@ -793,9 +847,10 @@ typename P::ImageView* TextureCache<P>::TryFindFramebufferImageView(VAddr cpu_ad
|
||||||
if (it == page_table.end()) {
|
if (it == page_table.end()) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
const auto& image_ids = it->second;
|
const auto& image_map_ids = it->second;
|
||||||
for (const ImageId image_id : image_ids) {
|
for (const ImageMapId map_id : image_map_ids) {
|
||||||
const ImageBase& image = slot_images[image_id];
|
const ImageMapView& map = slot_map_views[map_id];
|
||||||
|
const ImageBase& image = slot_images[map.image_id];
|
||||||
if (image.cpu_addr != cpu_addr) {
|
if (image.cpu_addr != cpu_addr) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -875,13 +930,13 @@ bool TextureCache<P>::IsRegionGpuModified(VAddr addr, size_t size) {
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
void TextureCache<P>::RefreshContents(Image& image) {
|
void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) {
|
||||||
if (False(image.flags & ImageFlagBits::CpuModified)) {
|
if (False(image.flags & ImageFlagBits::CpuModified)) {
|
||||||
// Only upload modified images
|
// Only upload modified images
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
image.flags &= ~ImageFlagBits::CpuModified;
|
image.flags &= ~ImageFlagBits::CpuModified;
|
||||||
TrackImage(image);
|
TrackImage(image, image_id);
|
||||||
|
|
||||||
if (image.info.num_samples > 1) {
|
if (image.info.num_samples > 1) {
|
||||||
LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented");
|
LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented");
|
||||||
|
@ -918,7 +973,7 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging)
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) {
|
ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) {
|
||||||
if (!IsValidAddress(gpu_memory, config)) {
|
if (!IsValidEntry(gpu_memory, config)) {
|
||||||
return NULL_IMAGE_VIEW_ID;
|
return NULL_IMAGE_VIEW_ID;
|
||||||
}
|
}
|
||||||
const auto [pair, is_new] = image_views.try_emplace(config);
|
const auto [pair, is_new] = image_views.try_emplace(config);
|
||||||
|
@ -960,14 +1015,20 @@ ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_a
|
||||||
template <class P>
|
template <class P>
|
||||||
ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
|
ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
|
||||||
RelaxedOptions options) {
|
RelaxedOptions options) {
|
||||||
const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
|
std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
|
||||||
|
if (!cpu_addr) {
|
||||||
|
cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info));
|
||||||
if (!cpu_addr) {
|
if (!cpu_addr) {
|
||||||
return ImageId{};
|
return ImageId{};
|
||||||
}
|
}
|
||||||
|
}
|
||||||
const bool broken_views = runtime.HasBrokenTextureViewFormats();
|
const bool broken_views = runtime.HasBrokenTextureViewFormats();
|
||||||
const bool native_bgr = runtime.HasNativeBgr();
|
const bool native_bgr = runtime.HasNativeBgr();
|
||||||
ImageId image_id;
|
ImageId image_id;
|
||||||
const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) {
|
const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) {
|
||||||
|
if (True(existing_image.flags & ImageFlagBits::Remapped)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) {
|
if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) {
|
||||||
const bool strict_size = False(options & RelaxedOptions::Size) &&
|
const bool strict_size = False(options & RelaxedOptions::Size) &&
|
||||||
True(existing_image.flags & ImageFlagBits::Strong);
|
True(existing_image.flags & ImageFlagBits::Strong);
|
||||||
|
@ -993,7 +1054,16 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
|
||||||
template <class P>
|
template <class P>
|
||||||
ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
|
ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
|
||||||
RelaxedOptions options) {
|
RelaxedOptions options) {
|
||||||
const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
|
std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
|
||||||
|
if (!cpu_addr) {
|
||||||
|
const auto size = CalculateGuestSizeInBytes(info);
|
||||||
|
cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, size);
|
||||||
|
if (!cpu_addr) {
|
||||||
|
const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space;
|
||||||
|
virtual_invalid_space += Common::AlignUp(size, 32);
|
||||||
|
cpu_addr = std::optional<VAddr>(fake_addr);
|
||||||
|
}
|
||||||
|
}
|
||||||
ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr);
|
ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr);
|
||||||
const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr);
|
const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr);
|
||||||
const Image& image = slot_images[image_id];
|
const Image& image = slot_images[image_id];
|
||||||
|
@ -1013,10 +1083,16 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
|
||||||
const bool broken_views = runtime.HasBrokenTextureViewFormats();
|
const bool broken_views = runtime.HasBrokenTextureViewFormats();
|
||||||
const bool native_bgr = runtime.HasNativeBgr();
|
const bool native_bgr = runtime.HasNativeBgr();
|
||||||
std::vector<ImageId> overlap_ids;
|
std::vector<ImageId> overlap_ids;
|
||||||
|
std::unordered_set<ImageId> overlaps_found;
|
||||||
std::vector<ImageId> left_aliased_ids;
|
std::vector<ImageId> left_aliased_ids;
|
||||||
std::vector<ImageId> right_aliased_ids;
|
std::vector<ImageId> right_aliased_ids;
|
||||||
|
std::unordered_set<ImageId> ignore_textures;
|
||||||
std::vector<ImageId> bad_overlap_ids;
|
std::vector<ImageId> bad_overlap_ids;
|
||||||
ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) {
|
const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) {
|
||||||
|
if (True(overlap.flags & ImageFlagBits::Remapped)) {
|
||||||
|
ignore_textures.insert(overlap_id);
|
||||||
|
return;
|
||||||
|
}
|
||||||
if (info.type == ImageType::Linear) {
|
if (info.type == ImageType::Linear) {
|
||||||
if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) {
|
if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) {
|
||||||
// Alias linear images with the same pitch
|
// Alias linear images with the same pitch
|
||||||
|
@ -1024,6 +1100,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
overlaps_found.insert(overlap_id);
|
||||||
static constexpr bool strict_size = true;
|
static constexpr bool strict_size = true;
|
||||||
const std::optional<OverlapResult> solution = ResolveOverlap(
|
const std::optional<OverlapResult> solution = ResolveOverlap(
|
||||||
new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr);
|
new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr);
|
||||||
|
@ -1047,12 +1124,40 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
|
||||||
bad_overlap_ids.push_back(overlap_id);
|
bad_overlap_ids.push_back(overlap_id);
|
||||||
overlap.flags |= ImageFlagBits::BadOverlap;
|
overlap.flags |= ImageFlagBits::BadOverlap;
|
||||||
}
|
}
|
||||||
});
|
};
|
||||||
|
ForEachImageInRegion(cpu_addr, size_bytes, region_check);
|
||||||
|
const auto region_check_gpu = [&](ImageId overlap_id, ImageBase& overlap) {
|
||||||
|
if (!overlaps_found.contains(overlap_id)) {
|
||||||
|
if (True(overlap.flags & ImageFlagBits::Remapped)) {
|
||||||
|
ignore_textures.insert(overlap_id);
|
||||||
|
}
|
||||||
|
if (overlap.gpu_addr == gpu_addr && overlap.guest_size_bytes == size_bytes) {
|
||||||
|
ignore_textures.insert(overlap_id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
ForEachSparseImageInRegion(gpu_addr, size_bytes, region_check_gpu);
|
||||||
const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
|
const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
|
||||||
Image& new_image = slot_images[new_image_id];
|
Image& new_image = slot_images[new_image_id];
|
||||||
|
|
||||||
|
if (!gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) {
|
||||||
|
new_image.flags |= ImageFlagBits::Sparse;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const ImageId overlap_id : ignore_textures) {
|
||||||
|
Image& overlap = slot_images[overlap_id];
|
||||||
|
if (True(overlap.flags & ImageFlagBits::GpuModified)) {
|
||||||
|
UNIMPLEMENTED();
|
||||||
|
}
|
||||||
|
if (True(overlap.flags & ImageFlagBits::Tracked)) {
|
||||||
|
UntrackImage(overlap, overlap_id);
|
||||||
|
}
|
||||||
|
UnregisterImage(overlap_id);
|
||||||
|
DeleteImage(overlap_id);
|
||||||
|
}
|
||||||
|
|
||||||
// TODO: Only upload what we need
|
// TODO: Only upload what we need
|
||||||
RefreshContents(new_image);
|
RefreshContents(new_image, new_image_id);
|
||||||
|
|
||||||
for (const ImageId overlap_id : overlap_ids) {
|
for (const ImageId overlap_id : overlap_ids) {
|
||||||
Image& overlap = slot_images[overlap_id];
|
Image& overlap = slot_images[overlap_id];
|
||||||
|
@ -1064,7 +1169,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
|
||||||
runtime.CopyImage(new_image, overlap, copies);
|
runtime.CopyImage(new_image, overlap, copies);
|
||||||
}
|
}
|
||||||
if (True(overlap.flags & ImageFlagBits::Tracked)) {
|
if (True(overlap.flags & ImageFlagBits::Tracked)) {
|
||||||
UntrackImage(overlap);
|
UntrackImage(overlap, overlap_id);
|
||||||
}
|
}
|
||||||
UnregisterImage(overlap_id);
|
UnregisterImage(overlap_id);
|
||||||
DeleteImage(overlap_id);
|
DeleteImage(overlap_id);
|
||||||
|
@ -1092,11 +1197,10 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages(
|
typename TextureCache<P>::BlitImages TextureCache<P>::GetBlitImages(
|
||||||
const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src,
|
const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src) {
|
||||||
s32 src_address_offset) {
|
|
||||||
static constexpr auto FIND_OPTIONS = RelaxedOptions::Format | RelaxedOptions::Samples;
|
static constexpr auto FIND_OPTIONS = RelaxedOptions::Format | RelaxedOptions::Samples;
|
||||||
const GPUVAddr dst_addr = dst.Address();
|
const GPUVAddr dst_addr = dst.Address();
|
||||||
const GPUVAddr src_addr = src.Address() + src_address_offset;
|
const GPUVAddr src_addr = src.Address();
|
||||||
ImageInfo dst_info(dst);
|
ImageInfo dst_info(dst);
|
||||||
ImageInfo src_info(src);
|
ImageInfo src_info(src);
|
||||||
ImageId dst_id;
|
ImageId dst_id;
|
||||||
|
@ -1200,7 +1304,8 @@ void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f
|
||||||
using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
|
using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
|
||||||
static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
|
static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
|
||||||
boost::container::small_vector<ImageId, 32> images;
|
boost::container::small_vector<ImageId, 32> images;
|
||||||
ForEachPage(cpu_addr, size, [this, &images, cpu_addr, size, func](u64 page) {
|
boost::container::small_vector<ImageMapId, 32> maps;
|
||||||
|
ForEachCPUPage(cpu_addr, size, [this, &images, &maps, cpu_addr, size, func](u64 page) {
|
||||||
const auto it = page_table.find(page);
|
const auto it = page_table.find(page);
|
||||||
if (it == page_table.end()) {
|
if (it == page_table.end()) {
|
||||||
if constexpr (BOOL_BREAK) {
|
if constexpr (BOOL_BREAK) {
|
||||||
|
@ -1209,12 +1314,63 @@ void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
for (const ImageMapId map_id : it->second) {
|
||||||
|
ImageMapView& map = slot_map_views[map_id];
|
||||||
|
if (map.picked) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (!map.Overlaps(cpu_addr, size)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
map.picked = true;
|
||||||
|
maps.push_back(map_id);
|
||||||
|
Image& image = slot_images[map.image_id];
|
||||||
|
if (True(image.flags & ImageFlagBits::Picked)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
image.flags |= ImageFlagBits::Picked;
|
||||||
|
images.push_back(map.image_id);
|
||||||
|
if constexpr (BOOL_BREAK) {
|
||||||
|
if (func(map.image_id, image)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
func(map.image_id, image);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if constexpr (BOOL_BREAK) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
for (const ImageId image_id : images) {
|
||||||
|
slot_images[image_id].flags &= ~ImageFlagBits::Picked;
|
||||||
|
}
|
||||||
|
for (const ImageMapId map_id : maps) {
|
||||||
|
slot_map_views[map_id].picked = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class P>
|
||||||
|
template <typename Func>
|
||||||
|
void TextureCache<P>::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func) {
|
||||||
|
using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
|
||||||
|
static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
|
||||||
|
boost::container::small_vector<ImageId, 8> images;
|
||||||
|
ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) {
|
||||||
|
const auto it = gpu_page_table.find(page);
|
||||||
|
if (it == gpu_page_table.end()) {
|
||||||
|
if constexpr (BOOL_BREAK) {
|
||||||
|
return false;
|
||||||
|
} else {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
for (const ImageId image_id : it->second) {
|
for (const ImageId image_id : it->second) {
|
||||||
Image& image = slot_images[image_id];
|
Image& image = slot_images[image_id];
|
||||||
if (True(image.flags & ImageFlagBits::Picked)) {
|
if (True(image.flags & ImageFlagBits::Picked)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (!image.Overlaps(cpu_addr, size)) {
|
if (!image.OverlapsGPU(gpu_addr, size)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
image.flags |= ImageFlagBits::Picked;
|
image.flags |= ImageFlagBits::Picked;
|
||||||
|
@ -1236,6 +1392,69 @@ void TextureCache<P>::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class P>
|
||||||
|
template <typename Func>
|
||||||
|
void TextureCache<P>::ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func) {
|
||||||
|
using FuncReturn = typename std::invoke_result<Func, ImageId, Image&>::type;
|
||||||
|
static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
|
||||||
|
boost::container::small_vector<ImageId, 8> images;
|
||||||
|
ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) {
|
||||||
|
const auto it = sparse_page_table.find(page);
|
||||||
|
if (it == sparse_page_table.end()) {
|
||||||
|
if constexpr (BOOL_BREAK) {
|
||||||
|
return false;
|
||||||
|
} else {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (const ImageId image_id : it->second) {
|
||||||
|
Image& image = slot_images[image_id];
|
||||||
|
if (True(image.flags & ImageFlagBits::Picked)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (!image.OverlapsGPU(gpu_addr, size)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
image.flags |= ImageFlagBits::Picked;
|
||||||
|
images.push_back(image_id);
|
||||||
|
if constexpr (BOOL_BREAK) {
|
||||||
|
if (func(image_id, image)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
func(image_id, image);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if constexpr (BOOL_BREAK) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
for (const ImageId image_id : images) {
|
||||||
|
slot_images[image_id].flags &= ~ImageFlagBits::Picked;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class P>
|
||||||
|
template <typename Func>
|
||||||
|
void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) {
|
||||||
|
using FuncReturn = typename std::invoke_result<Func, GPUVAddr, VAddr, size_t>::type;
|
||||||
|
static constexpr bool RETURNS_BOOL = std::is_same_v<FuncReturn, bool>;
|
||||||
|
const auto segments = gpu_memory.GetSubmappedRange(image.gpu_addr, image.guest_size_bytes);
|
||||||
|
for (auto& segment : segments) {
|
||||||
|
const auto gpu_addr = segment.first;
|
||||||
|
const auto size = segment.second;
|
||||||
|
std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
|
||||||
|
ASSERT(cpu_addr);
|
||||||
|
if constexpr (RETURNS_BOOL) {
|
||||||
|
if (func(gpu_addr, *cpu_addr, size)) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
func(gpu_addr, *cpu_addr, size);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) {
|
ImageViewId TextureCache<P>::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) {
|
||||||
Image& image = slot_images[image_id];
|
Image& image = slot_images[image_id];
|
||||||
|
@ -1253,8 +1472,6 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
|
||||||
ASSERT_MSG(False(image.flags & ImageFlagBits::Registered),
|
ASSERT_MSG(False(image.flags & ImageFlagBits::Registered),
|
||||||
"Trying to register an already registered image");
|
"Trying to register an already registered image");
|
||||||
image.flags |= ImageFlagBits::Registered;
|
image.flags |= ImageFlagBits::Registered;
|
||||||
ForEachPage(image.cpu_addr, image.guest_size_bytes,
|
|
||||||
[this, image_id](u64 page) { page_table[page].push_back(image_id); });
|
|
||||||
u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
|
u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes);
|
||||||
if ((IsPixelFormatASTC(image.info.format) &&
|
if ((IsPixelFormatASTC(image.info.format) &&
|
||||||
True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
|
True(image.flags & ImageFlagBits::AcceleratedUpload)) ||
|
||||||
|
@ -1262,6 +1479,27 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
|
||||||
tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
|
tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
|
||||||
}
|
}
|
||||||
total_used_memory += Common::AlignUp(tentative_size, 1024);
|
total_used_memory += Common::AlignUp(tentative_size, 1024);
|
||||||
|
ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
|
||||||
|
[this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); });
|
||||||
|
if (False(image.flags & ImageFlagBits::Sparse)) {
|
||||||
|
auto map_id =
|
||||||
|
slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id);
|
||||||
|
ForEachCPUPage(image.cpu_addr, image.guest_size_bytes,
|
||||||
|
[this, map_id](u64 page) { page_table[page].push_back(map_id); });
|
||||||
|
image.map_view_id = map_id;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
std::vector<ImageViewId> sparse_maps{};
|
||||||
|
ForEachSparseSegment(
|
||||||
|
image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) {
|
||||||
|
auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id);
|
||||||
|
ForEachCPUPage(cpu_addr, size,
|
||||||
|
[this, map_id](u64 page) { page_table[page].push_back(map_id); });
|
||||||
|
sparse_maps.push_back(map_id);
|
||||||
|
});
|
||||||
|
sparse_views.emplace(image_id, std::move(sparse_maps));
|
||||||
|
ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
|
||||||
|
[this, image_id](u64 page) { sparse_page_table[page].push_back(image_id); });
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
|
@ -1278,34 +1516,125 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
|
||||||
tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
|
tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format);
|
||||||
}
|
}
|
||||||
total_used_memory -= Common::AlignUp(tentative_size, 1024);
|
total_used_memory -= Common::AlignUp(tentative_size, 1024);
|
||||||
ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) {
|
const auto& clear_page_table =
|
||||||
const auto page_it = page_table.find(page);
|
[this, image_id](
|
||||||
if (page_it == page_table.end()) {
|
u64 page,
|
||||||
|
std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>>& selected_page_table) {
|
||||||
|
const auto page_it = selected_page_table.find(page);
|
||||||
|
if (page_it == selected_page_table.end()) {
|
||||||
UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
|
UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
std::vector<ImageId>& image_ids = page_it->second;
|
std::vector<ImageId>& image_ids = page_it->second;
|
||||||
const auto vector_it = std::ranges::find(image_ids, image_id);
|
const auto vector_it = std::ranges::find(image_ids, image_id);
|
||||||
if (vector_it == image_ids.end()) {
|
if (vector_it == image_ids.end()) {
|
||||||
UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", page << PAGE_BITS);
|
UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}",
|
||||||
|
page << PAGE_BITS);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
image_ids.erase(vector_it);
|
image_ids.erase(vector_it);
|
||||||
|
};
|
||||||
|
ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
|
||||||
|
[this, &clear_page_table](u64 page) { clear_page_table(page, gpu_page_table); });
|
||||||
|
if (False(image.flags & ImageFlagBits::Sparse)) {
|
||||||
|
const auto map_id = image.map_view_id;
|
||||||
|
ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) {
|
||||||
|
const auto page_it = page_table.find(page);
|
||||||
|
if (page_it == page_table.end()) {
|
||||||
|
UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
std::vector<ImageMapId>& image_map_ids = page_it->second;
|
||||||
|
const auto vector_it = std::ranges::find(image_map_ids, map_id);
|
||||||
|
if (vector_it == image_map_ids.end()) {
|
||||||
|
UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}",
|
||||||
|
page << PAGE_BITS);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
image_map_ids.erase(vector_it);
|
||||||
|
});
|
||||||
|
slot_map_views.erase(map_id);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) {
|
||||||
|
clear_page_table(page, sparse_page_table);
|
||||||
|
});
|
||||||
|
auto it = sparse_views.find(image_id);
|
||||||
|
ASSERT(it != sparse_views.end());
|
||||||
|
auto& sparse_maps = it->second;
|
||||||
|
for (auto& map_view_id : sparse_maps) {
|
||||||
|
const auto& map_range = slot_map_views[map_view_id];
|
||||||
|
const VAddr cpu_addr = map_range.cpu_addr;
|
||||||
|
const std::size_t size = map_range.size;
|
||||||
|
ForEachCPUPage(cpu_addr, size, [this, image_id](u64 page) {
|
||||||
|
const auto page_it = page_table.find(page);
|
||||||
|
if (page_it == page_table.end()) {
|
||||||
|
UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
std::vector<ImageMapId>& image_map_ids = page_it->second;
|
||||||
|
auto vector_it = image_map_ids.begin();
|
||||||
|
while (vector_it != image_map_ids.end()) {
|
||||||
|
ImageMapView& map = slot_map_views[*vector_it];
|
||||||
|
if (map.image_id != image_id) {
|
||||||
|
vector_it++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (!map.picked) {
|
||||||
|
map.picked = true;
|
||||||
|
}
|
||||||
|
vector_it = image_map_ids.erase(vector_it);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
slot_map_views.erase(map_view_id);
|
||||||
|
}
|
||||||
|
sparse_views.erase(it);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <class P>
|
||||||
|
void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) {
|
||||||
|
ASSERT(False(image.flags & ImageFlagBits::Tracked));
|
||||||
|
image.flags |= ImageFlagBits::Tracked;
|
||||||
|
if (False(image.flags & ImageFlagBits::Sparse)) {
|
||||||
|
rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (True(image.flags & ImageFlagBits::Registered)) {
|
||||||
|
auto it = sparse_views.find(image_id);
|
||||||
|
ASSERT(it != sparse_views.end());
|
||||||
|
auto& sparse_maps = it->second;
|
||||||
|
for (auto& map_view_id : sparse_maps) {
|
||||||
|
const auto& map = slot_map_views[map_view_id];
|
||||||
|
const VAddr cpu_addr = map.cpu_addr;
|
||||||
|
const std::size_t size = map.size;
|
||||||
|
rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
ForEachSparseSegment(image,
|
||||||
|
[this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) {
|
||||||
|
rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
void TextureCache<P>::TrackImage(ImageBase& image) {
|
void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) {
|
||||||
ASSERT(False(image.flags & ImageFlagBits::Tracked));
|
|
||||||
image.flags |= ImageFlagBits::Tracked;
|
|
||||||
rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <class P>
|
|
||||||
void TextureCache<P>::UntrackImage(ImageBase& image) {
|
|
||||||
ASSERT(True(image.flags & ImageFlagBits::Tracked));
|
ASSERT(True(image.flags & ImageFlagBits::Tracked));
|
||||||
image.flags &= ~ImageFlagBits::Tracked;
|
image.flags &= ~ImageFlagBits::Tracked;
|
||||||
|
if (False(image.flags & ImageFlagBits::Sparse)) {
|
||||||
rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1);
|
rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
ASSERT(True(image.flags & ImageFlagBits::Registered));
|
||||||
|
auto it = sparse_views.find(image_id);
|
||||||
|
ASSERT(it != sparse_views.end());
|
||||||
|
auto& sparse_maps = it->second;
|
||||||
|
for (auto& map_view_id : sparse_maps) {
|
||||||
|
const auto& map = slot_map_views[map_view_id];
|
||||||
|
const VAddr cpu_addr = map.cpu_addr;
|
||||||
|
const std::size_t size = map.size;
|
||||||
|
rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
|
@ -1447,10 +1776,10 @@ void TextureCache<P>::PrepareImage(ImageId image_id, bool is_modification, bool
|
||||||
if (invalidate) {
|
if (invalidate) {
|
||||||
image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified);
|
image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified);
|
||||||
if (False(image.flags & ImageFlagBits::Tracked)) {
|
if (False(image.flags & ImageFlagBits::Tracked)) {
|
||||||
TrackImage(image);
|
TrackImage(image, image_id);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
RefreshContents(image);
|
RefreshContents(image, image_id);
|
||||||
SynchronizeAliases(image_id);
|
SynchronizeAliases(image_id);
|
||||||
}
|
}
|
||||||
if (is_modification) {
|
if (is_modification) {
|
||||||
|
|
|
@ -16,6 +16,7 @@ constexpr size_t MAX_MIP_LEVELS = 14;
|
||||||
constexpr SlotId CORRUPT_ID{0xfffffffe};
|
constexpr SlotId CORRUPT_ID{0xfffffffe};
|
||||||
|
|
||||||
using ImageId = SlotId;
|
using ImageId = SlotId;
|
||||||
|
using ImageMapId = SlotId;
|
||||||
using ImageViewId = SlotId;
|
using ImageViewId = SlotId;
|
||||||
using ImageAllocId = SlotId;
|
using ImageAllocId = SlotId;
|
||||||
using SamplerId = SlotId;
|
using SamplerId = SlotId;
|
||||||
|
@ -132,8 +133,8 @@ struct BufferImageCopy {
|
||||||
};
|
};
|
||||||
|
|
||||||
struct BufferCopy {
|
struct BufferCopy {
|
||||||
size_t src_offset;
|
u64 src_offset;
|
||||||
size_t dst_offset;
|
u64 dst_offset;
|
||||||
size_t size;
|
size_t size;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -664,6 +664,16 @@ LevelArray CalculateMipLevelOffsets(const ImageInfo& info) noexcept {
|
||||||
return offsets;
|
return offsets;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Builds a table holding the size of each mip level of an image.
///
/// @param info Image description; info.resources.levels gives the number of entries to fill.
/// @return LevelArray whose first info.resources.levels entries hold
///         CalculateLevelSize(MakeLevelInfo(info), level). The remaining entries keep the
///         value-initialized state of the array.
LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept {
    LevelArray level_sizes{};
    const LevelInfo mip_layout = MakeLevelInfo(info);
    const u32 level_count = info.resources.levels;
    u32 mip = 0;
    while (mip < level_count) {
        level_sizes[mip] = CalculateLevelSize(mip_layout, mip);
        ++mip;
    }
    return level_sizes;
}
|
||||||
|
|
||||||
std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) {
|
std::vector<u32> CalculateSliceOffsets(const ImageInfo& info) {
|
||||||
ASSERT(info.type == ImageType::e3D);
|
ASSERT(info.type == ImageType::e3D);
|
||||||
std::vector<u32> offsets;
|
std::vector<u32> offsets;
|
||||||
|
@ -776,14 +786,20 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn
|
||||||
return copies;
|
return copies;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) {
|
bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) {
|
||||||
if (config.Address() == 0) {
|
const GPUVAddr address = config.Address();
|
||||||
|
if (address == 0) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (config.Address() > (u64(1) << 48)) {
|
if (address > (1ULL << 48)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return gpu_memory.GpuToCpuAddress(config.Address()).has_value();
|
if (gpu_memory.GpuToCpuAddress(address).has_value()) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
const ImageInfo info{config};
|
||||||
|
const size_t guest_size_bytes = CalculateGuestSizeInBytes(info);
|
||||||
|
return gpu_memory.GpuToCpuAddress(address, guest_size_bytes).has_value();
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
|
std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
|
||||||
|
|
|
@ -40,6 +40,8 @@ struct OverlapResult {
|
||||||
|
|
||||||
[[nodiscard]] LevelArray CalculateMipLevelOffsets(const ImageInfo& info) noexcept;
|
[[nodiscard]] LevelArray CalculateMipLevelOffsets(const ImageInfo& info) noexcept;
|
||||||
|
|
||||||
|
[[nodiscard]] LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept;
|
||||||
|
|
||||||
[[nodiscard]] std::vector<u32> CalculateSliceOffsets(const ImageInfo& info);
|
[[nodiscard]] std::vector<u32> CalculateSliceOffsets(const ImageInfo& info);
|
||||||
|
|
||||||
[[nodiscard]] std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info);
|
[[nodiscard]] std::vector<SubresourceBase> CalculateSliceSubresources(const ImageInfo& info);
|
||||||
|
@ -55,7 +57,7 @@ struct OverlapResult {
|
||||||
const ImageInfo& src,
|
const ImageInfo& src,
|
||||||
SubresourceBase base);
|
SubresourceBase base);
|
||||||
|
|
||||||
[[nodiscard]] bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config);
|
[[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config);
|
||||||
|
|
||||||
[[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory,
|
[[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory,
|
||||||
GPUVAddr gpu_addr, const ImageInfo& info,
|
GPUVAddr gpu_addr, const ImageInfo& info,
|
||||||
|
|
Loading…
Reference in a new issue