From 4ba24324d2e0fc2a8d211f30a78bfc45659e0720 Mon Sep 17 00:00:00 2001 From: pineappleEA Date: Thu, 6 Jul 2023 09:05:01 +0200 Subject: [PATCH] early-access version 3743 --- README.md | 2 +- src/audio_core/device/device_session.cpp | 6 +- .../renderer/command/data_source/decode.cpp | 21 +- .../renderer/command/effect/aux_.cpp | 82 +++++-- src/common/page_table.cpp | 1 - src/common/page_table.h | 1 - src/common/scratch_buffer.h | 17 +- src/core/core_timing.cpp | 3 +- src/core/core_timing.h | 2 +- src/core/hle/service/hle_ipc.cpp | 32 +-- src/core/memory.cpp | 54 +---- src/core/memory.h | 212 ------------------ src/video_core/buffer_cache/buffer_cache.h | 7 +- src/video_core/dma_pusher.cpp | 28 ++- src/video_core/engines/engine_upload.cpp | 28 +-- src/video_core/engines/kepler_compute.cpp | 1 + src/video_core/engines/maxwell_3d.cpp | 4 +- src/video_core/engines/maxwell_dma.cpp | 85 +++---- src/video_core/engines/sw_blitter/blitter.cpp | 29 +-- src/video_core/memory_manager.cpp | 30 +-- src/video_core/memory_manager.h | 18 -- src/video_core/texture_cache/texture_cache.h | 24 +- src/video_core/texture_cache/util.cpp | 26 ++- src/video_core/texture_cache/util.h | 3 + 24 files changed, 237 insertions(+), 479 deletions(-) diff --git a/README.md b/README.md index d879d24f9..5c2afbe17 100755 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ yuzu emulator early access ============= -This is the source code for early-access 3742. +This is the source code for early-access 3743. ## Legal Notice diff --git a/src/audio_core/device/device_session.cpp b/src/audio_core/device/device_session.cpp index 141eb6928..e1d66ccd6 100755 --- a/src/audio_core/device/device_session.cpp +++ b/src/audio_core/device/device_session.cpp @@ -92,9 +92,9 @@ void DeviceSession::AppendBuffers(std::span buffers) { if (type == Sink::StreamType::In) { stream->AppendBuffer(new_buffer, tmp_samples); } else { - Core::Memory::CpuGuestMemory samples( - system.ApplicationMemory(), buffer.samples, buffer.size / sizeof(s16)); - stream->AppendBuffer(new_buffer, samples); + system.ApplicationMemory().ReadBlockUnsafe(buffer.samples, tmp_samples.data(), + buffer.size); + stream->AppendBuffer(new_buffer, tmp_samples); } } } diff --git a/src/audio_core/renderer/command/data_source/decode.cpp b/src/audio_core/renderer/command/data_source/decode.cpp index fd35571ac..19bbbc313 100755 --- a/src/audio_core/renderer/command/data_source/decode.cpp +++ b/src/audio_core/renderer/command/data_source/decode.cpp @@ -28,6 +28,7 @@ constexpr std::array PitchBySrcQuality = {4, 8, 4}; template static u32 DecodePcm(Core::Memory::Memory& memory, std::span out_buffer, const DecodeArg& req) { + std::array tmp_samples{}; constexpr s32 min{std::numeric_limits::min()}; constexpr s32 max{std::numeric_limits::max()}; @@ -48,18 +49,19 @@ static u32 DecodePcm(Core::Memory::Memory& memory, std::span out_buffer, const VAddr source{req.buffer + (((req.start_offset + req.offset) * channel_count) * sizeof(T))}; const u64 size{channel_count * samples_to_decode}; + const u64 size_bytes{size * sizeof(T)}; + + memory.ReadBlockUnsafe(source, tmp_samples.data(), size_bytes); - Core::Memory::CpuGuestMemory samples( - memory, source, size); if constexpr (std::is_floating_point_v) { for (u32 i = 0; i < samples_to_decode; i++) { - auto sample{static_cast(samples[i * channel_count + req.target_channel] * + auto sample{static_cast(tmp_samples[i * channel_count + req.target_channel] * std::numeric_limits::max())}; out_buffer[i] = static_cast(std::clamp(sample, min, max)); } } else { for (u32 i = 0; i < samples_to_decode; i++) { - out_buffer[i] = samples[i * channel_count + req.target_channel]; + out_buffer[i] = tmp_samples[i * channel_count + req.target_channel]; } } } break; @@ -72,17 +74,16 @@ static u32 DecodePcm(Core::Memory::Memory& memory, std::span out_buffer, } const VAddr source{req.buffer + ((req.start_offset + req.offset) * sizeof(T))}; - Core::Memory::CpuGuestMemory samples( - memory, source, samples_to_decode); + memory.ReadBlockUnsafe(source, tmp_samples.data(), samples_to_decode * sizeof(T)); if constexpr (std::is_floating_point_v) { for (u32 i = 0; i < samples_to_decode; i++) { - auto sample{static_cast(samples[i * channel_count + req.target_channel] * + auto sample{static_cast(tmp_samples[i * channel_count + req.target_channel] * std::numeric_limits::max())}; out_buffer[i] = static_cast(std::clamp(sample, min, max)); } } else { - std::memcpy(out_buffer.data(), samples.data(), samples_to_decode * sizeof(s16)); + std::memcpy(out_buffer.data(), tmp_samples.data(), samples_to_decode * sizeof(s16)); } break; } @@ -100,6 +101,7 @@ static u32 DecodePcm(Core::Memory::Memory& memory, std::span out_buffer, */ static u32 DecodeAdpcm(Core::Memory::Memory& memory, std::span out_buffer, const DecodeArg& req) { + std::array wavebuffer{}; constexpr u32 SamplesPerFrame{14}; constexpr u32 NibblesPerFrame{16}; @@ -137,8 +139,7 @@ static u32 DecodeAdpcm(Core::Memory::Memory& memory, std::span out_buffer, } const auto size{std::max((samples_to_process / 8U) * SamplesPerFrame, 8U)}; - Core::Memory::CpuGuestMemory wavebuffer( - memory, req.buffer + position_in_frame / 2, size); + memory.ReadBlockUnsafe(req.buffer + position_in_frame / 2, wavebuffer.data(), size); auto context{req.adpcm_context}; auto header{context->header}; diff --git a/src/audio_core/renderer/command/effect/aux_.cpp b/src/audio_core/renderer/command/effect/aux_.cpp index 03f1c6b42..e487feae0 100755 --- a/src/audio_core/renderer/command/effect/aux_.cpp +++ b/src/audio_core/renderer/command/effect/aux_.cpp @@ -21,13 +21,23 @@ static void ResetAuxBufferDsp(Core::Memory::Memory& memory, const CpuAddr aux_in } AuxInfo::AuxInfoDsp info{}; - memory.ReadBlockUnsafe(aux_info, &info, sizeof(AuxInfo::AuxInfoDsp)); + auto info_ptr{&info}; + bool host_safe{(aux_info & Core::Memory::YUZU_PAGEMASK) <= + (Core::Memory::YUZU_PAGESIZE - sizeof(AuxInfo::AuxInfoDsp))}; - info.read_offset = 0; - info.write_offset = 0; - info.total_sample_count = 0; + if (host_safe) [[likely]] { + info_ptr = memory.GetPointer(aux_info); + } else { + memory.ReadBlockUnsafe(aux_info, info_ptr, sizeof(AuxInfo::AuxInfoDsp)); + } - memory.WriteBlockUnsafe(aux_info, &info, sizeof(AuxInfo::AuxInfoDsp)); + info_ptr->read_offset = 0; + info_ptr->write_offset = 0; + info_ptr->total_sample_count = 0; + + if (!host_safe) [[unlikely]] { + memory.WriteBlockUnsafe(aux_info, info_ptr, sizeof(AuxInfo::AuxInfoDsp)); + } } /** @@ -76,9 +86,17 @@ static u32 WriteAuxBufferDsp(Core::Memory::Memory& memory, CpuAddr send_info_, } AuxInfo::AuxInfoDsp send_info{}; - memory.ReadBlockUnsafe(send_info_, &send_info, sizeof(AuxInfo::AuxInfoDsp)); + auto send_ptr = &send_info; + bool host_safe = (send_info_ & Core::Memory::YUZU_PAGEMASK) <= + (Core::Memory::YUZU_PAGESIZE - sizeof(AuxInfo::AuxInfoDsp)); - u32 target_write_offset{send_info.write_offset + write_offset}; + if (host_safe) [[likely]] { + send_ptr = memory.GetPointer(send_info_); + } else { + memory.ReadBlockUnsafe(send_info_, send_ptr, sizeof(AuxInfo::AuxInfoDsp)); + } + + u32 target_write_offset{send_ptr->write_offset + write_offset}; if (target_write_offset > count_max) { return 0; } @@ -87,9 +105,15 @@ static u32 WriteAuxBufferDsp(Core::Memory::Memory& memory, CpuAddr send_info_, u32 read_pos{0}; while (write_count > 0) { u32 to_write{std::min(count_max - target_write_offset, write_count)}; - if (to_write > 0) { - const auto write_addr = send_buffer + target_write_offset * sizeof(s32); - memory.WriteBlockUnsafe(write_addr, &input[read_pos], to_write * sizeof(s32)); + const auto write_addr = send_buffer + target_write_offset * sizeof(s32); + bool write_safe{(write_addr & Core::Memory::YUZU_PAGEMASK) <= + (Core::Memory::YUZU_PAGESIZE - (write_addr + to_write * sizeof(s32)))}; + if (write_safe) [[likely]] { + auto ptr = memory.GetPointer(write_addr); + std::memcpy(ptr, &input[read_pos], to_write * sizeof(s32)); + } else { + memory.WriteBlockUnsafe(send_buffer + target_write_offset * sizeof(s32), + &input[read_pos], to_write * sizeof(s32)); } target_write_offset = (target_write_offset + to_write) % count_max; write_count -= to_write; @@ -97,10 +121,13 @@ static u32 WriteAuxBufferDsp(Core::Memory::Memory& memory, CpuAddr send_info_, } if (update_count) { - send_info.write_offset = (send_info.write_offset + update_count) % count_max; + send_ptr->write_offset = (send_ptr->write_offset + update_count) % count_max; + } + + if (!host_safe) [[unlikely]] { + memory.WriteBlockUnsafe(send_info_, send_ptr, sizeof(AuxInfo::AuxInfoDsp)); } - memory.WriteBlockUnsafe(send_info_, &send_info, sizeof(AuxInfo::AuxInfoDsp)); return write_count_; } @@ -147,9 +174,17 @@ static u32 ReadAuxBufferDsp(Core::Memory::Memory& memory, CpuAddr return_info_, } AuxInfo::AuxInfoDsp return_info{}; - memory.ReadBlockUnsafe(return_info_, &return_info, sizeof(AuxInfo::AuxInfoDsp)); + auto return_ptr = &return_info; + bool host_safe = (return_info_ & Core::Memory::YUZU_PAGEMASK) <= + (Core::Memory::YUZU_PAGESIZE - sizeof(AuxInfo::AuxInfoDsp)); - u32 target_read_offset{return_info.read_offset + read_offset}; + if (host_safe) [[likely]] { + return_ptr = memory.GetPointer(return_info_); + } else { + memory.ReadBlockUnsafe(return_info_, return_ptr, sizeof(AuxInfo::AuxInfoDsp)); + } + + u32 target_read_offset{return_ptr->read_offset + read_offset}; if (target_read_offset > count_max) { return 0; } @@ -158,9 +193,15 @@ static u32 ReadAuxBufferDsp(Core::Memory::Memory& memory, CpuAddr return_info_, u32 write_pos{0}; while (read_count > 0) { u32 to_read{std::min(count_max - target_read_offset, read_count)}; - if (to_read > 0) { - const auto read_addr = return_buffer + target_read_offset * sizeof(s32); - memory.ReadBlockUnsafe(read_addr, &output[write_pos], to_read * sizeof(s32)); + const auto read_addr = return_buffer + target_read_offset * sizeof(s32); + bool read_safe{(read_addr & Core::Memory::YUZU_PAGEMASK) <= + (Core::Memory::YUZU_PAGESIZE - (read_addr + to_read * sizeof(s32)))}; + if (read_safe) [[likely]] { + auto ptr = memory.GetPointer(read_addr); + std::memcpy(&output[write_pos], ptr, to_read * sizeof(s32)); + } else { + memory.ReadBlockUnsafe(return_buffer + target_read_offset * sizeof(s32), + &output[write_pos], to_read * sizeof(s32)); } target_read_offset = (target_read_offset + to_read) % count_max; read_count -= to_read; @@ -168,10 +209,13 @@ static u32 ReadAuxBufferDsp(Core::Memory::Memory& memory, CpuAddr return_info_, } if (update_count) { - return_info.read_offset = (return_info.read_offset + update_count) % count_max; + return_ptr->read_offset = (return_ptr->read_offset + update_count) % count_max; + } + + if (!host_safe) [[unlikely]] { + memory.WriteBlockUnsafe(return_info_, return_ptr, sizeof(AuxInfo::AuxInfoDsp)); } - memory.WriteBlockUnsafe(return_info_, &return_info, sizeof(AuxInfo::AuxInfoDsp)); return read_count_; } diff --git a/src/common/page_table.cpp b/src/common/page_table.cpp index 01fcdc5c0..9c1fdcd4b 100755 --- a/src/common/page_table.cpp +++ b/src/common/page_table.cpp @@ -66,7 +66,6 @@ void PageTable::Resize(std::size_t address_space_width_in_bits, std::size_t page << (address_space_width_in_bits - page_size_in_bits)}; pointers.resize(num_page_table_entries); backing_addr.resize(num_page_table_entries); - blocks.resize(num_page_table_entries); current_address_space_width_in_bits = address_space_width_in_bits; page_size = 1ULL << page_size_in_bits; } diff --git a/src/common/page_table.h b/src/common/page_table.h index edf1e4dcc..6eaa28ba2 100755 --- a/src/common/page_table.h +++ b/src/common/page_table.h @@ -122,7 +122,6 @@ struct PageTable { * corresponding attribute element is of type `Memory`. */ VirtualBuffer pointers; - VirtualBuffer blocks; VirtualBuffer backing_addr; diff --git a/src/common/scratch_buffer.h b/src/common/scratch_buffer.h index 2a98cda53..d5961b020 100755 --- a/src/common/scratch_buffer.h +++ b/src/common/scratch_buffer.h @@ -40,21 +40,8 @@ public: ~ScratchBuffer() = default; ScratchBuffer(const ScratchBuffer&) = delete; ScratchBuffer& operator=(const ScratchBuffer&) = delete; - - ScratchBuffer(ScratchBuffer&& other) noexcept { - swap(other); - other.last_requested_size = 0; - other.buffer_capacity = 0; - other.buffer.reset(); - } - - ScratchBuffer& operator=(ScratchBuffer&& other) noexcept { - swap(other); - other.last_requested_size = 0; - other.buffer_capacity = 0; - other.buffer.reset(); - return *this; - } + ScratchBuffer(ScratchBuffer&&) = default; + ScratchBuffer& operator=(ScratchBuffer&&) = default; /// This will only grow the buffer's capacity if size is greater than the current capacity. /// The previously held data will remain intact. diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp index 521fba414..1085d8dc5 100755 --- a/src/core/core_timing.cpp +++ b/src/core/core_timing.cpp @@ -70,7 +70,7 @@ void CoreTiming::Initialize(std::function&& on_thread_init_) { -> std::optional { return std::nullopt; }; ev_lost = CreateEvent("_lost_event", empty_timed_callback); if (is_multicore) { - timer_thread = std::make_unique(ThreadEntry, std::ref(*this)); + timer_thread = std::make_unique(ThreadEntry, std::ref(*this)); } } @@ -255,6 +255,7 @@ void CoreTiming::ThreadLoop() { #ifdef _WIN32 while (!paused && !event.IsSet() && wait_time > 0) { wait_time = *next_time - GetGlobalTimeNs().count(); + if (wait_time >= timer_resolution_ns) { Common::Windows::SleepForOneTick(); } else { diff --git a/src/core/core_timing.h b/src/core/core_timing.h index 8fb8257de..e5681637b 100755 --- a/src/core/core_timing.h +++ b/src/core/core_timing.h @@ -163,7 +163,7 @@ private: Common::Event pause_event{}; std::mutex basic_lock; std::mutex advance_lock; - std::unique_ptr timer_thread; + std::unique_ptr timer_thread; std::atomic paused{}; std::atomic paused_set{}; std::atomic wait_set{}; diff --git a/src/core/hle/service/hle_ipc.cpp b/src/core/hle/service/hle_ipc.cpp index f6a1e54f2..2290df705 100755 --- a/src/core/hle/service/hle_ipc.cpp +++ b/src/core/hle/service/hle_ipc.cpp @@ -329,22 +329,8 @@ std::vector HLERequestContext::ReadBufferCopy(std::size_t buffer_index) cons } std::span HLERequestContext::ReadBuffer(std::size_t buffer_index) const { - static thread_local std::array read_buffer_a{ - Core::Memory::CpuGuestMemory(memory, 0, 0), - Core::Memory::CpuGuestMemory(memory, 0, 0), - }; - static thread_local std::array read_buffer_data_a{ - Common::ScratchBuffer(), - Common::ScratchBuffer(), - }; - static thread_local std::array read_buffer_x{ - Core::Memory::CpuGuestMemory(memory, 0, 0), - Core::Memory::CpuGuestMemory(memory, 0, 0), - }; - static thread_local std::array read_buffer_data_x{ - Common::ScratchBuffer(), - Common::ScratchBuffer(), - }; + static thread_local std::array, 2> read_buffer_a; + static thread_local std::array, 2> read_buffer_x; const bool is_buffer_a{BufferDescriptorA().size() > buffer_index && BufferDescriptorA()[buffer_index].Size()}; @@ -353,17 +339,19 @@ std::span HLERequestContext::ReadBuffer(std::size_t buffer_index) cons BufferDescriptorA().size() > buffer_index, { return {}; }, "BufferDescriptorA invalid buffer_index {}", buffer_index); auto& read_buffer = read_buffer_a[buffer_index]; - return read_buffer.Read(BufferDescriptorA()[buffer_index].Address(), - BufferDescriptorA()[buffer_index].Size(), - &read_buffer_data_a[buffer_index]); + read_buffer.resize_destructive(BufferDescriptorA()[buffer_index].Size()); + memory.ReadBlock(BufferDescriptorA()[buffer_index].Address(), read_buffer.data(), + read_buffer.size()); + return read_buffer; } else { ASSERT_OR_EXECUTE_MSG( BufferDescriptorX().size() > buffer_index, { return {}; }, "BufferDescriptorX invalid buffer_index {}", buffer_index); auto& read_buffer = read_buffer_x[buffer_index]; - return read_buffer.Read(BufferDescriptorX()[buffer_index].Address(), - BufferDescriptorX()[buffer_index].Size(), - &read_buffer_data_x[buffer_index]); + read_buffer.resize_destructive(BufferDescriptorX()[buffer_index].Size()); + memory.ReadBlock(BufferDescriptorX()[buffer_index].Address(), read_buffer.data(), + read_buffer.size()); + return read_buffer; } } diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 5bf92e9ce..b4390cd00 100755 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -266,22 +266,6 @@ struct Memory::Impl { ReadBlockImpl(*system.ApplicationProcess(), src_addr, dest_buffer, size); } - const u8* GetSpan(const VAddr src_addr, const std::size_t size) const { - if (current_page_table->blocks[src_addr >> YUZU_PAGEBITS] == - current_page_table->blocks[(src_addr + size) >> YUZU_PAGEBITS]) { - return GetPointerSilent(src_addr); - } - return nullptr; - } - - u8* GetSpan(const VAddr src_addr, const std::size_t size) { - if (current_page_table->blocks[src_addr >> YUZU_PAGEBITS] == - current_page_table->blocks[(src_addr + size) >> YUZU_PAGEBITS]) { - return GetPointerSilent(src_addr); - } - return nullptr; - } - template void WriteBlockImpl(const Kernel::KProcess& process, const Common::ProcessAddress dest_addr, const void* src_buffer, const std::size_t size) { @@ -575,7 +559,7 @@ struct Memory::Impl { } } - const auto end = base + size; + const Common::ProcessAddress end = base + size; ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}", base + page_table.pointers.size()); @@ -586,18 +570,14 @@ struct Memory::Impl { while (base != end) { page_table.pointers[base].Store(nullptr, type); page_table.backing_addr[base] = 0; - page_table.blocks[base] = 0; + base += 1; } } else { - auto orig_base = base; while (base != end) { - auto host_ptr = - system.DeviceMemory().GetPointer(target) - (base << YUZU_PAGEBITS); - auto backing = GetInteger(target) - (base << YUZU_PAGEBITS); - page_table.pointers[base].Store(host_ptr, type); - page_table.backing_addr[base] = backing; - page_table.blocks[base] = orig_base << YUZU_PAGEBITS; + page_table.pointers[base].Store( + system.DeviceMemory().GetPointer(target) - (base << YUZU_PAGEBITS), type); + page_table.backing_addr[base] = GetInteger(target) - (base << YUZU_PAGEBITS); ASSERT_MSG(page_table.pointers[base].Pointer(), "memory mapping base yield a nullptr within the table"); @@ -767,14 +747,6 @@ struct Memory::Impl { VAddr last_address; }; - void InvalidateRegion(Common::ProcessAddress dest_addr, size_t size) { - system.GPU().InvalidateRegion(GetInteger(dest_addr), size); - } - - void FlushRegion(Common::ProcessAddress dest_addr, size_t size) { - system.GPU().FlushRegion(GetInteger(dest_addr), size); - } - Core::System& system; Common::PageTable* current_page_table = nullptr; std::array @@ -909,14 +881,6 @@ void Memory::ReadBlockUnsafe(const Common::ProcessAddress src_addr, void* dest_b impl->ReadBlockUnsafe(src_addr, dest_buffer, size); } -const u8* Memory::GetSpan(const VAddr src_addr, const std::size_t size) const { - return impl->GetSpan(src_addr, size); -} - -u8* Memory::GetSpan(const VAddr src_addr, const std::size_t size) { - return impl->GetSpan(src_addr, size); -} - void Memory::WriteBlock(const Common::ProcessAddress dest_addr, const void* src_buffer, const std::size_t size) { impl->WriteBlock(dest_addr, src_buffer, size); @@ -960,12 +924,4 @@ void Memory::MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug) impl->MarkRegionDebug(GetInteger(vaddr), size, debug); } -void Memory::InvalidateRegion(Common::ProcessAddress dest_addr, size_t size) { - impl->InvalidateRegion(dest_addr, size); -} - -void Memory::FlushRegion(Common::ProcessAddress dest_addr, size_t size) { - impl->FlushRegion(dest_addr, size); -} - } // namespace Core::Memory diff --git a/src/core/memory.h b/src/core/memory.h index 183fed329..9558bda7c 100755 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -5,12 +5,8 @@ #include #include -#include #include #include -#include - -#include "common/scratch_buffer.h" #include "common/typed_address.h" #include "core/hle/result.h" @@ -28,10 +24,6 @@ class PhysicalMemory; class KProcess; } // namespace Kernel -namespace Tegra { -class MemoryManager; -} - namespace Core::Memory { /** @@ -351,9 +343,6 @@ public: */ void ReadBlockUnsafe(Common::ProcessAddress src_addr, void* dest_buffer, std::size_t size); - const u8* GetSpan(const VAddr src_addr, const std::size_t size) const; - u8* GetSpan(const VAddr src_addr, const std::size_t size); - /** * Writes a range of bytes into the current process' address space at the specified * virtual address. @@ -472,8 +461,6 @@ public: void MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug); void SetGPUDirtyManagers(std::span managers); - void InvalidateRegion(Common::ProcessAddress dest_addr, size_t size); - void FlushRegion(Common::ProcessAddress dest_addr, size_t size); private: Core::System& system; @@ -482,203 +469,4 @@ private: std::unique_ptr impl; }; -enum GuestMemoryFlags : u32 { - Read = 1 << 0, - Write = 1 << 1, - Safe = 1 << 2, - Cached = 1 << 3, - - SafeRead = Read | Safe, - SafeWrite = Write | Safe, - SafeReadWrite = SafeRead | SafeWrite, - SafeReadCachedWrite = SafeReadWrite | Cached, - - UnsafeRead = Read, - UnsafeWrite = Write, - UnsafeReadWrite = UnsafeRead | UnsafeWrite, - UnsafeReadCachedWrite = UnsafeReadWrite | Cached, -}; - -namespace { -template -class GuestMemory { - using iterator = T*; - using const_iterator = const T*; - using value_type = T; - using element_type = T; - using iterator_category = std::contiguous_iterator_tag; - -public: - GuestMemory() = delete; - explicit GuestMemory(M& memory_, u64 addr_, std::size_t size_, - Common::ScratchBuffer* backup = nullptr) - : memory{memory_}, addr{addr_}, size{size_} { - static_assert(FLAGS & GuestMemoryFlags::Read || FLAGS & GuestMemoryFlags::Write); - if constexpr (FLAGS & GuestMemoryFlags::Read) { - Read(addr, size, backup); - } - } - - ~GuestMemory() = default; - - T* data() noexcept { - return data_span.data(); - } - - const T* data() const noexcept { - return data_span.data(); - } - - [[nodiscard]] T* begin() noexcept { - return data(); - } - - [[nodiscard]] const T* begin() const noexcept { - return data(); - } - - [[nodiscard]] T* end() noexcept { - return data() + size; - } - - [[nodiscard]] const T* end() const noexcept { - return data() + size; - } - - T& operator[](size_t index) noexcept { - return data_span[index]; - } - - const T& operator[](size_t index) const noexcept { - return data_span[index]; - } - - void SetAddressAndSize(u64 addr_, std::size_t size_) noexcept { - addr = addr_; - size = size_; - addr_changed = true; - } - - std::span Read(u64 addr_, std::size_t size_, - Common::ScratchBuffer* backup = nullptr) noexcept { - addr = addr_; - size = size_; - if (size == 0) { - is_data_copy = true; - return {}; - } - - if (TrySetSpan()) { - if constexpr (FLAGS & GuestMemoryFlags::Safe) { - memory.FlushRegion(addr, size * sizeof(T)); - } - } else { - if (backup) { - backup->resize_destructive(size); - data_span = *backup; - } else { - data_copy.resize(size); - data_span = std::span(data_copy); - } - is_data_copy = true; - span_valid = true; - if constexpr (FLAGS & GuestMemoryFlags::Safe) { - memory.ReadBlock(addr, data_span.data(), size * sizeof(T)); - } else { - memory.ReadBlockUnsafe(addr, data_span.data(), size * sizeof(T)); - } - } - return data_span; - } - - void Write(std::span write_data) noexcept { - if constexpr (FLAGS & GuestMemoryFlags::Cached) { - memory.WriteBlockCached(addr, write_data.data(), size * sizeof(T)); - } else if constexpr (FLAGS & GuestMemoryFlags::Safe) { - memory.WriteBlock(addr, write_data.data(), size * sizeof(T)); - } else { - memory.WriteBlockUnsafe(addr, write_data.data(), size * sizeof(T)); - } - } - - bool TrySetSpan() noexcept { - if (u8* ptr = memory.GetSpan(addr, size * sizeof(T)); ptr) { - data_span = {reinterpret_cast(ptr), size}; - span_valid = true; - return true; - } - return false; - } - -protected: - bool IsDataCopy() const noexcept { - return is_data_copy; - } - - bool AddressChanged() const noexcept { - return addr_changed; - } - - M& memory; - u64 addr; - size_t size; - std::span data_span{}; - std::vector data_copy; - bool span_valid{false}; - bool is_data_copy{false}; - bool addr_changed{false}; -}; - -template -class GuestMemoryScoped : public GuestMemory { -public: - GuestMemoryScoped() = delete; - explicit GuestMemoryScoped(M& memory_, u64 addr_, std::size_t size_, - Common::ScratchBuffer* backup = nullptr) - : GuestMemory(memory_, addr_, size_, backup) { - if constexpr (!(FLAGS & GuestMemoryFlags::Read)) { - if (!this->TrySetSpan()) { - if (backup) { - this->data_span = *backup; - this->span_valid = true; - this->is_data_copy = true; - } - } - } - } - - ~GuestMemoryScoped() { - if constexpr (FLAGS & GuestMemoryFlags::Write) { - if (this->size == 0) [[unlikely]] { - return; - } - - if (this->AddressChanged() || this->IsDataCopy()) { - ASSERT(this->span_valid); - if constexpr (FLAGS & GuestMemoryFlags::Cached) { - this->memory.WriteBlockCached(this->addr, this->data_span.data(), - this->size * sizeof(T)); - } else if constexpr (FLAGS & GuestMemoryFlags::Safe) { - this->memory.WriteBlock(this->addr, this->data_span.data(), - this->size * sizeof(T)); - } else { - this->memory.WriteBlockUnsafe(this->addr, this->data_span.data(), - this->size * sizeof(T)); - } - } else if constexpr (FLAGS & GuestMemoryFlags::Safe) { - this->memory.InvalidateRegion(this->addr, this->size * sizeof(T)); - } - } - } -}; -} // namespace - -template -using CpuGuestMemory = GuestMemory; -template -using CpuGuestMemoryScoped = GuestMemoryScoped; -template -using GpuGuestMemory = GuestMemory; -template -using GpuGuestMemoryScoped = GuestMemoryScoped; } // namespace Core::Memory diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index c2a1e7d82..7636c74b6 100755 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -234,10 +234,9 @@ bool BufferCache

::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am if (has_new_downloads) { memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount); } - - Core::Memory::CpuGuestMemoryScoped tmp( - cpu_memory, *cpu_src_address, amount, &tmp_buffer); - tmp.SetAddressAndSize(*cpu_dest_address, amount); + tmp_buffer.resize_destructive(amount); + cpu_memory.ReadBlockUnsafe(*cpu_src_address, tmp_buffer.data(), amount); + cpu_memory.WriteBlockUnsafe(*cpu_dest_address, tmp_buffer.data(), amount); return true; } diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp index 51f836fd9..a619dca76 100755 --- a/src/video_core/dma_pusher.cpp +++ b/src/video_core/dma_pusher.cpp @@ -5,7 +5,6 @@ #include "common/microprofile.h" #include "common/settings.h" #include "core/core.h" -#include "core/memory.h" #include "video_core/dma_pusher.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/gpu.h" @@ -13,8 +12,6 @@ namespace Tegra { -constexpr u32 MacroRegistersStart = 0xE00; - DmaPusher::DmaPusher(Core::System& system_, GPU& gpu_, MemoryManager& memory_manager_, Control::ChannelState& channel_state_) : gpu{gpu_}, system{system_}, memory_manager{memory_manager_}, puller{gpu_, memory_manager_, @@ -77,16 +74,25 @@ bool DmaPusher::Step() { } // Push buffer non-empty, read a word - if (dma_state.method >= MacroRegistersStart) { - if (subchannels[dma_state.subchannel]) { - subchannels[dma_state.subchannel]->current_dirty = memory_manager.IsMemoryDirty( - dma_state.dma_get, command_list_header.size * sizeof(u32)); + command_headers.resize_destructive(command_list_header.size); + constexpr u32 MacroRegistersStart = 0xE00; + if (dma_state.method < MacroRegistersStart) { + if (Settings::IsGPULevelHigh()) { + memory_manager.ReadBlock(dma_state.dma_get, command_headers.data(), + command_list_header.size * sizeof(u32)); + } else { + memory_manager.ReadBlockUnsafe(dma_state.dma_get, command_headers.data(), + command_list_header.size * sizeof(u32)); } + } else { + const size_t copy_size = command_list_header.size * sizeof(u32); + if (subchannels[dma_state.subchannel]) { + subchannels[dma_state.subchannel]->current_dirty = + memory_manager.IsMemoryDirty(dma_state.dma_get, copy_size); + } + memory_manager.ReadBlockUnsafe(dma_state.dma_get, command_headers.data(), copy_size); } - Core::Memory::GpuGuestMemory - headers(memory_manager, dma_state.dma_get, command_list_header.size, &command_headers); - ProcessCommands(headers); + ProcessCommands(command_headers); } return true; diff --git a/src/video_core/engines/engine_upload.cpp b/src/video_core/engines/engine_upload.cpp index 9ed7e7327..545df54c4 100755 --- a/src/video_core/engines/engine_upload.cpp +++ b/src/video_core/engines/engine_upload.cpp @@ -5,7 +5,6 @@ #include "common/algorithm.h" #include "common/assert.h" -#include "core/memory.h" #include "video_core/engines/engine_upload.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" @@ -47,11 +46,15 @@ void State::ProcessData(const u32* data, size_t num_data) { void State::ProcessData(std::span read_buffer) { const GPUVAddr address{regs.dest.Address()}; if (is_linear) { - for (size_t line = 0; line < regs.line_count; ++line) { - const GPUVAddr dest_line = address + line * regs.dest.pitch; - std::span buffer(read_buffer.data() + line * regs.line_length_in, - regs.line_length_in); - rasterizer->AccelerateInlineToMemory(dest_line, regs.line_length_in, buffer); + if (regs.line_count == 1) { + rasterizer->AccelerateInlineToMemory(address, copy_size, read_buffer); + } else { + for (size_t line = 0; line < regs.line_count; ++line) { + const GPUVAddr dest_line = address + line * regs.dest.pitch; + std::span buffer(read_buffer.data() + line * regs.line_length_in, + regs.line_length_in); + rasterizer->AccelerateInlineToMemory(dest_line, regs.line_length_in, buffer); + } } } else { u32 width = regs.dest.width; @@ -67,14 +70,13 @@ void State::ProcessData(std::span read_buffer) { const std::size_t dst_size = Tegra::Texture::CalculateSize( true, bytes_per_pixel, width, regs.dest.height, regs.dest.depth, regs.dest.BlockHeight(), regs.dest.BlockDepth()); - - Core::Memory::GpuGuestMemoryScoped - tmp(memory_manager, address, dst_size, &tmp_buffer); - - Tegra::Texture::SwizzleSubrect(tmp, read_buffer, bytes_per_pixel, width, regs.dest.height, - regs.dest.depth, x_offset, regs.dest.y, x_elements, - regs.line_count, regs.dest.BlockHeight(), + tmp_buffer.resize_destructive(dst_size); + memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size); + Tegra::Texture::SwizzleSubrect(tmp_buffer, read_buffer, bytes_per_pixel, width, + regs.dest.height, regs.dest.depth, x_offset, regs.dest.y, + x_elements, regs.line_count, regs.dest.BlockHeight(), regs.dest.BlockDepth(), regs.line_length_in); + memory_manager.WriteBlockCached(address, tmp_buffer.data(), dst_size); } } diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp index e1de1042c..7735ef1ea 100755 --- a/src/video_core/engines/kepler_compute.cpp +++ b/src/video_core/engines/kepler_compute.cpp @@ -84,6 +84,7 @@ Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const { Texture::TICEntry tic_entry; memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); + return tic_entry; } diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 0a0d1a3b0..3152f9aa2 100755 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -9,7 +9,6 @@ #include "common/settings.h" #include "core/core.h" #include "core/core_timing.h" -#include "core/memory.h" #include "video_core/dirty_flags.h" #include "video_core/engines/draw_manager.h" #include "video_core/engines/maxwell_3d.h" @@ -680,14 +679,17 @@ void Maxwell3D::ProcessCBData(u32 value) { Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { const GPUVAddr tic_address_gpu{regs.tex_header.Address() + tic_index * sizeof(Texture::TICEntry)}; + Texture::TICEntry tic_entry; memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); + return tic_entry; } Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const { const GPUVAddr tsc_address_gpu{regs.tex_sampler.Address() + tsc_index * sizeof(Texture::TSCEntry)}; + Texture::TSCEntry tsc_entry; memory_manager.ReadBlockUnsafe(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry)); return tsc_entry; diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 0cc78f614..9cdff0cba 100755 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -7,7 +7,6 @@ #include "common/microprofile.h" #include "common/settings.h" #include "core/core.h" -#include "core/memory.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/maxwell_dma.h" #include "video_core/memory_manager.h" @@ -131,12 +130,11 @@ void MaxwellDMA::Launch() { UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); read_buffer.resize_destructive(16); for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { - Core::Memory::GpuGuestMemoryScoped< - u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> - tmp_write_buffer(memory_manager, - convert_linear_2_blocklinear_addr(regs.offset_in + offset), - 16, &read_buffer); - tmp_write_buffer.SetAddressAndSize(regs.offset_out + offset, 16); + memory_manager.ReadBlock( + convert_linear_2_blocklinear_addr(regs.offset_in + offset), + read_buffer.data(), read_buffer.size()); + memory_manager.WriteBlockCached(regs.offset_out + offset, read_buffer.data(), + read_buffer.size()); } } else if (is_src_pitch && !is_dst_pitch) { UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0); @@ -144,19 +142,20 @@ void MaxwellDMA::Launch() { UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); read_buffer.resize_destructive(16); for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { - Core::Memory::GpuGuestMemoryScoped< - u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> - tmp_write_buffer(memory_manager, regs.offset_in + offset, 16, &read_buffer); - tmp_write_buffer.SetAddressAndSize( - convert_linear_2_blocklinear_addr(regs.offset_out + offset), 16); + memory_manager.ReadBlock(regs.offset_in + offset, read_buffer.data(), + read_buffer.size()); + memory_manager.WriteBlockCached( + convert_linear_2_blocklinear_addr(regs.offset_out + offset), + read_buffer.data(), read_buffer.size()); } } else { if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) { - Core::Memory::GpuGuestMemoryScoped< - u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> - tmp_write_buffer(memory_manager, regs.offset_in, regs.line_length_in, - &read_buffer); - tmp_write_buffer.SetAddressAndSize(regs.offset_out, regs.line_length_in); + read_buffer.resize_destructive(regs.line_length_in); + memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), + regs.line_length_in, + VideoCommon::CacheType::NoBufferCache); + memory_manager.WriteBlockCached(regs.offset_out, read_buffer.data(), + regs.line_length_in); } } } @@ -223,15 +222,17 @@ void MaxwellDMA::CopyBlockLinearToPitch() { CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); const size_t dst_size = dst_operand.pitch * regs.line_count; + read_buffer.resize_destructive(src_size); + write_buffer.resize_destructive(dst_size); - Core::Memory::GpuGuestMemory tmp_read_buffer( - memory_manager, src_operand.address, src_size, &read_buffer); - Core::Memory::GpuGuestMemoryScoped - tmp_write_buffer(memory_manager, dst_operand.address, dst_size, &write_buffer); + memory_manager.ReadBlock(src_operand.address, read_buffer.data(), src_size); + memory_manager.ReadBlock(dst_operand.address, write_buffer.data(), dst_size); - UnswizzleSubrect(tmp_write_buffer, tmp_read_buffer, bytes_per_pixel, width, height, depth, - x_offset, src_params.origin.y, x_elements, regs.line_count, block_height, - block_depth, dst_operand.pitch); + UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset, + src_params.origin.y, x_elements, regs.line_count, block_height, block_depth, + dst_operand.pitch); + + memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); } void MaxwellDMA::CopyPitchToBlockLinear() { @@ -286,17 +287,18 @@ void MaxwellDMA::CopyPitchToBlockLinear() { CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); const size_t src_size = static_cast(regs.pitch_in) * regs.line_count; - GPUVAddr src_addr = regs.offset_in; - GPUVAddr dst_addr = regs.offset_out; - Core::Memory::GpuGuestMemory tmp_read_buffer( - memory_manager, src_addr, src_size, &read_buffer); - Core::Memory::GpuGuestMemoryScoped - tmp_write_buffer(memory_manager, dst_addr, dst_size, &write_buffer); + read_buffer.resize_destructive(src_size); + write_buffer.resize_destructive(dst_size); - // If the input is linear and the output is tiled, swizzle the input and copy it over. - SwizzleSubrect(tmp_write_buffer, tmp_read_buffer, bytes_per_pixel, width, height, depth, - x_offset, dst_params.origin.y, x_elements, regs.line_count, block_height, - block_depth, regs.pitch_in); + memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); + memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size); + + // If the input is linear and the output is tiled, swizzle the input and copy it over. + SwizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset, + dst_params.origin.y, x_elements, regs.line_count, block_height, block_depth, + regs.pitch_in); + + memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); } void MaxwellDMA::CopyBlockLinearToBlockLinear() { @@ -340,20 +342,23 @@ void MaxwellDMA::CopyBlockLinearToBlockLinear() { const u32 pitch = x_elements * bytes_per_pixel; const size_t mid_buffer_size = pitch * regs.line_count; + read_buffer.resize_destructive(src_size); + write_buffer.resize_destructive(dst_size); + intermediate_buffer.resize_destructive(mid_buffer_size); - Core::Memory::GpuGuestMemory tmp_read_buffer( - memory_manager, regs.offset_in, src_size, &read_buffer); - Core::Memory::GpuGuestMemoryScoped - tmp_write_buffer(memory_manager, regs.offset_out, dst_size, &write_buffer); + memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); + memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size); - UnswizzleSubrect(intermediate_buffer, tmp_read_buffer, bytes_per_pixel, src_width, src.height, + UnswizzleSubrect(intermediate_buffer, read_buffer, bytes_per_pixel, src_width, src.height, src.depth, src_x_offset, src.origin.y, x_elements, regs.line_count, src.block_size.height, src.block_size.depth, pitch); - SwizzleSubrect(tmp_write_buffer, intermediate_buffer, bytes_per_pixel, dst_width, dst.height, + SwizzleSubrect(write_buffer, intermediate_buffer, bytes_per_pixel, dst_width, dst.height, dst.depth, dst_x_offset, dst.origin.y, x_elements, regs.line_count, dst.block_size.height, dst.block_size.depth, pitch); + + memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); } void MaxwellDMA::ReleaseSemaphore() { diff --git a/src/video_core/engines/sw_blitter/blitter.cpp b/src/video_core/engines/sw_blitter/blitter.cpp index 3a599f466..ff88cd03d 100755 --- a/src/video_core/engines/sw_blitter/blitter.cpp +++ b/src/video_core/engines/sw_blitter/blitter.cpp @@ -159,11 +159,11 @@ bool SoftwareBlitEngine::Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst, const auto src_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format)); const auto dst_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(dst.format)); const size_t src_size = get_surface_size(src, src_bytes_per_pixel); - - Core::Memory::GpuGuestMemory tmp_buffer( - memory_manager, src.Address(), src_size, &impl->tmp_buffer); + impl->tmp_buffer.resize_destructive(src_size); + memory_manager.ReadBlock(src.Address(), impl->tmp_buffer.data(), src_size); const size_t src_copy_size = src_extent_x * src_extent_y * src_bytes_per_pixel; + const size_t dst_copy_size = dst_extent_x * dst_extent_y * dst_bytes_per_pixel; impl->src_buffer.resize_destructive(src_copy_size); @@ -200,11 +200,12 @@ bool SoftwareBlitEngine::Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst, impl->dst_buffer.resize_destructive(dst_copy_size); if (src.linear == Fermi2D::MemoryLayout::BlockLinear) { - UnswizzleSubrect(impl->src_buffer, tmp_buffer, src_bytes_per_pixel, src.width, src.height, - src.depth, config.src_x0, config.src_y0, src_extent_x, src_extent_y, - src.block_height, src.block_depth, src_extent_x * src_bytes_per_pixel); + UnswizzleSubrect(impl->src_buffer, impl->tmp_buffer, src_bytes_per_pixel, src.width, + src.height, src.depth, config.src_x0, config.src_y0, src_extent_x, + src_extent_y, src.block_height, src.block_depth, + src_extent_x * src_bytes_per_pixel); } else { - process_pitch_linear(false, tmp_buffer, impl->src_buffer, src_extent_x, src_extent_y, + process_pitch_linear(false, impl->tmp_buffer, impl->src_buffer, src_extent_x, src_extent_y, src.pitch, config.src_x0, config.src_y0, src_bytes_per_pixel); } @@ -220,18 +221,20 @@ bool SoftwareBlitEngine::Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst, } const size_t dst_size = get_surface_size(dst, dst_bytes_per_pixel); - Core::Memory::GpuGuestMemoryScoped - tmp_buffer2(memory_manager, dst.Address(), dst_size, &impl->tmp_buffer); + impl->tmp_buffer.resize_destructive(dst_size); + memory_manager.ReadBlock(dst.Address(), impl->tmp_buffer.data(), dst_size); if (dst.linear == Fermi2D::MemoryLayout::BlockLinear) { - SwizzleSubrect(tmp_buffer2, impl->dst_buffer, dst_bytes_per_pixel, dst.width, dst.height, - dst.depth, config.dst_x0, config.dst_y0, dst_extent_x, dst_extent_y, - dst.block_height, dst.block_depth, dst_extent_x * dst_bytes_per_pixel); + SwizzleSubrect(impl->tmp_buffer, impl->dst_buffer, dst_bytes_per_pixel, dst.width, + dst.height, dst.depth, config.dst_x0, config.dst_y0, dst_extent_x, + dst_extent_y, dst.block_height, dst.block_depth, + dst_extent_x * dst_bytes_per_pixel); } else { - process_pitch_linear(true, impl->dst_buffer, tmp_buffer2, dst_extent_x, dst_extent_y, + process_pitch_linear(true, impl->dst_buffer, impl->tmp_buffer, dst_extent_x, dst_extent_y, dst.pitch, config.dst_x0, config.dst_y0, static_cast(dst_bytes_per_pixel)); } + memory_manager.WriteBlock(dst.Address(), impl->tmp_buffer.data(), dst_size); return true; } diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 0cce535ca..064714b9b 100755 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -10,13 +10,13 @@ #include "core/device_memory.h" #include "core/hle/kernel/k_page_table.h" #include "core/hle/kernel/k_process.h" +#include "core/memory.h" #include "video_core/invalidation_accumulator.h" #include "video_core/memory_manager.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_base.h" namespace Tegra { -using Core::Memory::GuestMemoryFlags; std::atomic MemoryManager::unique_identifier_generator{}; @@ -587,10 +587,13 @@ void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size, void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size, VideoCommon::CacheType which) { - Core::Memory::GpuGuestMemoryScoped data( - *this, gpu_src_addr, size); - data.SetAddressAndSize(gpu_dest_addr, size); + tmp_buffer.resize_destructive(size); + ReadBlock(gpu_src_addr, tmp_buffer.data(), size, which); + + // The output block must be flushed in case it has data modified from the GPU. + // Fixes NPC geometry in Zombie Panic in Wonderland DX FlushRegion(gpu_dest_addr, size, which); + WriteBlock(gpu_dest_addr, tmp_buffer.data(), size, which); } bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const { @@ -755,23 +758,4 @@ void MemoryManager::FlushCaching() { accumulator->Clear(); } -const u8* MemoryManager::GetSpan(const GPUVAddr src_addr, const std::size_t size) const { - auto cpu_addr = GpuToCpuAddress(src_addr); - if (cpu_addr) { - return memory.GetSpan(*cpu_addr, size); - } - return nullptr; -} - -u8* MemoryManager::GetSpan(const GPUVAddr src_addr, const std::size_t size) { - if (!IsContinuousRange(src_addr, size)) { - return nullptr; - } - auto cpu_addr = GpuToCpuAddress(src_addr); - if (cpu_addr) { - return memory.GetSpan(*cpu_addr, size); - } - return nullptr; -} - } // namespace Tegra diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index cfa9f3878..51831570f 100755 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -15,7 +15,6 @@ #include "common/range_map.h" #include "common/scratch_buffer.h" #include "common/virtual_buffer.h" -#include "core/memory.h" #include "video_core/cache_types.h" #include "video_core/pte_kind.h" @@ -63,20 +62,6 @@ public: [[nodiscard]] u8* GetPointer(GPUVAddr addr); [[nodiscard]] const u8* GetPointer(GPUVAddr addr) const; - template - [[nodiscard]] T* GetPointer(GPUVAddr addr) { - const auto address{GpuToCpuAddress(addr)}; - if (!address) { - return {}; - } - return memory.GetPointer(*address); - } - - template - [[nodiscard]] const T* GetPointer(GPUVAddr addr) const { - return GetPointer(addr); - } - /** * ReadBlock and WriteBlock are full read and write operations over virtual * GPU Memory. It's important to use these when GPU memory may not be continuous @@ -154,9 +139,6 @@ public: void FlushCaching(); - const u8* GetSpan(const GPUVAddr src_addr, const std::size_t size) const; - u8* GetSpan(const GPUVAddr src_addr, const std::size_t size); - private: template inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped, diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 57b813921..b28245d6a 100755 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -8,7 +8,6 @@ #include "common/alignment.h" #include "common/settings.h" -#include "core/memory.h" #include "video_core/control/channel_state.h" #include "video_core/dirty_flags.h" #include "video_core/engines/kepler_compute.h" @@ -1027,19 +1026,19 @@ void TextureCache

::UploadImageContents(Image& image, StagingBuffer& staging) runtime.AccelerateImageUpload(image, staging, uploads); return; } - - Core::Memory::GpuGuestMemory swizzle_data( - *gpu_memory, gpu_addr, image.guest_size_bytes, &swizzle_data_buffer); + const size_t guest_size_bytes = image.guest_size_bytes; + swizzle_data_buffer.resize_destructive(guest_size_bytes); + gpu_memory->ReadBlockUnsafe(gpu_addr, swizzle_data_buffer.data(), guest_size_bytes); if (True(image.flags & ImageFlagBits::Converted)) { unswizzle_data_buffer.resize_destructive(image.unswizzled_size_bytes); - auto copies = - UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data, unswizzle_data_buffer); + auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data_buffer, + unswizzle_data_buffer); ConvertImage(unswizzle_data_buffer, image.info, mapped_span, copies); image.UploadMemory(staging, copies); } else { const auto copies = - UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data, mapped_span); + UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data_buffer, mapped_span); image.UploadMemory(staging, copies); } } @@ -1232,12 +1231,11 @@ void TextureCache

::QueueAsyncDecode(Image& image, ImageId image_id) { decode->image_id = image_id; async_decodes.push_back(std::move(decode)); - static Common::ScratchBuffer local_unswizzle_data_buffer; - local_unswizzle_data_buffer.resize_destructive(image.unswizzled_size_bytes); - Core::Memory::GpuGuestMemory swizzle_data( - *gpu_memory, image.gpu_addr, image.guest_size_bytes, &swizzle_data_buffer); - - auto copies = UnswizzleImage(*gpu_memory, image.gpu_addr, image.info, swizzle_data, + Common::ScratchBuffer local_unswizzle_data_buffer(image.unswizzled_size_bytes); + const size_t guest_size_bytes = image.guest_size_bytes; + swizzle_data_buffer.resize_destructive(guest_size_bytes); + gpu_memory->ReadBlockUnsafe(image.gpu_addr, swizzle_data_buffer.data(), guest_size_bytes); + auto copies = UnswizzleImage(*gpu_memory, image.gpu_addr, image.info, swizzle_data_buffer, local_unswizzle_data_buffer); const size_t out_size = MapSizeBytes(image); diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 45daeee97..d230a38a2 100755 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -20,7 +20,6 @@ #include "common/div_ceil.h" #include "common/scratch_buffer.h" #include "common/settings.h" -#include "core/memory.h" #include "video_core/compatible_formats.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/memory_manager.h" @@ -545,15 +544,17 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr tile_size.height, info.tile_width_spacing); const size_t subresource_size = sizes[level]; + tmp_buffer.resize_destructive(subresource_size); + const std::span dst(tmp_buffer); + for (s32 layer = 0; layer < info.resources.layers; ++layer) { const std::span src = input.subspan(host_offset); - { - Core::Memory::GpuGuestMemoryScoped - dst(gpu_memory, gpu_addr + guest_offset, subresource_size, &tmp_buffer); + gpu_memory.ReadBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes()); - SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height, - num_tiles.depth, block.height, block.depth); - } + SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height, + num_tiles.depth, block.height, block.depth); + + gpu_memory.WriteBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes()); host_offset += host_bytes_per_layer; guest_offset += layer_stride; @@ -836,7 +837,6 @@ boost::container::small_vector UnswizzleImage(Tegra::Memory const Extent3D size = info.size; if (info.type == ImageType::Linear) { - ASSERT(output.size_bytes() >= guest_size_bytes); gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), guest_size_bytes); ASSERT((info.pitch >> bpp_log2) << bpp_log2 == info.pitch); @@ -904,6 +904,16 @@ boost::container::small_vector UnswizzleImage(Tegra::Memory return copies; } +BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, + const ImageBase& image, std::span output) { + gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), image.guest_size_bytes); + return BufferCopy{ + .src_offset = 0, + .dst_offset = 0, + .size = image.guest_size_bytes, + }; +} + void ConvertImage(std::span input, const ImageInfo& info, std::span output, std::span copies) { u32 output_offset = 0; diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h index a0332387f..a7315196c 100755 --- a/src/video_core/texture_cache/util.h +++ b/src/video_core/texture_cache/util.h @@ -66,6 +66,9 @@ struct OverlapResult { Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, std::span input, std::span output); +[[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, + const ImageBase& image, std::span output); + void ConvertImage(std::span input, const ImageInfo& info, std::span output, std::span copies);