early-access version 3743

This commit is contained in:
pineappleEA 2023-07-06 09:05:01 +02:00
parent 0334ddc2e5
commit 4ba24324d2
24 changed files with 237 additions and 479 deletions

View file

@ -1,7 +1,7 @@
yuzu emulator early access yuzu emulator early access
============= =============
This is the source code for early-access 3742. This is the source code for early-access 3743.
## Legal Notice ## Legal Notice

View file

@ -92,9 +92,9 @@ void DeviceSession::AppendBuffers(std::span<const AudioBuffer> buffers) {
if (type == Sink::StreamType::In) { if (type == Sink::StreamType::In) {
stream->AppendBuffer(new_buffer, tmp_samples); stream->AppendBuffer(new_buffer, tmp_samples);
} else { } else {
Core::Memory::CpuGuestMemory<s16, Core::Memory::GuestMemoryFlags::UnsafeRead> samples( system.ApplicationMemory().ReadBlockUnsafe(buffer.samples, tmp_samples.data(),
system.ApplicationMemory(), buffer.samples, buffer.size / sizeof(s16)); buffer.size);
stream->AppendBuffer(new_buffer, samples); stream->AppendBuffer(new_buffer, tmp_samples);
} }
} }
} }

View file

@ -28,6 +28,7 @@ constexpr std::array<u8, 3> PitchBySrcQuality = {4, 8, 4};
template <typename T> template <typename T>
static u32 DecodePcm(Core::Memory::Memory& memory, std::span<s16> out_buffer, static u32 DecodePcm(Core::Memory::Memory& memory, std::span<s16> out_buffer,
const DecodeArg& req) { const DecodeArg& req) {
std::array<T, TempBufferSize> tmp_samples{};
constexpr s32 min{std::numeric_limits<s16>::min()}; constexpr s32 min{std::numeric_limits<s16>::min()};
constexpr s32 max{std::numeric_limits<s16>::max()}; constexpr s32 max{std::numeric_limits<s16>::max()};
@ -48,18 +49,19 @@ static u32 DecodePcm(Core::Memory::Memory& memory, std::span<s16> out_buffer,
const VAddr source{req.buffer + const VAddr source{req.buffer +
(((req.start_offset + req.offset) * channel_count) * sizeof(T))}; (((req.start_offset + req.offset) * channel_count) * sizeof(T))};
const u64 size{channel_count * samples_to_decode}; const u64 size{channel_count * samples_to_decode};
const u64 size_bytes{size * sizeof(T)};
memory.ReadBlockUnsafe(source, tmp_samples.data(), size_bytes);
Core::Memory::CpuGuestMemory<T, Core::Memory::GuestMemoryFlags::UnsafeRead> samples(
memory, source, size);
if constexpr (std::is_floating_point_v<T>) { if constexpr (std::is_floating_point_v<T>) {
for (u32 i = 0; i < samples_to_decode; i++) { for (u32 i = 0; i < samples_to_decode; i++) {
auto sample{static_cast<s32>(samples[i * channel_count + req.target_channel] * auto sample{static_cast<s32>(tmp_samples[i * channel_count + req.target_channel] *
std::numeric_limits<s16>::max())}; std::numeric_limits<s16>::max())};
out_buffer[i] = static_cast<s16>(std::clamp(sample, min, max)); out_buffer[i] = static_cast<s16>(std::clamp(sample, min, max));
} }
} else { } else {
for (u32 i = 0; i < samples_to_decode; i++) { for (u32 i = 0; i < samples_to_decode; i++) {
out_buffer[i] = samples[i * channel_count + req.target_channel]; out_buffer[i] = tmp_samples[i * channel_count + req.target_channel];
} }
} }
} break; } break;
@ -72,17 +74,16 @@ static u32 DecodePcm(Core::Memory::Memory& memory, std::span<s16> out_buffer,
} }
const VAddr source{req.buffer + ((req.start_offset + req.offset) * sizeof(T))}; const VAddr source{req.buffer + ((req.start_offset + req.offset) * sizeof(T))};
Core::Memory::CpuGuestMemory<T, Core::Memory::GuestMemoryFlags::UnsafeRead> samples( memory.ReadBlockUnsafe(source, tmp_samples.data(), samples_to_decode * sizeof(T));
memory, source, samples_to_decode);
if constexpr (std::is_floating_point_v<T>) { if constexpr (std::is_floating_point_v<T>) {
for (u32 i = 0; i < samples_to_decode; i++) { for (u32 i = 0; i < samples_to_decode; i++) {
auto sample{static_cast<s32>(samples[i * channel_count + req.target_channel] * auto sample{static_cast<s32>(tmp_samples[i * channel_count + req.target_channel] *
std::numeric_limits<s16>::max())}; std::numeric_limits<s16>::max())};
out_buffer[i] = static_cast<s16>(std::clamp(sample, min, max)); out_buffer[i] = static_cast<s16>(std::clamp(sample, min, max));
} }
} else { } else {
std::memcpy(out_buffer.data(), samples.data(), samples_to_decode * sizeof(s16)); std::memcpy(out_buffer.data(), tmp_samples.data(), samples_to_decode * sizeof(s16));
} }
break; break;
} }
@ -100,6 +101,7 @@ static u32 DecodePcm(Core::Memory::Memory& memory, std::span<s16> out_buffer,
*/ */
static u32 DecodeAdpcm(Core::Memory::Memory& memory, std::span<s16> out_buffer, static u32 DecodeAdpcm(Core::Memory::Memory& memory, std::span<s16> out_buffer,
const DecodeArg& req) { const DecodeArg& req) {
std::array<u8, TempBufferSize> wavebuffer{};
constexpr u32 SamplesPerFrame{14}; constexpr u32 SamplesPerFrame{14};
constexpr u32 NibblesPerFrame{16}; constexpr u32 NibblesPerFrame{16};
@ -137,8 +139,7 @@ static u32 DecodeAdpcm(Core::Memory::Memory& memory, std::span<s16> out_buffer,
} }
const auto size{std::max((samples_to_process / 8U) * SamplesPerFrame, 8U)}; const auto size{std::max((samples_to_process / 8U) * SamplesPerFrame, 8U)};
Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> wavebuffer( memory.ReadBlockUnsafe(req.buffer + position_in_frame / 2, wavebuffer.data(), size);
memory, req.buffer + position_in_frame / 2, size);
auto context{req.adpcm_context}; auto context{req.adpcm_context};
auto header{context->header}; auto header{context->header};

View file

@ -21,13 +21,23 @@ static void ResetAuxBufferDsp(Core::Memory::Memory& memory, const CpuAddr aux_in
} }
AuxInfo::AuxInfoDsp info{}; AuxInfo::AuxInfoDsp info{};
memory.ReadBlockUnsafe(aux_info, &info, sizeof(AuxInfo::AuxInfoDsp)); auto info_ptr{&info};
bool host_safe{(aux_info & Core::Memory::YUZU_PAGEMASK) <=
(Core::Memory::YUZU_PAGESIZE - sizeof(AuxInfo::AuxInfoDsp))};
info.read_offset = 0; if (host_safe) [[likely]] {
info.write_offset = 0; info_ptr = memory.GetPointer<AuxInfo::AuxInfoDsp>(aux_info);
info.total_sample_count = 0; } else {
memory.ReadBlockUnsafe(aux_info, info_ptr, sizeof(AuxInfo::AuxInfoDsp));
}
memory.WriteBlockUnsafe(aux_info, &info, sizeof(AuxInfo::AuxInfoDsp)); info_ptr->read_offset = 0;
info_ptr->write_offset = 0;
info_ptr->total_sample_count = 0;
if (!host_safe) [[unlikely]] {
memory.WriteBlockUnsafe(aux_info, info_ptr, sizeof(AuxInfo::AuxInfoDsp));
}
} }
/** /**
@ -76,9 +86,17 @@ static u32 WriteAuxBufferDsp(Core::Memory::Memory& memory, CpuAddr send_info_,
} }
AuxInfo::AuxInfoDsp send_info{}; AuxInfo::AuxInfoDsp send_info{};
memory.ReadBlockUnsafe(send_info_, &send_info, sizeof(AuxInfo::AuxInfoDsp)); auto send_ptr = &send_info;
bool host_safe = (send_info_ & Core::Memory::YUZU_PAGEMASK) <=
(Core::Memory::YUZU_PAGESIZE - sizeof(AuxInfo::AuxInfoDsp));
u32 target_write_offset{send_info.write_offset + write_offset}; if (host_safe) [[likely]] {
send_ptr = memory.GetPointer<AuxInfo::AuxInfoDsp>(send_info_);
} else {
memory.ReadBlockUnsafe(send_info_, send_ptr, sizeof(AuxInfo::AuxInfoDsp));
}
u32 target_write_offset{send_ptr->write_offset + write_offset};
if (target_write_offset > count_max) { if (target_write_offset > count_max) {
return 0; return 0;
} }
@ -87,9 +105,15 @@ static u32 WriteAuxBufferDsp(Core::Memory::Memory& memory, CpuAddr send_info_,
u32 read_pos{0}; u32 read_pos{0};
while (write_count > 0) { while (write_count > 0) {
u32 to_write{std::min(count_max - target_write_offset, write_count)}; u32 to_write{std::min(count_max - target_write_offset, write_count)};
if (to_write > 0) { const auto write_addr = send_buffer + target_write_offset * sizeof(s32);
const auto write_addr = send_buffer + target_write_offset * sizeof(s32); bool write_safe{(write_addr & Core::Memory::YUZU_PAGEMASK) <=
memory.WriteBlockUnsafe(write_addr, &input[read_pos], to_write * sizeof(s32)); (Core::Memory::YUZU_PAGESIZE - (write_addr + to_write * sizeof(s32)))};
if (write_safe) [[likely]] {
auto ptr = memory.GetPointer(write_addr);
std::memcpy(ptr, &input[read_pos], to_write * sizeof(s32));
} else {
memory.WriteBlockUnsafe(send_buffer + target_write_offset * sizeof(s32),
&input[read_pos], to_write * sizeof(s32));
} }
target_write_offset = (target_write_offset + to_write) % count_max; target_write_offset = (target_write_offset + to_write) % count_max;
write_count -= to_write; write_count -= to_write;
@ -97,10 +121,13 @@ static u32 WriteAuxBufferDsp(Core::Memory::Memory& memory, CpuAddr send_info_,
} }
if (update_count) { if (update_count) {
send_info.write_offset = (send_info.write_offset + update_count) % count_max; send_ptr->write_offset = (send_ptr->write_offset + update_count) % count_max;
}
if (!host_safe) [[unlikely]] {
memory.WriteBlockUnsafe(send_info_, send_ptr, sizeof(AuxInfo::AuxInfoDsp));
} }
memory.WriteBlockUnsafe(send_info_, &send_info, sizeof(AuxInfo::AuxInfoDsp));
return write_count_; return write_count_;
} }
@ -147,9 +174,17 @@ static u32 ReadAuxBufferDsp(Core::Memory::Memory& memory, CpuAddr return_info_,
} }
AuxInfo::AuxInfoDsp return_info{}; AuxInfo::AuxInfoDsp return_info{};
memory.ReadBlockUnsafe(return_info_, &return_info, sizeof(AuxInfo::AuxInfoDsp)); auto return_ptr = &return_info;
bool host_safe = (return_info_ & Core::Memory::YUZU_PAGEMASK) <=
(Core::Memory::YUZU_PAGESIZE - sizeof(AuxInfo::AuxInfoDsp));
u32 target_read_offset{return_info.read_offset + read_offset}; if (host_safe) [[likely]] {
return_ptr = memory.GetPointer<AuxInfo::AuxInfoDsp>(return_info_);
} else {
memory.ReadBlockUnsafe(return_info_, return_ptr, sizeof(AuxInfo::AuxInfoDsp));
}
u32 target_read_offset{return_ptr->read_offset + read_offset};
if (target_read_offset > count_max) { if (target_read_offset > count_max) {
return 0; return 0;
} }
@ -158,9 +193,15 @@ static u32 ReadAuxBufferDsp(Core::Memory::Memory& memory, CpuAddr return_info_,
u32 write_pos{0}; u32 write_pos{0};
while (read_count > 0) { while (read_count > 0) {
u32 to_read{std::min(count_max - target_read_offset, read_count)}; u32 to_read{std::min(count_max - target_read_offset, read_count)};
if (to_read > 0) { const auto read_addr = return_buffer + target_read_offset * sizeof(s32);
const auto read_addr = return_buffer + target_read_offset * sizeof(s32); bool read_safe{(read_addr & Core::Memory::YUZU_PAGEMASK) <=
memory.ReadBlockUnsafe(read_addr, &output[write_pos], to_read * sizeof(s32)); (Core::Memory::YUZU_PAGESIZE - (read_addr + to_read * sizeof(s32)))};
if (read_safe) [[likely]] {
auto ptr = memory.GetPointer(read_addr);
std::memcpy(&output[write_pos], ptr, to_read * sizeof(s32));
} else {
memory.ReadBlockUnsafe(return_buffer + target_read_offset * sizeof(s32),
&output[write_pos], to_read * sizeof(s32));
} }
target_read_offset = (target_read_offset + to_read) % count_max; target_read_offset = (target_read_offset + to_read) % count_max;
read_count -= to_read; read_count -= to_read;
@ -168,10 +209,13 @@ static u32 ReadAuxBufferDsp(Core::Memory::Memory& memory, CpuAddr return_info_,
} }
if (update_count) { if (update_count) {
return_info.read_offset = (return_info.read_offset + update_count) % count_max; return_ptr->read_offset = (return_ptr->read_offset + update_count) % count_max;
}
if (!host_safe) [[unlikely]] {
memory.WriteBlockUnsafe(return_info_, return_ptr, sizeof(AuxInfo::AuxInfoDsp));
} }
memory.WriteBlockUnsafe(return_info_, &return_info, sizeof(AuxInfo::AuxInfoDsp));
return read_count_; return read_count_;
} }

View file

@ -66,7 +66,6 @@ void PageTable::Resize(std::size_t address_space_width_in_bits, std::size_t page
<< (address_space_width_in_bits - page_size_in_bits)}; << (address_space_width_in_bits - page_size_in_bits)};
pointers.resize(num_page_table_entries); pointers.resize(num_page_table_entries);
backing_addr.resize(num_page_table_entries); backing_addr.resize(num_page_table_entries);
blocks.resize(num_page_table_entries);
current_address_space_width_in_bits = address_space_width_in_bits; current_address_space_width_in_bits = address_space_width_in_bits;
page_size = 1ULL << page_size_in_bits; page_size = 1ULL << page_size_in_bits;
} }

View file

@ -122,7 +122,6 @@ struct PageTable {
* corresponding attribute element is of type `Memory`. * corresponding attribute element is of type `Memory`.
*/ */
VirtualBuffer<PageInfo> pointers; VirtualBuffer<PageInfo> pointers;
VirtualBuffer<u64> blocks;
VirtualBuffer<u64> backing_addr; VirtualBuffer<u64> backing_addr;

View file

@ -40,21 +40,8 @@ public:
~ScratchBuffer() = default; ~ScratchBuffer() = default;
ScratchBuffer(const ScratchBuffer&) = delete; ScratchBuffer(const ScratchBuffer&) = delete;
ScratchBuffer& operator=(const ScratchBuffer&) = delete; ScratchBuffer& operator=(const ScratchBuffer&) = delete;
ScratchBuffer(ScratchBuffer&&) = default;
ScratchBuffer(ScratchBuffer&& other) noexcept { ScratchBuffer& operator=(ScratchBuffer&&) = default;
swap(other);
other.last_requested_size = 0;
other.buffer_capacity = 0;
other.buffer.reset();
}
ScratchBuffer& operator=(ScratchBuffer&& other) noexcept {
swap(other);
other.last_requested_size = 0;
other.buffer_capacity = 0;
other.buffer.reset();
return *this;
}
/// This will only grow the buffer's capacity if size is greater than the current capacity. /// This will only grow the buffer's capacity if size is greater than the current capacity.
/// The previously held data will remain intact. /// The previously held data will remain intact.

View file

@ -70,7 +70,7 @@ void CoreTiming::Initialize(std::function<void()>&& on_thread_init_) {
-> std::optional<std::chrono::nanoseconds> { return std::nullopt; }; -> std::optional<std::chrono::nanoseconds> { return std::nullopt; };
ev_lost = CreateEvent("_lost_event", empty_timed_callback); ev_lost = CreateEvent("_lost_event", empty_timed_callback);
if (is_multicore) { if (is_multicore) {
timer_thread = std::make_unique<std::jthread>(ThreadEntry, std::ref(*this)); timer_thread = std::make_unique<std::thread>(ThreadEntry, std::ref(*this));
} }
} }
@ -255,6 +255,7 @@ void CoreTiming::ThreadLoop() {
#ifdef _WIN32 #ifdef _WIN32
while (!paused && !event.IsSet() && wait_time > 0) { while (!paused && !event.IsSet() && wait_time > 0) {
wait_time = *next_time - GetGlobalTimeNs().count(); wait_time = *next_time - GetGlobalTimeNs().count();
if (wait_time >= timer_resolution_ns) { if (wait_time >= timer_resolution_ns) {
Common::Windows::SleepForOneTick(); Common::Windows::SleepForOneTick();
} else { } else {

View file

@ -163,7 +163,7 @@ private:
Common::Event pause_event{}; Common::Event pause_event{};
std::mutex basic_lock; std::mutex basic_lock;
std::mutex advance_lock; std::mutex advance_lock;
std::unique_ptr<std::jthread> timer_thread; std::unique_ptr<std::thread> timer_thread;
std::atomic<bool> paused{}; std::atomic<bool> paused{};
std::atomic<bool> paused_set{}; std::atomic<bool> paused_set{};
std::atomic<bool> wait_set{}; std::atomic<bool> wait_set{};

View file

@ -329,22 +329,8 @@ std::vector<u8> HLERequestContext::ReadBufferCopy(std::size_t buffer_index) cons
} }
std::span<const u8> HLERequestContext::ReadBuffer(std::size_t buffer_index) const { std::span<const u8> HLERequestContext::ReadBuffer(std::size_t buffer_index) const {
static thread_local std::array read_buffer_a{ static thread_local std::array<Common::ScratchBuffer<u8>, 2> read_buffer_a;
Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0), static thread_local std::array<Common::ScratchBuffer<u8>, 2> read_buffer_x;
Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
};
static thread_local std::array read_buffer_data_a{
Common::ScratchBuffer<u8>(),
Common::ScratchBuffer<u8>(),
};
static thread_local std::array read_buffer_x{
Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
Core::Memory::CpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead>(memory, 0, 0),
};
static thread_local std::array read_buffer_data_x{
Common::ScratchBuffer<u8>(),
Common::ScratchBuffer<u8>(),
};
const bool is_buffer_a{BufferDescriptorA().size() > buffer_index && const bool is_buffer_a{BufferDescriptorA().size() > buffer_index &&
BufferDescriptorA()[buffer_index].Size()}; BufferDescriptorA()[buffer_index].Size()};
@ -353,17 +339,19 @@ std::span<const u8> HLERequestContext::ReadBuffer(std::size_t buffer_index) cons
BufferDescriptorA().size() > buffer_index, { return {}; }, BufferDescriptorA().size() > buffer_index, { return {}; },
"BufferDescriptorA invalid buffer_index {}", buffer_index); "BufferDescriptorA invalid buffer_index {}", buffer_index);
auto& read_buffer = read_buffer_a[buffer_index]; auto& read_buffer = read_buffer_a[buffer_index];
return read_buffer.Read(BufferDescriptorA()[buffer_index].Address(), read_buffer.resize_destructive(BufferDescriptorA()[buffer_index].Size());
BufferDescriptorA()[buffer_index].Size(), memory.ReadBlock(BufferDescriptorA()[buffer_index].Address(), read_buffer.data(),
&read_buffer_data_a[buffer_index]); read_buffer.size());
return read_buffer;
} else { } else {
ASSERT_OR_EXECUTE_MSG( ASSERT_OR_EXECUTE_MSG(
BufferDescriptorX().size() > buffer_index, { return {}; }, BufferDescriptorX().size() > buffer_index, { return {}; },
"BufferDescriptorX invalid buffer_index {}", buffer_index); "BufferDescriptorX invalid buffer_index {}", buffer_index);
auto& read_buffer = read_buffer_x[buffer_index]; auto& read_buffer = read_buffer_x[buffer_index];
return read_buffer.Read(BufferDescriptorX()[buffer_index].Address(), read_buffer.resize_destructive(BufferDescriptorX()[buffer_index].Size());
BufferDescriptorX()[buffer_index].Size(), memory.ReadBlock(BufferDescriptorX()[buffer_index].Address(), read_buffer.data(),
&read_buffer_data_x[buffer_index]); read_buffer.size());
return read_buffer;
} }
} }

View file

@ -266,22 +266,6 @@ struct Memory::Impl {
ReadBlockImpl<true>(*system.ApplicationProcess(), src_addr, dest_buffer, size); ReadBlockImpl<true>(*system.ApplicationProcess(), src_addr, dest_buffer, size);
} }
/**
 * Returns a direct host pointer for the guest range [src_addr, src_addr + size) when the
 * first and last pages touched by the range carry the same `blocks` entry.
 *
 * @param src_addr Guest virtual address of the first byte of the range.
 * @param size     Length of the range in bytes.
 * @returns The result of GetPointerSilent(src_addr) when both entries match, nullptr otherwise.
 */
const u8* GetSpan(const VAddr src_addr, const std::size_t size) const {
    // The last byte of the range sits at src_addr + size - 1. Looking up src_addr + size
    // would inspect the page *after* the range whenever the range ends exactly on a page
    // boundary, rejecting an otherwise valid span (and possibly indexing one entry too far).
    const VAddr last_byte = size == 0 ? src_addr : src_addr + size - 1;
    const auto& blocks = current_page_table->blocks;
    if (blocks[src_addr >> YUZU_PAGEBITS] == blocks[last_byte >> YUZU_PAGEBITS]) {
        return GetPointerSilent(src_addr);
    }
    return nullptr;
}
/**
 * Mutable counterpart of the const GetSpan overload: returns a direct host pointer for the
 * guest range [src_addr, src_addr + size) when the first and last pages touched by the range
 * carry the same `blocks` entry.
 *
 * @param src_addr Guest virtual address of the first byte of the range.
 * @param size     Length of the range in bytes.
 * @returns The result of GetPointerSilent(src_addr) when both entries match, nullptr otherwise.
 */
u8* GetSpan(const VAddr src_addr, const std::size_t size) {
    // Compare against the page holding the last byte (src_addr + size - 1), not the page
    // holding the one-past-the-end address; otherwise a range that ends exactly on a page
    // boundary is judged by a page it never touches.
    const VAddr last_byte = size == 0 ? src_addr : src_addr + size - 1;
    const auto& blocks = current_page_table->blocks;
    if (blocks[src_addr >> YUZU_PAGEBITS] == blocks[last_byte >> YUZU_PAGEBITS]) {
        return GetPointerSilent(src_addr);
    }
    return nullptr;
}
template <bool UNSAFE> template <bool UNSAFE>
void WriteBlockImpl(const Kernel::KProcess& process, const Common::ProcessAddress dest_addr, void WriteBlockImpl(const Kernel::KProcess& process, const Common::ProcessAddress dest_addr,
const void* src_buffer, const std::size_t size) { const void* src_buffer, const std::size_t size) {
@ -575,7 +559,7 @@ struct Memory::Impl {
} }
} }
const auto end = base + size; const Common::ProcessAddress end = base + size;
ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}", ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}",
base + page_table.pointers.size()); base + page_table.pointers.size());
@ -586,18 +570,14 @@ struct Memory::Impl {
while (base != end) { while (base != end) {
page_table.pointers[base].Store(nullptr, type); page_table.pointers[base].Store(nullptr, type);
page_table.backing_addr[base] = 0; page_table.backing_addr[base] = 0;
page_table.blocks[base] = 0;
base += 1; base += 1;
} }
} else { } else {
auto orig_base = base;
while (base != end) { while (base != end) {
auto host_ptr = page_table.pointers[base].Store(
system.DeviceMemory().GetPointer<u8>(target) - (base << YUZU_PAGEBITS); system.DeviceMemory().GetPointer<u8>(target) - (base << YUZU_PAGEBITS), type);
auto backing = GetInteger(target) - (base << YUZU_PAGEBITS); page_table.backing_addr[base] = GetInteger(target) - (base << YUZU_PAGEBITS);
page_table.pointers[base].Store(host_ptr, type);
page_table.backing_addr[base] = backing;
page_table.blocks[base] = orig_base << YUZU_PAGEBITS;
ASSERT_MSG(page_table.pointers[base].Pointer(), ASSERT_MSG(page_table.pointers[base].Pointer(),
"memory mapping base yield a nullptr within the table"); "memory mapping base yield a nullptr within the table");
@ -767,14 +747,6 @@ struct Memory::Impl {
VAddr last_address; VAddr last_address;
}; };
/// Forwards an invalidation request for `size` bytes at `dest_addr` to the GPU object
/// obtained from `system`, passing the address as a plain integer.
void InvalidateRegion(Common::ProcessAddress dest_addr, size_t size) {
    auto&& gpu = system.GPU();
    const auto raw_addr = GetInteger(dest_addr);
    gpu.InvalidateRegion(raw_addr, size);
}
/// Forwards a flush request for `size` bytes at `dest_addr` to the GPU object obtained
/// from `system`, passing the address as a plain integer.
void FlushRegion(Common::ProcessAddress dest_addr, size_t size) {
    auto&& gpu = system.GPU();
    const auto raw_addr = GetInteger(dest_addr);
    gpu.FlushRegion(raw_addr, size);
}
Core::System& system; Core::System& system;
Common::PageTable* current_page_table = nullptr; Common::PageTable* current_page_table = nullptr;
std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES> std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES>
@ -909,14 +881,6 @@ void Memory::ReadBlockUnsafe(const Common::ProcessAddress src_addr, void* dest_b
impl->ReadBlockUnsafe(src_addr, dest_buffer, size); impl->ReadBlockUnsafe(src_addr, dest_buffer, size);
} }
/// Read-only public entry point; delegates the lookup to the implementation object.
const u8* Memory::GetSpan(const VAddr src_addr, const std::size_t size) const {
    const u8* const host_ptr = impl->GetSpan(src_addr, size);
    return host_ptr;
}
/// Mutable public entry point; delegates the lookup to the implementation object.
u8* Memory::GetSpan(const VAddr src_addr, const std::size_t size) {
    u8* const host_ptr = impl->GetSpan(src_addr, size);
    return host_ptr;
}
void Memory::WriteBlock(const Common::ProcessAddress dest_addr, const void* src_buffer, void Memory::WriteBlock(const Common::ProcessAddress dest_addr, const void* src_buffer,
const std::size_t size) { const std::size_t size) {
impl->WriteBlock(dest_addr, src_buffer, size); impl->WriteBlock(dest_addr, src_buffer, size);
@ -960,12 +924,4 @@ void Memory::MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug)
impl->MarkRegionDebug(GetInteger(vaddr), size, debug); impl->MarkRegionDebug(GetInteger(vaddr), size, debug);
} }
/// Public wrapper: hands the invalidation request straight to the implementation object.
void Memory::InvalidateRegion(Common::ProcessAddress dest_addr, size_t size) {
    auto& implementation = *impl;
    implementation.InvalidateRegion(dest_addr, size);
}
/// Public wrapper: hands the flush request straight to the implementation object.
void Memory::FlushRegion(Common::ProcessAddress dest_addr, size_t size) {
    auto& implementation = *impl;
    implementation.FlushRegion(dest_addr, size);
}
} // namespace Core::Memory } // namespace Core::Memory

View file

@ -5,12 +5,8 @@
#include <cstddef> #include <cstddef>
#include <memory> #include <memory>
#include <optional>
#include <span> #include <span>
#include <string> #include <string>
#include <vector>
#include "common/scratch_buffer.h"
#include "common/typed_address.h" #include "common/typed_address.h"
#include "core/hle/result.h" #include "core/hle/result.h"
@ -28,10 +24,6 @@ class PhysicalMemory;
class KProcess; class KProcess;
} // namespace Kernel } // namespace Kernel
namespace Tegra {
class MemoryManager;
}
namespace Core::Memory { namespace Core::Memory {
/** /**
@ -351,9 +343,6 @@ public:
*/ */
void ReadBlockUnsafe(Common::ProcessAddress src_addr, void* dest_buffer, std::size_t size); void ReadBlockUnsafe(Common::ProcessAddress src_addr, void* dest_buffer, std::size_t size);
const u8* GetSpan(const VAddr src_addr, const std::size_t size) const;
u8* GetSpan(const VAddr src_addr, const std::size_t size);
/** /**
* Writes a range of bytes into the current process' address space at the specified * Writes a range of bytes into the current process' address space at the specified
* virtual address. * virtual address.
@ -472,8 +461,6 @@ public:
void MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug); void MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug);
void SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers); void SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers);
void InvalidateRegion(Common::ProcessAddress dest_addr, size_t size);
void FlushRegion(Common::ProcessAddress dest_addr, size_t size);
private: private:
Core::System& system; Core::System& system;
@ -482,203 +469,4 @@ private:
std::unique_ptr<Impl> impl; std::unique_ptr<Impl> impl;
}; };
/// Bit flags used as the FLAGS template argument of the GuestMemory wrappers.
enum GuestMemoryFlags : u32 {
    // Individual bits.
    Read = 0x1,
    Write = 0x2,
    Safe = 0x4,
    Cached = 0x8,

    // Combinations that include the Safe bit.
    SafeRead = Safe | Read,
    SafeWrite = Safe | Write,
    SafeReadWrite = Safe | Read | Write,
    SafeReadCachedWrite = Safe | Read | Write | Cached,

    // Combinations without the Safe bit.
    UnsafeRead = Read,
    UnsafeWrite = Write,
    UnsafeReadWrite = Read | Write,
    UnsafeReadCachedWrite = Read | Write | Cached,
};
namespace {
/**
 * Typed window onto `size_` elements of guest memory starting at `addr_`, accessed through a
 * memory object of type M. M is expected to provide the GetSpan, ReadBlock / ReadBlockUnsafe,
 * WriteBlock / WriteBlockUnsafe / WriteBlockCached and FlushRegion calls used below.
 *
 * When M can hand out a direct pointer for the whole range, the view aliases that pointer.
 * Otherwise the data is copied into `backup` (when supplied) or into an internal vector.
 *
 * FLAGS must contain at least one of GuestMemoryFlags::Read or GuestMemoryFlags::Write.
 */
template <typename M, typename T, GuestMemoryFlags FLAGS>
class GuestMemory {
public:
    // Container-style aliases. They are public so that code outside the class can name them.
    using iterator = T*;
    using const_iterator = const T*;
    using value_type = T;
    using element_type = T;
    using iterator_category = std::contiguous_iterator_tag;

    GuestMemory() = delete;

    /**
     * @param memory_ Memory object all accesses go through.
     * @param addr_   Guest address of the first element.
     * @param size_   Number of T elements in the view.
     * @param backup  Optional buffer used instead of the internal vector when a copy is needed.
     *
     * Performs an initial Read() when FLAGS contains the Read bit.
     */
    explicit GuestMemory(M& memory_, u64 addr_, std::size_t size_,
                         Common::ScratchBuffer<T>* backup = nullptr)
        : memory{memory_}, addr{addr_}, size{size_} {
        static_assert(FLAGS & GuestMemoryFlags::Read || FLAGS & GuestMemoryFlags::Write);
        if constexpr (FLAGS & GuestMemoryFlags::Read) {
            Read(addr, size, backup);
        }
    }

    ~GuestMemory() = default;

    T* data() noexcept {
        return data_span.data();
    }

    const T* data() const noexcept {
        return data_span.data();
    }

    [[nodiscard]] T* begin() noexcept {
        return data();
    }

    [[nodiscard]] const T* begin() const noexcept {
        return data();
    }

    [[nodiscard]] T* end() noexcept {
        return data() + size;
    }

    [[nodiscard]] const T* end() const noexcept {
        return data() + size;
    }

    T& operator[](size_t index) noexcept {
        return data_span[index];
    }

    const T& operator[](size_t index) const noexcept {
        return data_span[index];
    }

    /// Retargets the view and records that the address changed. No data is read or written here.
    void SetAddressAndSize(u64 addr_, std::size_t size_) noexcept {
        addr = addr_;
        size = size_;
        addr_changed = true;
    }

    /**
     * Points the view at `size_` elements starting at `addr_`.
     *
     * Tries a direct span first (flushing the region when FLAGS contains Safe). If that is not
     * possible, copies the data into `backup` or the internal vector with ReadBlock (Safe) or
     * ReadBlockUnsafe.
     *
     * @returns The resulting span, or an empty span when `size_` is zero.
     */
    std::span<T> Read(u64 addr_, std::size_t size_,
                      Common::ScratchBuffer<T>* backup = nullptr) noexcept {
        addr = addr_;
        size = size_;
        if (size == 0) {
            is_data_copy = true;
            return {};
        }

        if (TrySetSpan()) {
            if constexpr (FLAGS & GuestMemoryFlags::Safe) {
                memory.FlushRegion(addr, size * sizeof(T));
            }
        } else {
            if (backup) {
                backup->resize_destructive(size);
                data_span = *backup;
            } else {
                data_copy.resize(size);
                data_span = std::span(data_copy);
            }
            is_data_copy = true;
            span_valid = true;
            if constexpr (FLAGS & GuestMemoryFlags::Safe) {
                memory.ReadBlock(addr, data_span.data(), size * sizeof(T));
            } else {
                memory.ReadBlockUnsafe(addr, data_span.data(), size * sizeof(T));
            }
        }
        return data_span;
    }

    /**
     * Writes `write_data` to the current address, using WriteBlockCached, WriteBlock or
     * WriteBlockUnsafe depending on FLAGS.
     *
     * At most `size` elements are written, and never more than `write_data` actually holds.
     */
    void Write(std::span<T> write_data) noexcept {
        // Clamp to the caller's span so a short span is never read past its end.
        const std::size_t count = write_data.size() < size ? write_data.size() : size;
        const std::size_t bytes = count * sizeof(T);
        if constexpr (FLAGS & GuestMemoryFlags::Cached) {
            memory.WriteBlockCached(addr, write_data.data(), bytes);
        } else if constexpr (FLAGS & GuestMemoryFlags::Safe) {
            memory.WriteBlock(addr, write_data.data(), bytes);
        } else {
            memory.WriteBlockUnsafe(addr, write_data.data(), bytes);
        }
    }

    /// Asks `memory` for a direct pointer covering the whole range and aliases it on success.
    /// @returns true when the view now aliases that pointer, false when GetSpan returned null.
    bool TrySetSpan() noexcept {
        if (u8* ptr = memory.GetSpan(addr, size * sizeof(T)); ptr) {
            data_span = {reinterpret_cast<T*>(ptr), size};
            span_valid = true;
            return true;
        }
        return false;
    }

protected:
    bool IsDataCopy() const noexcept {
        return is_data_copy;
    }

    bool AddressChanged() const noexcept {
        return addr_changed;
    }

    M& memory;
    u64 addr;
    size_t size;
    std::span<T> data_span{};
    std::vector<T> data_copy;
    bool span_valid{false};
    bool is_data_copy{false};
    bool addr_changed{false};
};
/**
 * GuestMemory variant that commits its contents when it goes out of scope.
 *
 * On destruction, when FLAGS contains Write and the view is non-empty:
 *  - if the address was changed or the view holds a copy, the data is written to the current
 *    address with WriteBlockCached, WriteBlock or WriteBlockUnsafe depending on FLAGS;
 *  - otherwise, when FLAGS contains Safe, the region is invalidated via InvalidateRegion.
 */
template <typename M, typename T, GuestMemoryFlags FLAGS>
class GuestMemoryScoped : public GuestMemory<M, T, FLAGS> {
public:
    GuestMemoryScoped() = delete;

    /**
     * @param memory_ Memory object all accesses go through.
     * @param addr_   Guest address of the first element.
     * @param size_   Number of T elements in the view.
     * @param backup  Optional buffer used instead of the internal vector when a copy is needed.
     */
    explicit GuestMemoryScoped(M& memory_, u64 addr_, std::size_t size_,
                               Common::ScratchBuffer<T>* backup = nullptr)
        : GuestMemory<M, T, FLAGS>(memory_, addr_, size_, backup) {
        // Without the Read flag the base constructor never set up a span, so do it here.
        if constexpr (!(FLAGS & GuestMemoryFlags::Read)) {
            if (!this->TrySetSpan()) {
                // Same fallback as GuestMemory::Read: always end up with storage for `size`
                // elements, whether or not the caller supplied a backup buffer.
                if (backup) {
                    backup->resize_destructive(this->size);
                    this->data_span = *backup;
                } else {
                    this->data_copy.resize(this->size);
                    this->data_span = std::span(this->data_copy);
                }
                this->span_valid = true;
                this->is_data_copy = true;
            }
        }
    }

    ~GuestMemoryScoped() {
        if constexpr (FLAGS & GuestMemoryFlags::Write) {
            if (this->size == 0) [[unlikely]] {
                return;
            }

            if (this->AddressChanged() || this->IsDataCopy()) {
                ASSERT(this->span_valid);
                if constexpr (FLAGS & GuestMemoryFlags::Cached) {
                    this->memory.WriteBlockCached(this->addr, this->data_span.data(),
                                                  this->size * sizeof(T));
                } else if constexpr (FLAGS & GuestMemoryFlags::Safe) {
                    this->memory.WriteBlock(this->addr, this->data_span.data(),
                                            this->size * sizeof(T));
                } else {
                    this->memory.WriteBlockUnsafe(this->addr, this->data_span.data(),
                                                  this->size * sizeof(T));
                }
            } else if constexpr (FLAGS & GuestMemoryFlags::Safe) {
                this->memory.InvalidateRegion(this->addr, this->size * sizeof(T));
            }
        }
    }
};
} // namespace
/// GuestMemory instantiated with this namespace's Memory class as the memory object.
template <typename T, GuestMemoryFlags FLAGS>
using CpuGuestMemory = GuestMemory<Memory, T, FLAGS>;
/// GuestMemoryScoped instantiated with this namespace's Memory class as the memory object.
template <typename T, GuestMemoryFlags FLAGS>
using CpuGuestMemoryScoped = GuestMemoryScoped<Memory, T, FLAGS>;
/// GuestMemory instantiated with Tegra::MemoryManager as the memory object.
template <typename T, GuestMemoryFlags FLAGS>
using GpuGuestMemory = GuestMemory<Tegra::MemoryManager, T, FLAGS>;
/// GuestMemoryScoped instantiated with Tegra::MemoryManager as the memory object.
template <typename T, GuestMemoryFlags FLAGS>
using GpuGuestMemoryScoped = GuestMemoryScoped<Tegra::MemoryManager, T, FLAGS>;
} // namespace Core::Memory } // namespace Core::Memory

View file

@ -234,10 +234,9 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
if (has_new_downloads) { if (has_new_downloads) {
memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount); memory_tracker.MarkRegionAsGpuModified(*cpu_dest_address, amount);
} }
tmp_buffer.resize_destructive(amount);
Core::Memory::CpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::UnsafeReadWrite> tmp( cpu_memory.ReadBlockUnsafe(*cpu_src_address, tmp_buffer.data(), amount);
cpu_memory, *cpu_src_address, amount, &tmp_buffer); cpu_memory.WriteBlockUnsafe(*cpu_dest_address, tmp_buffer.data(), amount);
tmp.SetAddressAndSize(*cpu_dest_address, amount);
return true; return true;
} }

View file

@ -5,7 +5,6 @@
#include "common/microprofile.h" #include "common/microprofile.h"
#include "common/settings.h" #include "common/settings.h"
#include "core/core.h" #include "core/core.h"
#include "core/memory.h"
#include "video_core/dma_pusher.h" #include "video_core/dma_pusher.h"
#include "video_core/engines/maxwell_3d.h" #include "video_core/engines/maxwell_3d.h"
#include "video_core/gpu.h" #include "video_core/gpu.h"
@ -13,8 +12,6 @@
namespace Tegra { namespace Tegra {
constexpr u32 MacroRegistersStart = 0xE00;
DmaPusher::DmaPusher(Core::System& system_, GPU& gpu_, MemoryManager& memory_manager_, DmaPusher::DmaPusher(Core::System& system_, GPU& gpu_, MemoryManager& memory_manager_,
Control::ChannelState& channel_state_) Control::ChannelState& channel_state_)
: gpu{gpu_}, system{system_}, memory_manager{memory_manager_}, puller{gpu_, memory_manager_, : gpu{gpu_}, system{system_}, memory_manager{memory_manager_}, puller{gpu_, memory_manager_,
@ -77,16 +74,25 @@ bool DmaPusher::Step() {
} }
// Push buffer non-empty, read a word // Push buffer non-empty, read a word
if (dma_state.method >= MacroRegistersStart) { command_headers.resize_destructive(command_list_header.size);
if (subchannels[dma_state.subchannel]) { constexpr u32 MacroRegistersStart = 0xE00;
subchannels[dma_state.subchannel]->current_dirty = memory_manager.IsMemoryDirty( if (dma_state.method < MacroRegistersStart) {
dma_state.dma_get, command_list_header.size * sizeof(u32)); if (Settings::IsGPULevelHigh()) {
memory_manager.ReadBlock(dma_state.dma_get, command_headers.data(),
command_list_header.size * sizeof(u32));
} else {
memory_manager.ReadBlockUnsafe(dma_state.dma_get, command_headers.data(),
command_list_header.size * sizeof(u32));
} }
} else {
const size_t copy_size = command_list_header.size * sizeof(u32);
if (subchannels[dma_state.subchannel]) {
subchannels[dma_state.subchannel]->current_dirty =
memory_manager.IsMemoryDirty(dma_state.dma_get, copy_size);
}
memory_manager.ReadBlockUnsafe(dma_state.dma_get, command_headers.data(), copy_size);
} }
Core::Memory::GpuGuestMemory<Tegra::CommandHeader, ProcessCommands(command_headers);
Core::Memory::GuestMemoryFlags::UnsafeRead>
headers(memory_manager, dma_state.dma_get, command_list_header.size, &command_headers);
ProcessCommands(headers);
} }
return true; return true;

View file

@ -5,7 +5,6 @@
#include "common/algorithm.h" #include "common/algorithm.h"
#include "common/assert.h" #include "common/assert.h"
#include "core/memory.h"
#include "video_core/engines/engine_upload.h" #include "video_core/engines/engine_upload.h"
#include "video_core/memory_manager.h" #include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h" #include "video_core/rasterizer_interface.h"
@ -47,11 +46,15 @@ void State::ProcessData(const u32* data, size_t num_data) {
void State::ProcessData(std::span<const u8> read_buffer) { void State::ProcessData(std::span<const u8> read_buffer) {
const GPUVAddr address{regs.dest.Address()}; const GPUVAddr address{regs.dest.Address()};
if (is_linear) { if (is_linear) {
for (size_t line = 0; line < regs.line_count; ++line) { if (regs.line_count == 1) {
const GPUVAddr dest_line = address + line * regs.dest.pitch; rasterizer->AccelerateInlineToMemory(address, copy_size, read_buffer);
std::span<const u8> buffer(read_buffer.data() + line * regs.line_length_in, } else {
regs.line_length_in); for (size_t line = 0; line < regs.line_count; ++line) {
rasterizer->AccelerateInlineToMemory(dest_line, regs.line_length_in, buffer); const GPUVAddr dest_line = address + line * regs.dest.pitch;
std::span<const u8> buffer(read_buffer.data() + line * regs.line_length_in,
regs.line_length_in);
rasterizer->AccelerateInlineToMemory(dest_line, regs.line_length_in, buffer);
}
} }
} else { } else {
u32 width = regs.dest.width; u32 width = regs.dest.width;
@ -67,14 +70,13 @@ void State::ProcessData(std::span<const u8> read_buffer) {
const std::size_t dst_size = Tegra::Texture::CalculateSize( const std::size_t dst_size = Tegra::Texture::CalculateSize(
true, bytes_per_pixel, width, regs.dest.height, regs.dest.depth, true, bytes_per_pixel, width, regs.dest.height, regs.dest.depth,
regs.dest.BlockHeight(), regs.dest.BlockDepth()); regs.dest.BlockHeight(), regs.dest.BlockDepth());
tmp_buffer.resize_destructive(dst_size);
Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size);
tmp(memory_manager, address, dst_size, &tmp_buffer); Tegra::Texture::SwizzleSubrect(tmp_buffer, read_buffer, bytes_per_pixel, width,
regs.dest.height, regs.dest.depth, x_offset, regs.dest.y,
Tegra::Texture::SwizzleSubrect(tmp, read_buffer, bytes_per_pixel, width, regs.dest.height, x_elements, regs.line_count, regs.dest.BlockHeight(),
regs.dest.depth, x_offset, regs.dest.y, x_elements,
regs.line_count, regs.dest.BlockHeight(),
regs.dest.BlockDepth(), regs.line_length_in); regs.dest.BlockDepth(), regs.line_length_in);
memory_manager.WriteBlockCached(address, tmp_buffer.data(), dst_size);
} }
} }

View file

@ -84,6 +84,7 @@ Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const {
Texture::TICEntry tic_entry; Texture::TICEntry tic_entry;
memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
return tic_entry; return tic_entry;
} }

View file

@ -9,7 +9,6 @@
#include "common/settings.h" #include "common/settings.h"
#include "core/core.h" #include "core/core.h"
#include "core/core_timing.h" #include "core/core_timing.h"
#include "core/memory.h"
#include "video_core/dirty_flags.h" #include "video_core/dirty_flags.h"
#include "video_core/engines/draw_manager.h" #include "video_core/engines/draw_manager.h"
#include "video_core/engines/maxwell_3d.h" #include "video_core/engines/maxwell_3d.h"
@ -680,14 +679,17 @@ void Maxwell3D::ProcessCBData(u32 value) {
Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const { Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
const GPUVAddr tic_address_gpu{regs.tex_header.Address() + const GPUVAddr tic_address_gpu{regs.tex_header.Address() +
tic_index * sizeof(Texture::TICEntry)}; tic_index * sizeof(Texture::TICEntry)};
Texture::TICEntry tic_entry; Texture::TICEntry tic_entry;
memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry)); memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
return tic_entry; return tic_entry;
} }
Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const { Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
const GPUVAddr tsc_address_gpu{regs.tex_sampler.Address() + const GPUVAddr tsc_address_gpu{regs.tex_sampler.Address() +
tsc_index * sizeof(Texture::TSCEntry)}; tsc_index * sizeof(Texture::TSCEntry)};
Texture::TSCEntry tsc_entry; Texture::TSCEntry tsc_entry;
memory_manager.ReadBlockUnsafe(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry)); memory_manager.ReadBlockUnsafe(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry));
return tsc_entry; return tsc_entry;

View file

@ -7,7 +7,6 @@
#include "common/microprofile.h" #include "common/microprofile.h"
#include "common/settings.h" #include "common/settings.h"
#include "core/core.h" #include "core/core.h"
#include "core/memory.h"
#include "video_core/engines/maxwell_3d.h" #include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/maxwell_dma.h" #include "video_core/engines/maxwell_dma.h"
#include "video_core/memory_manager.h" #include "video_core/memory_manager.h"
@ -131,12 +130,11 @@ void MaxwellDMA::Launch() {
UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); UNIMPLEMENTED_IF(regs.offset_out % 16 != 0);
read_buffer.resize_destructive(16); read_buffer.resize_destructive(16);
for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { for (u32 offset = 0; offset < regs.line_length_in; offset += 16) {
Core::Memory::GpuGuestMemoryScoped< memory_manager.ReadBlock(
u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> convert_linear_2_blocklinear_addr(regs.offset_in + offset),
tmp_write_buffer(memory_manager, read_buffer.data(), read_buffer.size());
convert_linear_2_blocklinear_addr(regs.offset_in + offset), memory_manager.WriteBlockCached(regs.offset_out + offset, read_buffer.data(),
16, &read_buffer); read_buffer.size());
tmp_write_buffer.SetAddressAndSize(regs.offset_out + offset, 16);
} }
} else if (is_src_pitch && !is_dst_pitch) { } else if (is_src_pitch && !is_dst_pitch) {
UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0); UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0);
@ -144,19 +142,20 @@ void MaxwellDMA::Launch() {
UNIMPLEMENTED_IF(regs.offset_out % 16 != 0); UNIMPLEMENTED_IF(regs.offset_out % 16 != 0);
read_buffer.resize_destructive(16); read_buffer.resize_destructive(16);
for (u32 offset = 0; offset < regs.line_length_in; offset += 16) { for (u32 offset = 0; offset < regs.line_length_in; offset += 16) {
Core::Memory::GpuGuestMemoryScoped< memory_manager.ReadBlock(regs.offset_in + offset, read_buffer.data(),
u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> read_buffer.size());
tmp_write_buffer(memory_manager, regs.offset_in + offset, 16, &read_buffer); memory_manager.WriteBlockCached(
tmp_write_buffer.SetAddressAndSize( convert_linear_2_blocklinear_addr(regs.offset_out + offset),
convert_linear_2_blocklinear_addr(regs.offset_out + offset), 16); read_buffer.data(), read_buffer.size());
} }
} else { } else {
if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) { if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) {
Core::Memory::GpuGuestMemoryScoped< read_buffer.resize_destructive(regs.line_length_in);
u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite> memory_manager.ReadBlock(regs.offset_in, read_buffer.data(),
tmp_write_buffer(memory_manager, regs.offset_in, regs.line_length_in, regs.line_length_in,
&read_buffer); VideoCommon::CacheType::NoBufferCache);
tmp_write_buffer.SetAddressAndSize(regs.offset_out, regs.line_length_in); memory_manager.WriteBlockCached(regs.offset_out, read_buffer.data(),
regs.line_length_in);
} }
} }
} }
@ -223,15 +222,17 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth);
const size_t dst_size = dst_operand.pitch * regs.line_count; const size_t dst_size = dst_operand.pitch * regs.line_count;
read_buffer.resize_destructive(src_size);
write_buffer.resize_destructive(dst_size);
Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer( memory_manager.ReadBlock(src_operand.address, read_buffer.data(), src_size);
memory_manager, src_operand.address, src_size, &read_buffer); memory_manager.ReadBlock(dst_operand.address, write_buffer.data(), dst_size);
Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite>
tmp_write_buffer(memory_manager, dst_operand.address, dst_size, &write_buffer);
UnswizzleSubrect(tmp_write_buffer, tmp_read_buffer, bytes_per_pixel, width, height, depth, UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset,
x_offset, src_params.origin.y, x_elements, regs.line_count, block_height, src_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
block_depth, dst_operand.pitch); dst_operand.pitch);
memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
} }
void MaxwellDMA::CopyPitchToBlockLinear() { void MaxwellDMA::CopyPitchToBlockLinear() {
@ -286,17 +287,18 @@ void MaxwellDMA::CopyPitchToBlockLinear() {
CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth);
const size_t src_size = static_cast<size_t>(regs.pitch_in) * regs.line_count; const size_t src_size = static_cast<size_t>(regs.pitch_in) * regs.line_count;
GPUVAddr src_addr = regs.offset_in; read_buffer.resize_destructive(src_size);
GPUVAddr dst_addr = regs.offset_out; write_buffer.resize_destructive(dst_size);
Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer(
memory_manager, src_addr, src_size, &read_buffer);
Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite>
tmp_write_buffer(memory_manager, dst_addr, dst_size, &write_buffer);
// If the input is linear and the output is tiled, swizzle the input and copy it over. memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size);
SwizzleSubrect(tmp_write_buffer, tmp_read_buffer, bytes_per_pixel, width, height, depth, memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size);
x_offset, dst_params.origin.y, x_elements, regs.line_count, block_height,
block_depth, regs.pitch_in); // If the input is linear and the output is tiled, swizzle the input and copy it over.
SwizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset,
dst_params.origin.y, x_elements, regs.line_count, block_height, block_depth,
regs.pitch_in);
memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
} }
void MaxwellDMA::CopyBlockLinearToBlockLinear() { void MaxwellDMA::CopyBlockLinearToBlockLinear() {
@ -340,20 +342,23 @@ void MaxwellDMA::CopyBlockLinearToBlockLinear() {
const u32 pitch = x_elements * bytes_per_pixel; const u32 pitch = x_elements * bytes_per_pixel;
const size_t mid_buffer_size = pitch * regs.line_count; const size_t mid_buffer_size = pitch * regs.line_count;
read_buffer.resize_destructive(src_size);
write_buffer.resize_destructive(dst_size);
intermediate_buffer.resize_destructive(mid_buffer_size); intermediate_buffer.resize_destructive(mid_buffer_size);
Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> tmp_read_buffer( memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size);
memory_manager, regs.offset_in, src_size, &read_buffer); memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size);
Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::SafeReadCachedWrite>
tmp_write_buffer(memory_manager, regs.offset_out, dst_size, &write_buffer);
UnswizzleSubrect(intermediate_buffer, tmp_read_buffer, bytes_per_pixel, src_width, src.height, UnswizzleSubrect(intermediate_buffer, read_buffer, bytes_per_pixel, src_width, src.height,
src.depth, src_x_offset, src.origin.y, x_elements, regs.line_count, src.depth, src_x_offset, src.origin.y, x_elements, regs.line_count,
src.block_size.height, src.block_size.depth, pitch); src.block_size.height, src.block_size.depth, pitch);
SwizzleSubrect(tmp_write_buffer, intermediate_buffer, bytes_per_pixel, dst_width, dst.height, SwizzleSubrect(write_buffer, intermediate_buffer, bytes_per_pixel, dst_width, dst.height,
dst.depth, dst_x_offset, dst.origin.y, x_elements, regs.line_count, dst.depth, dst_x_offset, dst.origin.y, x_elements, regs.line_count,
dst.block_size.height, dst.block_size.depth, pitch); dst.block_size.height, dst.block_size.depth, pitch);
memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size);
} }
void MaxwellDMA::ReleaseSemaphore() { void MaxwellDMA::ReleaseSemaphore() {

View file

@ -159,11 +159,11 @@ bool SoftwareBlitEngine::Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst,
const auto src_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format)); const auto src_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format));
const auto dst_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(dst.format)); const auto dst_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(dst.format));
const size_t src_size = get_surface_size(src, src_bytes_per_pixel); const size_t src_size = get_surface_size(src, src_bytes_per_pixel);
impl->tmp_buffer.resize_destructive(src_size);
Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::SafeRead> tmp_buffer( memory_manager.ReadBlock(src.Address(), impl->tmp_buffer.data(), src_size);
memory_manager, src.Address(), src_size, &impl->tmp_buffer);
const size_t src_copy_size = src_extent_x * src_extent_y * src_bytes_per_pixel; const size_t src_copy_size = src_extent_x * src_extent_y * src_bytes_per_pixel;
const size_t dst_copy_size = dst_extent_x * dst_extent_y * dst_bytes_per_pixel; const size_t dst_copy_size = dst_extent_x * dst_extent_y * dst_bytes_per_pixel;
impl->src_buffer.resize_destructive(src_copy_size); impl->src_buffer.resize_destructive(src_copy_size);
@ -200,11 +200,12 @@ bool SoftwareBlitEngine::Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst,
impl->dst_buffer.resize_destructive(dst_copy_size); impl->dst_buffer.resize_destructive(dst_copy_size);
if (src.linear == Fermi2D::MemoryLayout::BlockLinear) { if (src.linear == Fermi2D::MemoryLayout::BlockLinear) {
UnswizzleSubrect(impl->src_buffer, tmp_buffer, src_bytes_per_pixel, src.width, src.height, UnswizzleSubrect(impl->src_buffer, impl->tmp_buffer, src_bytes_per_pixel, src.width,
src.depth, config.src_x0, config.src_y0, src_extent_x, src_extent_y, src.height, src.depth, config.src_x0, config.src_y0, src_extent_x,
src.block_height, src.block_depth, src_extent_x * src_bytes_per_pixel); src_extent_y, src.block_height, src.block_depth,
src_extent_x * src_bytes_per_pixel);
} else { } else {
process_pitch_linear(false, tmp_buffer, impl->src_buffer, src_extent_x, src_extent_y, process_pitch_linear(false, impl->tmp_buffer, impl->src_buffer, src_extent_x, src_extent_y,
src.pitch, config.src_x0, config.src_y0, src_bytes_per_pixel); src.pitch, config.src_x0, config.src_y0, src_bytes_per_pixel);
} }
@ -220,18 +221,20 @@ bool SoftwareBlitEngine::Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst,
} }
const size_t dst_size = get_surface_size(dst, dst_bytes_per_pixel); const size_t dst_size = get_surface_size(dst, dst_bytes_per_pixel);
Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::SafeReadWrite> impl->tmp_buffer.resize_destructive(dst_size);
tmp_buffer2(memory_manager, dst.Address(), dst_size, &impl->tmp_buffer); memory_manager.ReadBlock(dst.Address(), impl->tmp_buffer.data(), dst_size);
if (dst.linear == Fermi2D::MemoryLayout::BlockLinear) { if (dst.linear == Fermi2D::MemoryLayout::BlockLinear) {
SwizzleSubrect(tmp_buffer2, impl->dst_buffer, dst_bytes_per_pixel, dst.width, dst.height, SwizzleSubrect(impl->tmp_buffer, impl->dst_buffer, dst_bytes_per_pixel, dst.width,
dst.depth, config.dst_x0, config.dst_y0, dst_extent_x, dst_extent_y, dst.height, dst.depth, config.dst_x0, config.dst_y0, dst_extent_x,
dst.block_height, dst.block_depth, dst_extent_x * dst_bytes_per_pixel); dst_extent_y, dst.block_height, dst.block_depth,
dst_extent_x * dst_bytes_per_pixel);
} else { } else {
process_pitch_linear(true, impl->dst_buffer, tmp_buffer2, dst_extent_x, dst_extent_y, process_pitch_linear(true, impl->dst_buffer, impl->tmp_buffer, dst_extent_x, dst_extent_y,
dst.pitch, config.dst_x0, config.dst_y0, dst.pitch, config.dst_x0, config.dst_y0,
static_cast<size_t>(dst_bytes_per_pixel)); static_cast<size_t>(dst_bytes_per_pixel));
} }
memory_manager.WriteBlock(dst.Address(), impl->tmp_buffer.data(), dst_size);
return true; return true;
} }

View file

@ -10,13 +10,13 @@
#include "core/device_memory.h" #include "core/device_memory.h"
#include "core/hle/kernel/k_page_table.h" #include "core/hle/kernel/k_page_table.h"
#include "core/hle/kernel/k_process.h" #include "core/hle/kernel/k_process.h"
#include "core/memory.h"
#include "video_core/invalidation_accumulator.h" #include "video_core/invalidation_accumulator.h"
#include "video_core/memory_manager.h" #include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h" #include "video_core/rasterizer_interface.h"
#include "video_core/renderer_base.h" #include "video_core/renderer_base.h"
namespace Tegra { namespace Tegra {
using Core::Memory::GuestMemoryFlags;
std::atomic<size_t> MemoryManager::unique_identifier_generator{}; std::atomic<size_t> MemoryManager::unique_identifier_generator{};
@ -587,10 +587,13 @@ void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size,
void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size, void MemoryManager::CopyBlock(GPUVAddr gpu_dest_addr, GPUVAddr gpu_src_addr, std::size_t size,
VideoCommon::CacheType which) { VideoCommon::CacheType which) {
Core::Memory::GpuGuestMemoryScoped<u8, GuestMemoryFlags::SafeReadWrite> data( tmp_buffer.resize_destructive(size);
*this, gpu_src_addr, size); ReadBlock(gpu_src_addr, tmp_buffer.data(), size, which);
data.SetAddressAndSize(gpu_dest_addr, size);
// The output block must be flushed in case it has data modified from the GPU.
// Fixes NPC geometry in Zombie Panic in Wonderland DX
FlushRegion(gpu_dest_addr, size, which); FlushRegion(gpu_dest_addr, size, which);
WriteBlock(gpu_dest_addr, tmp_buffer.data(), size, which);
} }
bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const { bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const {
@ -755,23 +758,4 @@ void MemoryManager::FlushCaching() {
accumulator->Clear(); accumulator->Clear();
} }
const u8* MemoryManager::GetSpan(const GPUVAddr src_addr, const std::size_t size) const {
auto cpu_addr = GpuToCpuAddress(src_addr);
if (cpu_addr) {
return memory.GetSpan(*cpu_addr, size);
}
return nullptr;
}
u8* MemoryManager::GetSpan(const GPUVAddr src_addr, const std::size_t size) {
if (!IsContinuousRange(src_addr, size)) {
return nullptr;
}
auto cpu_addr = GpuToCpuAddress(src_addr);
if (cpu_addr) {
return memory.GetSpan(*cpu_addr, size);
}
return nullptr;
}
} // namespace Tegra } // namespace Tegra

View file

@ -15,7 +15,6 @@
#include "common/range_map.h" #include "common/range_map.h"
#include "common/scratch_buffer.h" #include "common/scratch_buffer.h"
#include "common/virtual_buffer.h" #include "common/virtual_buffer.h"
#include "core/memory.h"
#include "video_core/cache_types.h" #include "video_core/cache_types.h"
#include "video_core/pte_kind.h" #include "video_core/pte_kind.h"
@ -63,20 +62,6 @@ public:
[[nodiscard]] u8* GetPointer(GPUVAddr addr); [[nodiscard]] u8* GetPointer(GPUVAddr addr);
[[nodiscard]] const u8* GetPointer(GPUVAddr addr) const; [[nodiscard]] const u8* GetPointer(GPUVAddr addr) const;
template <typename T>
[[nodiscard]] T* GetPointer(GPUVAddr addr) {
const auto address{GpuToCpuAddress(addr)};
if (!address) {
return {};
}
return memory.GetPointer(*address);
}
template <typename T>
[[nodiscard]] const T* GetPointer(GPUVAddr addr) const {
return GetPointer<T*>(addr);
}
/** /**
* ReadBlock and WriteBlock are full read and write operations over virtual * ReadBlock and WriteBlock are full read and write operations over virtual
* GPU Memory. It's important to use these when GPU memory may not be continuous * GPU Memory. It's important to use these when GPU memory may not be continuous
@ -154,9 +139,6 @@ public:
void FlushCaching(); void FlushCaching();
const u8* GetSpan(const GPUVAddr src_addr, const std::size_t size) const;
u8* GetSpan(const GPUVAddr src_addr, const std::size_t size);
private: private:
template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped> template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped>
inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped, inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped,

View file

@ -8,7 +8,6 @@
#include "common/alignment.h" #include "common/alignment.h"
#include "common/settings.h" #include "common/settings.h"
#include "core/memory.h"
#include "video_core/control/channel_state.h" #include "video_core/control/channel_state.h"
#include "video_core/dirty_flags.h" #include "video_core/dirty_flags.h"
#include "video_core/engines/kepler_compute.h" #include "video_core/engines/kepler_compute.h"
@ -1027,19 +1026,19 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging)
runtime.AccelerateImageUpload(image, staging, uploads); runtime.AccelerateImageUpload(image, staging, uploads);
return; return;
} }
const size_t guest_size_bytes = image.guest_size_bytes;
Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> swizzle_data( swizzle_data_buffer.resize_destructive(guest_size_bytes);
*gpu_memory, gpu_addr, image.guest_size_bytes, &swizzle_data_buffer); gpu_memory->ReadBlockUnsafe(gpu_addr, swizzle_data_buffer.data(), guest_size_bytes);
if (True(image.flags & ImageFlagBits::Converted)) { if (True(image.flags & ImageFlagBits::Converted)) {
unswizzle_data_buffer.resize_destructive(image.unswizzled_size_bytes); unswizzle_data_buffer.resize_destructive(image.unswizzled_size_bytes);
auto copies = auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data_buffer,
UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data, unswizzle_data_buffer); unswizzle_data_buffer);
ConvertImage(unswizzle_data_buffer, image.info, mapped_span, copies); ConvertImage(unswizzle_data_buffer, image.info, mapped_span, copies);
image.UploadMemory(staging, copies); image.UploadMemory(staging, copies);
} else { } else {
const auto copies = const auto copies =
UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data, mapped_span); UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data_buffer, mapped_span);
image.UploadMemory(staging, copies); image.UploadMemory(staging, copies);
} }
} }
@ -1232,12 +1231,11 @@ void TextureCache<P>::QueueAsyncDecode(Image& image, ImageId image_id) {
decode->image_id = image_id; decode->image_id = image_id;
async_decodes.push_back(std::move(decode)); async_decodes.push_back(std::move(decode));
static Common::ScratchBuffer<u8> local_unswizzle_data_buffer; Common::ScratchBuffer<u8> local_unswizzle_data_buffer(image.unswizzled_size_bytes);
local_unswizzle_data_buffer.resize_destructive(image.unswizzled_size_bytes); const size_t guest_size_bytes = image.guest_size_bytes;
Core::Memory::GpuGuestMemory<u8, Core::Memory::GuestMemoryFlags::UnsafeRead> swizzle_data( swizzle_data_buffer.resize_destructive(guest_size_bytes);
*gpu_memory, image.gpu_addr, image.guest_size_bytes, &swizzle_data_buffer); gpu_memory->ReadBlockUnsafe(image.gpu_addr, swizzle_data_buffer.data(), guest_size_bytes);
auto copies = UnswizzleImage(*gpu_memory, image.gpu_addr, image.info, swizzle_data_buffer,
auto copies = UnswizzleImage(*gpu_memory, image.gpu_addr, image.info, swizzle_data,
local_unswizzle_data_buffer); local_unswizzle_data_buffer);
const size_t out_size = MapSizeBytes(image); const size_t out_size = MapSizeBytes(image);

View file

@ -20,7 +20,6 @@
#include "common/div_ceil.h" #include "common/div_ceil.h"
#include "common/scratch_buffer.h" #include "common/scratch_buffer.h"
#include "common/settings.h" #include "common/settings.h"
#include "core/memory.h"
#include "video_core/compatible_formats.h" #include "video_core/compatible_formats.h"
#include "video_core/engines/maxwell_3d.h" #include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h" #include "video_core/memory_manager.h"
@ -545,15 +544,17 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr
tile_size.height, info.tile_width_spacing); tile_size.height, info.tile_width_spacing);
const size_t subresource_size = sizes[level]; const size_t subresource_size = sizes[level];
tmp_buffer.resize_destructive(subresource_size);
const std::span<u8> dst(tmp_buffer);
for (s32 layer = 0; layer < info.resources.layers; ++layer) { for (s32 layer = 0; layer < info.resources.layers; ++layer) {
const std::span<const u8> src = input.subspan(host_offset); const std::span<const u8> src = input.subspan(host_offset);
{ gpu_memory.ReadBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes());
Core::Memory::GpuGuestMemoryScoped<u8, Core::Memory::GuestMemoryFlags::UnsafeReadWrite>
dst(gpu_memory, gpu_addr + guest_offset, subresource_size, &tmp_buffer);
SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height, SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height,
num_tiles.depth, block.height, block.depth); num_tiles.depth, block.height, block.depth);
}
gpu_memory.WriteBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes());
host_offset += host_bytes_per_layer; host_offset += host_bytes_per_layer;
guest_offset += layer_stride; guest_offset += layer_stride;
@ -836,7 +837,6 @@ boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::Memory
const Extent3D size = info.size; const Extent3D size = info.size;
if (info.type == ImageType::Linear) { if (info.type == ImageType::Linear) {
ASSERT(output.size_bytes() >= guest_size_bytes);
gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), guest_size_bytes); gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), guest_size_bytes);
ASSERT((info.pitch >> bpp_log2) << bpp_log2 == info.pitch); ASSERT((info.pitch >> bpp_log2) << bpp_log2 == info.pitch);
@ -904,6 +904,16 @@ boost::container::small_vector<BufferImageCopy, 16> UnswizzleImage(Tegra::Memory
return copies; return copies;
} }
BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
const ImageBase& image, std::span<u8> output) {
gpu_memory.ReadBlockUnsafe(gpu_addr, output.data(), image.guest_size_bytes);
return BufferCopy{
.src_offset = 0,
.dst_offset = 0,
.size = image.guest_size_bytes,
};
}
void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output, void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output,
std::span<BufferImageCopy> copies) { std::span<BufferImageCopy> copies) {
u32 output_offset = 0; u32 output_offset = 0;

View file

@ -66,6 +66,9 @@ struct OverlapResult {
Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info, Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info,
std::span<const u8> input, std::span<u8> output); std::span<const u8> input, std::span<u8> output);
[[nodiscard]] BufferCopy UploadBufferCopy(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr,
const ImageBase& image, std::span<u8> output);
void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output, void ConvertImage(std::span<const u8> input, const ImageInfo& info, std::span<u8> output,
std::span<BufferImageCopy> copies); std::span<BufferImageCopy> copies);