early-access version 1759

This commit is contained in:
pineappleEA 2021-06-07 03:57:28 +02:00
parent 9e2c8d1015
commit 9665cf4742
28 changed files with 873 additions and 58 deletions

View file

@ -1,7 +1,7 @@
yuzu emulator early access yuzu emulator early access
============= =============
This is the source code for early-access 1757. This is the source code for early-access 1759.
## Legal Notice ## Legal Notice

View file

@ -881,11 +881,11 @@ Xbyak::RegExp EmitVAddrLookup(BlockOfCode& code, A64EmitContext& ctx, size_t bit
} }
Xbyak::RegExp EmitFastmemVAddr(BlockOfCode& code, A64EmitContext& ctx, Xbyak::Label& abort, Xbyak::Reg64 vaddr, bool& require_abort_handling) { Xbyak::RegExp EmitFastmemVAddr(BlockOfCode& code, A64EmitContext& ctx, Xbyak::Label& abort, Xbyak::Reg64 vaddr, bool& require_abort_handling) {
const size_t unused_top_bits = 64 - ctx.conf.page_table_address_space_bits; const size_t unused_top_bits = 64 - ctx.conf.fastmem_address_space_bits;
if (unused_top_bits == 0) { if (unused_top_bits == 0) {
return r13 + vaddr; return r13 + vaddr;
} else if (ctx.conf.silently_mirror_page_table) { } else if (ctx.conf.silently_mirror_fastmem) {
Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(); Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr();
if (unused_top_bits < 32) { if (unused_top_bits < 32) {
code.mov(tmp, vaddr); code.mov(tmp, vaddr);
@ -895,19 +895,19 @@ Xbyak::RegExp EmitFastmemVAddr(BlockOfCode& code, A64EmitContext& ctx, Xbyak::La
code.mov(tmp.cvt32(), vaddr.cvt32()); code.mov(tmp.cvt32(), vaddr.cvt32());
} else { } else {
code.mov(tmp.cvt32(), vaddr.cvt32()); code.mov(tmp.cvt32(), vaddr.cvt32());
code.and_(tmp, u32((1 << ctx.conf.page_table_address_space_bits) - 1)); code.and_(tmp, u32((1 << ctx.conf.fastmem_address_space_bits) - 1));
} }
return r13 + tmp; return r13 + tmp;
} else { } else {
if (ctx.conf.page_table_address_space_bits < 32) { if (ctx.conf.fastmem_address_space_bits < 32) {
code.test(vaddr, u32(-(1 << ctx.conf.page_table_address_space_bits))); code.test(vaddr, u32(-(1 << ctx.conf.fastmem_address_space_bits)));
code.jnz(abort, code.T_NEAR); code.jnz(abort, code.T_NEAR);
require_abort_handling = true; require_abort_handling = true;
} else { } else {
// TODO: Consider having TEST as above but coalesce 64-bit constant in register allocator // TODO: Consider having TEST as above but coalesce 64-bit constant in register allocator
Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(); Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr();
code.mov(tmp, vaddr); code.mov(tmp, vaddr);
code.shr(tmp, int(ctx.conf.page_table_address_space_bits)); code.shr(tmp, int(ctx.conf.fastmem_address_space_bits));
code.jnz(abort, code.T_NEAR); code.jnz(abort, code.T_NEAR);
require_abort_handling = true; require_abort_handling = true;
} }

View file

@ -202,7 +202,7 @@ struct UserConfig {
void** page_table = nullptr; void** page_table = nullptr;
/// Declares how many valid address bits are there in virtual addresses. /// Declares how many valid address bits are there in virtual addresses.
/// Determines the size of page_table. Valid values are between 12 and 64 inclusive. /// Determines the size of page_table. Valid values are between 12 and 64 inclusive.
/// This is only used if page_table or fastmem_pointer is not nullptr. /// This is only used if page_table is not nullptr.
size_t page_table_address_space_bits = 36; size_t page_table_address_space_bits = 36;
/// Masks out the first N bits in host pointers from the page table. /// Masks out the first N bits in host pointers from the page table.
/// The intention behind this is to allow users of Dynarmic to pack attributes in the /// The intention behind this is to allow users of Dynarmic to pack attributes in the
@ -213,7 +213,7 @@ struct UserConfig {
/// page table. If true, Dynarmic will silently mirror page_table's address space. If /// page table. If true, Dynarmic will silently mirror page_table's address space. If
/// false, accessing memory outside of page_table bounds will result in a call to the /// false, accessing memory outside of page_table bounds will result in a call to the
/// relevant memory callback. /// relevant memory callback.
/// This is only used if page_table or fastmem_pointer is not nullptr. /// This is only used if page_table is not nullptr.
bool silently_mirror_page_table = true; bool silently_mirror_page_table = true;
/// Determines if the pointer in the page_table shall be offseted locally or globally. /// Determines if the pointer in the page_table shall be offseted locally or globally.
/// 'false' will access page_table[addr >> bits][addr & mask] /// 'false' will access page_table[addr >> bits][addr & mask]
@ -243,6 +243,16 @@ struct UserConfig {
/// Recompiled code will use the page_table if this is available, otherwise memory /// Recompiled code will use the page_table if this is available, otherwise memory
/// accesses will hit the memory callbacks. /// accesses will hit the memory callbacks.
bool recompile_on_fastmem_failure = true; bool recompile_on_fastmem_failure = true;
/// Declares how many valid address bits are there in virtual addresses.
/// Determines the size of fastmem arena. Valid values are between 12 and 64 inclusive.
/// This is only used if fastmem_pointer is not nullptr.
size_t fastmem_address_space_bits = 36;
/// Determines what happens if the guest accesses an entry that is off the end of the
/// fastmem arena. If true, Dynarmic will silently mirror fastmem's address space. If
/// false, accessing memory outside of fastmem bounds will result in a call to the
/// relevant memory callback.
/// This is only used if fastmem_pointer is not nullptr.
bool silently_mirror_fastmem = true;
/// This option relates to translation. Generally when we run into an unpredictable /// This option relates to translation. Generally when we run into an unpredictable
/// instruction the ExceptionRaised callback is called. If this is true, we define /// instruction the ExceptionRaised callback is called. If this is true, we define

View file

@ -852,9 +852,9 @@ TEST_CASE("A64: Memory access (fastmem)", "[a64]") {
A64FastmemTestEnv env{backing_memory}; A64FastmemTestEnv env{backing_memory};
Dynarmic::A64::UserConfig config{&env}; Dynarmic::A64::UserConfig config{&env};
config.fastmem_pointer = backing_memory; config.fastmem_pointer = backing_memory;
config.page_table_address_space_bits = address_width; config.fastmem_address_space_bits = address_width;
config.recompile_on_fastmem_failure = false; config.recompile_on_fastmem_failure = false;
config.silently_mirror_page_table = true; config.silently_mirror_fastmem = true;
config.processor_id = 0; config.processor_id = 0;
Dynarmic::A64::Jit jit{config}; Dynarmic::A64::Jit jit{config};

View file

@ -130,6 +130,8 @@ add_library(common STATIC
hash.h hash.h
hex_util.cpp hex_util.cpp
hex_util.h hex_util.h
host_memory.cpp
host_memory.h
intrusive_red_black_tree.h intrusive_red_black_tree.h
logging/backend.cpp logging/backend.cpp
logging/backend.h logging/backend.h

438
src/common/host_memory.cpp Executable file
View file

@ -0,0 +1,438 @@
#ifdef __linux__
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>
#elif defined(_WIN32) // ^^^ Linux ^^^ vvv Windows vvv
#ifdef _WIN32_WINNT
#undef _WIN32_WINNT
#endif
#define _WIN32_WINNT 0x0A00 // Windows 10
#include <Windows.h>
#include <boost/icl/separate_interval_set.hpp>
#include <iterator>
#include <unordered_map>
#pragma comment(lib, "mincore.lib")
#endif // ^^^ Windows ^^^
#include <mutex>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/host_memory.h"
#include "common/logging/log.h"
namespace Common {
/// Granularity (0x1000 bytes) that the sizes passed to Impl are rounded up to and that
/// offsets and lengths given to Map/Unmap/Protect are asserted against
static constexpr size_t PageAlignment = 0x1000;
/// Alignment (0x200000 bytes) applied to the virtual base pointer exposed by HostMemory
static constexpr size_t HugePageSize = 0x200000;
#ifdef _WIN32
class HostMemory::Impl {
public:
explicit Impl(size_t backing_size_, size_t virtual_size_)
: backing_size{backing_size_}, virtual_size{virtual_size_}, process{GetCurrentProcess()} {
// Allocate backing file map
backing_handle =
CreateFileMapping2(INVALID_HANDLE_VALUE, nullptr, FILE_MAP_WRITE | FILE_MAP_READ,
PAGE_READWRITE, SEC_COMMIT, backing_size, nullptr, nullptr, 0);
if (!backing_handle) {
throw std::bad_alloc{};
}
// Allocate a virtual memory for the backing file map as placeholder
backing_base = static_cast<u8*>(VirtualAlloc2(process, nullptr, backing_size,
MEM_RESERVE | MEM_RESERVE_PLACEHOLDER,
PAGE_NOACCESS, nullptr, 0));
if (!backing_base) {
Release();
throw std::bad_alloc{};
}
// Map backing placeholder
void* const ret = MapViewOfFile3(backing_handle, process, backing_base, 0, backing_size,
MEM_REPLACE_PLACEHOLDER, PAGE_READWRITE, nullptr, 0);
if (ret != backing_base) {
Release();
throw std::bad_alloc{};
}
// Allocate virtual address placeholder
virtual_base = static_cast<u8*>(VirtualAlloc2(process, nullptr, virtual_size,
MEM_RESERVE | MEM_RESERVE_PLACEHOLDER,
PAGE_NOACCESS, nullptr, 0));
if (!virtual_base) {
Release();
throw std::bad_alloc{};
}
}
/// Frees everything this object acquired by delegating to Release()
~Impl() {
Release();
}
/// Maps `length` bytes of the backing file map, starting at `host_offset`, into the virtual
/// placeholder at `virtual_offset`, and records the new mapping in the placeholder trackers.
void Map(size_t virtual_offset, size_t host_offset, size_t length) {
    std::lock_guard lock{placeholder_mutex};

    // Only split when the range is not already a "nieche" (see IsNiechePlaceholder)
    const bool is_nieche = IsNiechePlaceholder(virtual_offset, length);
    if (!is_nieche) {
        Split(virtual_offset, length);
    }

    // The range being mapped must not be tracked yet
    ASSERT(placeholders.find({virtual_offset, virtual_offset + length}) == placeholders.end());

    TrackPlaceholder(virtual_offset, host_offset, length);
    MapView(virtual_offset, host_offset, length);
}
/// Unmaps every tracked placeholder found for the given virtual range.
void Unmap(size_t virtual_offset, size_t length) {
    std::lock_guard lock{placeholder_mutex};

    // Each call handles a single placeholder; keep going until it reports there is none left
    bool unmapped_one = false;
    do {
        unmapped_one = UnmapOnePlaceholder(virtual_offset, length);
    } while (unmapped_one);
}
void Protect(size_t virtual_offset, size_t length, bool read, bool write) {
DWORD new_flags{};
if (read && write) {
new_flags = PAGE_READWRITE;
} else if (read && !write) {
new_flags = PAGE_READONLY;
} else if (!read && !write) {
new_flags = PAGE_NOACCESS;
} else {
UNIMPLEMENTED_MSG("Protection flag combination read={} write={}", read, write);
}
const size_t virtual_end = virtual_offset + length;
std::lock_guard lock{placeholder_mutex};
auto [it, end] = placeholders.equal_range({virtual_offset, virtual_end});
while (it != end) {
const size_t offset = std::max(it->lower(), virtual_offset);
const size_t protect_length = std::min(it->upper(), virtual_end) - offset;
DWORD old_flags{};
if (!VirtualProtect(virtual_base + offset, protect_length, new_flags, &old_flags)) {
LOG_CRITICAL(HW_Memory, "Failed to change virtual memory protect rules");
}
++it;
}
}
const size_t backing_size; ///< Size of the backing memory in bytes
const size_t virtual_size; ///< Size of the virtual address placeholder in bytes
u8* backing_base{}; ///< Address at which the whole backing file map is viewed
u8* virtual_base{}; ///< Start address of the virtual address placeholder
private:
/// Release all resources in the object
void Release() {
if (!placeholders.empty()) {
for (const auto& placeholder : placeholders) {
if (!UnmapViewOfFile2(process, virtual_base + placeholder.lower(),
MEM_PRESERVE_PLACEHOLDER)) {
LOG_CRITICAL(HW_Memory, "Failed to unmap virtual memory placeholder");
}
}
Coalesce(0, virtual_size);
}
if (virtual_base) {
if (!VirtualFree(virtual_base, 0, MEM_RELEASE)) {
LOG_CRITICAL(HW_Memory, "Failed to free virtual memory");
}
}
if (backing_base) {
if (!UnmapViewOfFile2(process, backing_base, MEM_PRESERVE_PLACEHOLDER)) {
LOG_CRITICAL(HW_Memory, "Failed to unmap backing memory placeholder");
}
if (!VirtualFreeEx(process, backing_base, 0, MEM_RELEASE)) {
LOG_CRITICAL(HW_Memory, "Failed to free backing memory");
}
}
if (!CloseHandle(backing_handle)) {
LOG_CRITICAL(HW_Memory, "Failed to free backing memory file handle");
}
}
/// Unmap one placeholder in the given range (partial unmaps are supported)
/// Return true when a placeholder was unmapped; the caller is expected to call again because
/// more placeholders may remain in the range.
/// Return false when no tracked placeholder is found for the given range anymore.
/// The caller must hold placeholder_mutex (Unmap takes it before looping over this function).
bool UnmapOnePlaceholder(size_t virtual_offset, size_t length) {
const auto it = placeholders.find({virtual_offset, virtual_offset + length});
const auto begin = placeholders.begin();
const auto end = placeholders.end();
if (it == end) {
return false;
}
// Clamp the requested range to the placeholder that was found
const size_t placeholder_begin = it->lower();
const size_t placeholder_end = it->upper();
const size_t unmap_begin = std::max(virtual_offset, placeholder_begin);
const size_t unmap_end = std::min(virtual_offset + length, placeholder_end);
ASSERT(unmap_begin >= placeholder_begin && unmap_begin < placeholder_end);
ASSERT(unmap_end <= placeholder_end && unmap_end > placeholder_begin);
// Backing offset the placeholder was mapped to; needed to remap the parts that stay mapped
const auto host_pointer_it = placeholder_host_pointers.find(placeholder_begin);
ASSERT(host_pointer_it != placeholder_host_pointers.end());
const size_t host_offset = host_pointer_it->second;
// A split is needed on each side where part of the placeholder lies outside the unmap range
const bool split_left = unmap_begin > placeholder_begin;
const bool split_right = unmap_end < placeholder_end;
// The whole view is unmapped first; the surviving parts are mapped again below
if (!UnmapViewOfFile2(process, virtual_base + placeholder_begin,
MEM_PRESERVE_PLACEHOLDER)) {
LOG_CRITICAL(HW_Memory, "Failed to unmap placeholder");
}
// If we have to remap memory regions due to partial unmaps, we are in a data race as
// Windows doesn't support remapping memory without unmapping first. Avoid adding any extra
// logic within the panic region described below.
// Panic region, we are in a data race right now
if (split_left || split_right) {
Split(unmap_begin, unmap_end - unmap_begin);
}
if (split_left) {
MapView(placeholder_begin, host_offset, unmap_begin - placeholder_begin);
}
if (split_right) {
MapView(unmap_end, host_offset + unmap_end - placeholder_begin,
placeholder_end - unmap_end);
}
// End panic region
size_t coalesce_begin = unmap_begin;
if (!split_left) {
// Try to coalesce pages to the left
// The free space to the left starts where the previous tracked placeholder ends, or at
// offset 0 when there is no previous placeholder
coalesce_begin = it == begin ? 0 : std::prev(it)->upper();
if (coalesce_begin != placeholder_begin) {
Coalesce(coalesce_begin, unmap_end - coalesce_begin);
}
}
if (!split_right) {
// Try to coalesce pages to the right
// The free space to the right ends where the next tracked placeholder begins, or at
// virtual_size when there is no next placeholder
const auto next = std::next(it);
const size_t next_begin = next == end ? virtual_size : next->lower();
if (placeholder_end != next_begin) {
// We can coalesce to the right
Coalesce(coalesce_begin, next_begin - coalesce_begin);
}
}
// Remove and reinsert placeholder trackers
UntrackPlaceholder(it);
if (split_left) {
TrackPlaceholder(placeholder_begin, host_offset, unmap_begin - placeholder_begin);
}
if (split_right) {
TrackPlaceholder(unmap_end, host_offset + unmap_end - placeholder_begin,
placeholder_end - unmap_end);
}
return true;
}
void MapView(size_t virtual_offset, size_t host_offset, size_t length) {
if (!MapViewOfFile3(backing_handle, process, virtual_base + virtual_offset, host_offset,
length, MEM_REPLACE_PLACEHOLDER, PAGE_READWRITE, nullptr, 0)) {
LOG_CRITICAL(HW_Memory, "Failed to map placeholder");
}
}
void Split(size_t virtual_offset, size_t length) {
if (!VirtualFreeEx(process, reinterpret_cast<LPVOID>(virtual_base + virtual_offset), length,
MEM_RELEASE | MEM_PRESERVE_PLACEHOLDER)) {
LOG_CRITICAL(HW_Memory, "Failed to split placeholder");
}
}
void Coalesce(size_t virtual_offset, size_t length) {
if (!VirtualFreeEx(process, reinterpret_cast<LPVOID>(virtual_base + virtual_offset), length,
MEM_RELEASE | MEM_COALESCE_PLACEHOLDERS)) {
LOG_CRITICAL(HW_Memory, "Failed to coalesce placeholders");
}
}
/// Records a mapped placeholder: its virtual interval goes into `placeholders` and its
/// backing offset is stored in `placeholder_host_pointers`, keyed by the interval start.
void TrackPlaceholder(size_t virtual_offset, size_t host_offset, size_t length) {
    const size_t virtual_end = virtual_offset + length;
    placeholders.insert({virtual_offset, virtual_end});
    placeholder_host_pointers.emplace(virtual_offset, host_offset);
}
/// Removes a placeholder from both trackers.
/// @param it Iterator to the placeholder interval to forget; it is invalid after this call
void UntrackPlaceholder(boost::icl::separate_interval_set<size_t>::iterator it) {
    // Read the key before erasing: erase() invalidates `it`, so dereferencing it afterwards
    // (as the previous implementation did) is undefined behaviour
    const size_t placeholder_begin = it->lower();
    placeholders.erase(it);
    placeholder_host_pointers.erase(placeholder_begin);
}
/// Return true when a given memory region is a "nieche" and the placeholders don't have to be
/// split: a tracked placeholder begins exactly where the region ends, and the region begins
/// either where the previous tracked placeholder ends or, when there is no previous
/// placeholder, at offset 0.
bool IsNiechePlaceholder(size_t virtual_offset, size_t length) const {
    const auto it = placeholders.upper_bound({virtual_offset, virtual_offset + length});
    if (it == placeholders.end() || it->lower() != virtual_offset + length) {
        return false;
    }
    if (it == placeholders.begin()) {
        // There is no placeholder to the left. std::prev(begin()) must not be evaluated, so
        // the region is a nieche only when it starts at the very beginning
        return virtual_offset == 0;
    }
    return std::prev(it)->upper() == virtual_offset;
}
HANDLE process{}; ///< Current process handle
HANDLE backing_handle{}; ///< File based backing memory
std::mutex placeholder_mutex; ///< Mutex for placeholders
boost::icl::separate_interval_set<size_t> placeholders; ///< Mapped placeholders
std::unordered_map<size_t, size_t> placeholder_host_pointers; ///< Placeholder backing offset
};
#elif defined(__linux__)
class HostMemory::Impl {
public:
/// Creates the memfd backing file, maps it into the process and reserves the virtual range.
/// @param backing_size_ Size of the backing memory in bytes
/// @param virtual_size_ Size of the virtual address placeholder in bytes
/// @throws std::bad_alloc when any step fails; resources acquired so far are released first
explicit Impl(size_t backing_size_, size_t virtual_size_)
    : backing_size{backing_size_}, virtual_size{virtual_size_} {
    // --- Backing memory ---
    fd = memfd_create("HostMemory", 0);
    if (fd == -1) {
        LOG_CRITICAL(HW_Memory, "memfd_create failed: {}", strerror(errno));
        Release();
        throw std::bad_alloc{};
    }

    // ftruncate is defined to extend the file with zeros
    const int truncate_result = ftruncate(fd, backing_size);
    if (truncate_result != 0) {
        LOG_CRITICAL(HW_Memory, "ftruncate failed with {}, are you out-of-memory?",
                     strerror(errno));
        Release();
        throw std::bad_alloc{};
    }

    // Shared read/write mapping of the whole backing file
    void* const backing_mapping =
        mmap(nullptr, backing_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    backing_base = static_cast<u8*>(backing_mapping);
    if (backing_base == MAP_FAILED) {
        LOG_CRITICAL(HW_Memory, "mmap failed: {}", strerror(errno));
        Release();
        throw std::bad_alloc{};
    }

    // --- Virtual memory ---
    // Inaccessible anonymous mapping; Map() later overlays pieces of the backing file on it
    void* const virtual_mapping =
        mmap(nullptr, virtual_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    virtual_base = static_cast<u8*>(virtual_mapping);
    if (virtual_base == MAP_FAILED) {
        LOG_CRITICAL(HW_Memory, "mmap failed: {}", strerror(errno));
        Release();
        throw std::bad_alloc{};
    }
}
/// Frees everything this object acquired by delegating to Release()
~Impl() {
Release();
}
void Map(size_t virtual_offset, size_t host_offset, size_t length) {
void* ret = mmap(virtual_base + virtual_offset, length, PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_FIXED, fd, host_offset);
ASSERT_MSG(ret != MAP_FAILED, "mmap failed: {}", strerror(errno));
}
void Unmap(size_t virtual_offset, size_t length) {
// The method name is wrong. We're still talking about the virtual range.
// We don't want to unmap, we want to reserve this memory.
void* ret = mmap(virtual_base + virtual_offset, length, PROT_NONE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
ASSERT_MSG(ret != MAP_FAILED, "mmap failed: {}", strerror(errno));
}
/// Sets the protection of the given virtual range: PROT_READ when `read` is set, PROT_WRITE
/// when `write` is set, and no access when neither is set.
void Protect(size_t virtual_offset, size_t length, bool read, bool write) {
    const int read_flag = read ? PROT_READ : 0;
    const int write_flag = write ? PROT_WRITE : 0;
    const int protect_result =
        mprotect(virtual_base + virtual_offset, length, read_flag | write_flag);
    ASSERT_MSG(protect_result == 0, "mprotect failed: {}", strerror(errno));
}
const size_t backing_size; ///< Size of the backing memory in bytes
const size_t virtual_size; ///< Size of the virtual address placeholder in bytes
// Both pointers start as MAP_FAILED so Release() can tell which mappings were actually created
u8* backing_base{reinterpret_cast<u8*>(MAP_FAILED)};
u8* virtual_base{reinterpret_cast<u8*>(MAP_FAILED)};
private:
/// Release all resources in the object.
/// Anything still holding its "not acquired" sentinel (MAP_FAILED or -1) is skipped.
void Release() {
    // Reserved virtual range
    if (virtual_base != MAP_FAILED) {
        const int unmap_result = munmap(virtual_base, virtual_size);
        ASSERT_MSG(unmap_result == 0, "munmap failed: {}", strerror(errno));
    }
    // Mapping of the backing file
    if (backing_base != MAP_FAILED) {
        const int unmap_result = munmap(backing_base, backing_size);
        ASSERT_MSG(unmap_result == 0, "munmap failed: {}", strerror(errno));
    }
    // memfd file descriptor
    if (fd != -1) {
        const int close_result = close(fd);
        ASSERT_MSG(close_result == 0, "close failed: {}", strerror(errno));
    }
}
int fd{-1}; // memfd file descriptor, -1 is the error value of memfd_create
};
#else
#error Please implement the host memory for your platform
#endif
/// Creates the platform implementation and derives the public base pointers from it.
/// Both sizes are rounded up to PageAlignment, and three extra huge pages of virtual space are
/// requested so the exposed virtual base can be moved forward to a HugePageSize boundary.
HostMemory::HostMemory(size_t backing_size_, size_t virtual_size_)
    : backing_size(backing_size_), virtual_size(virtual_size_),
      impl{std::make_unique<HostMemory::Impl>(AlignUp(backing_size, PageAlignment),
                                              AlignUp(virtual_size, PageAlignment) +
                                                  3 * HugePageSize)},
      backing_base{impl->backing_base}, virtual_base{impl->virtual_base} {
    // Advance by just under two huge pages, then round down to a huge page boundary
    u8* const advanced = virtual_base + (2 * HugePageSize - 1);
    const size_t misalignment = reinterpret_cast<size_t>(advanced) & (HugePageSize - 1);
    virtual_base = advanced - misalignment;

    // Every offset handed to impl has to be shifted by the distance the base was moved
    virtual_base_offset = virtual_base - impl->virtual_base;
}
// The special members are defaulted in this file rather than in the header: Impl is only
// declared in the header and is defined here.
HostMemory::~HostMemory() = default;
// Defaulted member-wise moves.
// NOTE(review): the raw base pointers of the moved-from object are presumably left unchanged
// while its impl becomes null - confirm that moved-from objects are never used.
HostMemory::HostMemory(HostMemory&&) noexcept = default;
HostMemory& HostMemory::operator=(HostMemory&&) noexcept = default;
/// Maps `length` bytes of backing memory at `host_offset` to `virtual_offset` in the virtual
/// range. Offsets and length must be multiples of PageAlignment and lie within the sizes given
/// at construction. A zero length is a no-op.
void HostMemory::Map(size_t virtual_offset, size_t host_offset, size_t length) {
    // Argument validation
    ASSERT(virtual_offset % PageAlignment == 0);
    ASSERT(host_offset % PageAlignment == 0);
    ASSERT(length % PageAlignment == 0);
    ASSERT(virtual_offset + length <= virtual_size);
    ASSERT(host_offset + length <= backing_size);

    if (length != 0) {
        // impl works relative to its own base, which lies virtual_base_offset bytes before ours
        const size_t impl_offset = virtual_offset + virtual_base_offset;
        impl->Map(impl_offset, host_offset, length);
    }
}
/// Unmaps `length` bytes at `virtual_offset` in the virtual range. Offset and length must be
/// multiples of PageAlignment and lie within the virtual size given at construction.
/// A zero length is a no-op.
void HostMemory::Unmap(size_t virtual_offset, size_t length) {
    // Argument validation
    ASSERT(virtual_offset % PageAlignment == 0);
    ASSERT(length % PageAlignment == 0);
    ASSERT(virtual_offset + length <= virtual_size);

    if (length != 0) {
        // impl works relative to its own base, which lies virtual_base_offset bytes before ours
        const size_t impl_offset = virtual_offset + virtual_base_offset;
        impl->Unmap(impl_offset, length);
    }
}
/// Changes the access permissions of `length` bytes at `virtual_offset` in the virtual range.
/// Offset and length must be multiples of PageAlignment and lie within the virtual size given
/// at construction. A zero length is a no-op.
void HostMemory::Protect(size_t virtual_offset, size_t length, bool read, bool write) {
    // Argument validation
    ASSERT(virtual_offset % PageAlignment == 0);
    ASSERT(length % PageAlignment == 0);
    ASSERT(virtual_offset + length <= virtual_size);

    if (length != 0) {
        // impl works relative to its own base, which lies virtual_base_offset bytes before ours
        const size_t impl_offset = virtual_offset + virtual_base_offset;
        impl->Protect(impl_offset, length, read, write);
    }
}
} // namespace Common

66
src/common/host_memory.h Executable file
View file

@ -0,0 +1,66 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include "common/common_types.h"
namespace Common {
/**
 * A low level linear memory buffer, which supports multiple mappings
 * Its purpose is to rebuild a given sparse memory layout, including mirrors.
 */
class HostMemory {
public:
    /**
     * @param backing_size_ Size of the backing memory in bytes
     * @param virtual_size_ Size of the virtual range in bytes
     */
    explicit HostMemory(size_t backing_size_, size_t virtual_size_);
    ~HostMemory();

    /**
     * Copying is disabled.
     * TODO: If copies are ever needed, they shall return a copy of the buffer without the
     * mappings (implement them with COW).
     */
    HostMemory(const HostMemory& other) = delete;
    HostMemory& operator=(const HostMemory& other) = delete;

    /**
     * Move constructors. They will move the buffer and the mappings to the new object.
     */
    HostMemory(HostMemory&& other) noexcept;
    HostMemory& operator=(HostMemory&& other) noexcept;

    /// Maps `length` bytes of backing memory at `host_offset` to `virtual_offset`
    void Map(size_t virtual_offset, size_t host_offset, size_t length);

    /// Unmaps `length` bytes at `virtual_offset`
    void Unmap(size_t virtual_offset, size_t length);

    /// Changes the read/write permissions of `length` bytes at `virtual_offset`
    void Protect(size_t virtual_offset, size_t length, bool read, bool write);

    [[nodiscard]] u8* BackingBasePointer() noexcept {
        return backing_base;
    }
    [[nodiscard]] const u8* BackingBasePointer() const noexcept {
        return backing_base;
    }

    [[nodiscard]] u8* VirtualBasePointer() noexcept {
        return virtual_base;
    }
    [[nodiscard]] const u8* VirtualBasePointer() const noexcept {
        return virtual_base;
    }

private:
    size_t backing_size{};
    size_t virtual_size{};

    // Low level handler for the platform dependent memory routines
    class Impl;
    std::unique_ptr<Impl> impl;
    u8* backing_base{};
    u8* virtual_base{};
    size_t virtual_base_offset{};
};
} // namespace Common

View file

@ -111,6 +111,8 @@ struct PageTable {
VirtualBuffer<u64> backing_addr; VirtualBuffer<u64> backing_addr;
size_t current_address_space_width_in_bits; size_t current_address_space_width_in_bits;
u8* fastmem_arena;
}; };
} // namespace Common } // namespace Common

View file

@ -90,6 +90,13 @@ bool IsGPULevelHigh() {
values.gpu_accuracy.GetValue() == GPUAccuracy::High; values.gpu_accuracy.GetValue() == GPUAccuracy::High;
} }
bool IsFastmemEnabled() {
if (values.cpu_accuracy.GetValue() == CPUAccuracy::DebugMode) {
return values.cpuopt_fastmem;
}
return true;
}
float Volume() { float Volume() {
if (values.audio_muted) { if (values.audio_muted) {
return 0.0f; return 0.0f;
@ -115,6 +122,7 @@ void RestoreGlobalState(bool is_powered_on) {
values.cpuopt_unsafe_unfuse_fma.SetGlobal(true); values.cpuopt_unsafe_unfuse_fma.SetGlobal(true);
values.cpuopt_unsafe_reduce_fp_error.SetGlobal(true); values.cpuopt_unsafe_reduce_fp_error.SetGlobal(true);
values.cpuopt_unsafe_inaccurate_nan.SetGlobal(true); values.cpuopt_unsafe_inaccurate_nan.SetGlobal(true);
values.cpuopt_unsafe_fastmem_check.SetGlobal(true);
// Renderer // Renderer
values.renderer_backend.SetGlobal(true); values.renderer_backend.SetGlobal(true);

View file

@ -125,10 +125,12 @@ struct Values {
bool cpuopt_const_prop; bool cpuopt_const_prop;
bool cpuopt_misc_ir; bool cpuopt_misc_ir;
bool cpuopt_reduce_misalign_checks; bool cpuopt_reduce_misalign_checks;
bool cpuopt_fastmem;
Setting<bool> cpuopt_unsafe_unfuse_fma; Setting<bool> cpuopt_unsafe_unfuse_fma;
Setting<bool> cpuopt_unsafe_reduce_fp_error; Setting<bool> cpuopt_unsafe_reduce_fp_error;
Setting<bool> cpuopt_unsafe_inaccurate_nan; Setting<bool> cpuopt_unsafe_inaccurate_nan;
Setting<bool> cpuopt_unsafe_fastmem_check;
// Renderer // Renderer
Setting<RendererBackend> renderer_backend; Setting<RendererBackend> renderer_backend;
@ -249,6 +251,8 @@ void SetConfiguringGlobal(bool is_global);
bool IsGPULevelExtreme(); bool IsGPULevelExtreme();
bool IsGPULevelHigh(); bool IsGPULevelHigh();
bool IsFastmemEnabled();
float Volume(); float Volume();
std::string GetTimeZoneString(); std::string GetTimeZoneString();

View file

@ -128,6 +128,7 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
if (page_table) { if (page_table) {
config.page_table = reinterpret_cast<std::array<std::uint8_t*, NUM_PAGE_TABLE_ENTRIES>*>( config.page_table = reinterpret_cast<std::array<std::uint8_t*, NUM_PAGE_TABLE_ENTRIES>*>(
page_table->pointers.data()); page_table->pointers.data());
config.fastmem_pointer = page_table->fastmem_arena;
} }
config.absolute_offset_page_table = true; config.absolute_offset_page_table = true;
config.page_table_pointer_mask_bits = Common::PageTable::ATTRIBUTE_BITS; config.page_table_pointer_mask_bits = Common::PageTable::ATTRIBUTE_BITS;
@ -143,7 +144,7 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
// Code cache size // Code cache size
config.code_cache_size = 512 * 1024 * 1024; config.code_cache_size = 512 * 1024 * 1024;
config.far_code_offset = 256 * 1024 * 1024; config.far_code_offset = 400 * 1024 * 1024;
// Safe optimizations // Safe optimizations
if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::DebugMode) { if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::DebugMode) {
@ -171,6 +172,9 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
if (!Settings::values.cpuopt_reduce_misalign_checks) { if (!Settings::values.cpuopt_reduce_misalign_checks) {
config.only_detect_misalignment_via_page_table_on_page_boundary = false; config.only_detect_misalignment_via_page_table_on_page_boundary = false;
} }
if (!Settings::values.cpuopt_fastmem) {
config.fastmem_pointer = nullptr;
}
} }
// Unsafe optimizations // Unsafe optimizations

View file

@ -160,6 +160,10 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
config.absolute_offset_page_table = true; config.absolute_offset_page_table = true;
config.detect_misaligned_access_via_page_table = 16 | 32 | 64 | 128; config.detect_misaligned_access_via_page_table = 16 | 32 | 64 | 128;
config.only_detect_misalignment_via_page_table_on_page_boundary = true; config.only_detect_misalignment_via_page_table_on_page_boundary = true;
config.fastmem_pointer = page_table->fastmem_arena;
config.fastmem_address_space_bits = address_space_bits;
config.silently_mirror_fastmem = false;
} }
// Multi-process state // Multi-process state
@ -181,7 +185,7 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
// Code cache size // Code cache size
config.code_cache_size = 512 * 1024 * 1024; config.code_cache_size = 512 * 1024 * 1024;
config.far_code_offset = 256 * 1024 * 1024; config.far_code_offset = 400 * 1024 * 1024;
// Safe optimizations // Safe optimizations
if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::DebugMode) { if (Settings::values.cpu_accuracy.GetValue() == Settings::CPUAccuracy::DebugMode) {
@ -209,6 +213,9 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
if (!Settings::values.cpuopt_reduce_misalign_checks) { if (!Settings::values.cpuopt_reduce_misalign_checks) {
config.only_detect_misalignment_via_page_table_on_page_boundary = false; config.only_detect_misalignment_via_page_table_on_page_boundary = false;
} }
if (!Settings::values.cpuopt_fastmem) {
config.fastmem_pointer = nullptr;
}
} }
// Unsafe optimizations // Unsafe optimizations
@ -223,6 +230,9 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
if (Settings::values.cpuopt_unsafe_inaccurate_nan.GetValue()) { if (Settings::values.cpuopt_unsafe_inaccurate_nan.GetValue()) {
config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN; config.optimizations |= Dynarmic::OptimizationFlag::Unsafe_InaccurateNaN;
} }
if (Settings::values.cpuopt_unsafe_fastmem_check.GetValue()) {
config.fastmem_address_space_bits = 64;
}
} }
return std::make_shared<Dynarmic::A64::Jit>(config); return std::make_shared<Dynarmic::A64::Jit>(config);

View file

@ -6,7 +6,7 @@
namespace Core { namespace Core {
DeviceMemory::DeviceMemory() : buffer{DramMemoryMap::Size} {} DeviceMemory::DeviceMemory() : buffer{DramMemoryMap::Size, 1ULL << 39} {}
DeviceMemory::~DeviceMemory() = default; DeviceMemory::~DeviceMemory() = default;
} // namespace Core } // namespace Core

View file

@ -5,7 +5,7 @@
#pragma once #pragma once
#include "common/common_types.h" #include "common/common_types.h"
#include "common/virtual_buffer.h" #include "common/host_memory.h"
namespace Core { namespace Core {
@ -21,27 +21,30 @@ enum : u64 {
}; };
}; // namespace DramMemoryMap }; // namespace DramMemoryMap
class DeviceMemory : NonCopyable { class DeviceMemory {
public: public:
explicit DeviceMemory(); explicit DeviceMemory();
~DeviceMemory(); ~DeviceMemory();
DeviceMemory& operator=(const DeviceMemory&) = delete;
DeviceMemory(const DeviceMemory&) = delete;
template <typename T> template <typename T>
PAddr GetPhysicalAddr(const T* ptr) const { PAddr GetPhysicalAddr(const T* ptr) const {
return (reinterpret_cast<uintptr_t>(ptr) - reinterpret_cast<uintptr_t>(buffer.data())) + return (reinterpret_cast<uintptr_t>(ptr) -
reinterpret_cast<uintptr_t>(buffer.BackingBasePointer())) +
DramMemoryMap::Base; DramMemoryMap::Base;
} }
u8* GetPointer(PAddr addr) { u8* GetPointer(PAddr addr) {
return buffer.data() + (addr - DramMemoryMap::Base); return buffer.BackingBasePointer() + (addr - DramMemoryMap::Base);
} }
const u8* GetPointer(PAddr addr) const { const u8* GetPointer(PAddr addr) const {
return buffer.data() + (addr - DramMemoryMap::Base); return buffer.BackingBasePointer() + (addr - DramMemoryMap::Base);
} }
private: Common::HostMemory buffer;
Common::VirtualBuffer<u8> buffer;
}; };
} // namespace Core } // namespace Core

View file

@ -85,8 +85,8 @@ public:
*/ */
void ClientDisconnected(KServerSession* session); void ClientDisconnected(KServerSession* session);
std::weak_ptr<ServiceThread> GetServiceThread() const { std::shared_ptr<ServiceThread> GetServiceThread() const {
return service_thread; return service_thread.lock();
} }
protected: protected:
@ -152,7 +152,7 @@ public:
session_handler = std::move(handler); session_handler = std::move(handler);
} }
std::weak_ptr<ServiceThread> GetServiceThread() const { std::shared_ptr<ServiceThread> GetServiceThread() const {
return session_handler->GetServiceThread(); return session_handler->GetServiceThread();
} }

View file

@ -119,9 +119,11 @@ ResultCode KServerSession::QueueSyncRequest(KThread* thread, Core::Memory::Memor
context->PopulateFromIncomingCommandBuffer(kernel.CurrentProcess()->GetHandleTable(), cmd_buf); context->PopulateFromIncomingCommandBuffer(kernel.CurrentProcess()->GetHandleTable(), cmd_buf);
if (auto strong_ptr = manager->GetServiceThread().lock()) { if (auto strong_ptr = manager->GetServiceThread(); strong_ptr) {
strong_ptr->QueueSyncRequest(*parent, std::move(context)); strong_ptr->QueueSyncRequest(*parent, std::move(context));
return ResultSuccess; return ResultSuccess;
} else {
ASSERT(false, "strong_ptr was nullptr!");
} }
return ResultSuccess; return ResultSuccess;

View file

@ -12,6 +12,7 @@
#include "common/common_types.h" #include "common/common_types.h"
#include "common/logging/log.h" #include "common/logging/log.h"
#include "common/page_table.h" #include "common/page_table.h"
#include "common/settings.h"
#include "common/swap.h" #include "common/swap.h"
#include "core/arm/arm_interface.h" #include "core/arm/arm_interface.h"
#include "core/core.h" #include "core/core.h"
@ -32,6 +33,7 @@ struct Memory::Impl {
void SetCurrentPageTable(Kernel::KProcess& process, u32 core_id) { void SetCurrentPageTable(Kernel::KProcess& process, u32 core_id) {
current_page_table = &process.PageTable().PageTableImpl(); current_page_table = &process.PageTable().PageTableImpl();
current_page_table->fastmem_arena = system.DeviceMemory().buffer.VirtualBasePointer();
const std::size_t address_space_width = process.PageTable().GetAddressSpaceWidth(); const std::size_t address_space_width = process.PageTable().GetAddressSpaceWidth();
@ -41,13 +43,23 @@ struct Memory::Impl {
void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, PAddr target) { void MapMemoryRegion(Common::PageTable& page_table, VAddr base, u64 size, PAddr target) {
ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size); ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size);
ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base); ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base);
ASSERT_MSG(target >= DramMemoryMap::Base && target < DramMemoryMap::End,
"Out of bounds target: {:016X}", target);
MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, target, Common::PageType::Memory); MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, target, Common::PageType::Memory);
if (Settings::IsFastmemEnabled()) {
system.DeviceMemory().buffer.Map(base, target - DramMemoryMap::Base, size);
}
} }
void UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size) { void UnmapRegion(Common::PageTable& page_table, VAddr base, u64 size) {
ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size); ASSERT_MSG((size & PAGE_MASK) == 0, "non-page aligned size: {:016X}", size);
ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base); ASSERT_MSG((base & PAGE_MASK) == 0, "non-page aligned base: {:016X}", base);
MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, 0, Common::PageType::Unmapped); MapPages(page_table, base / PAGE_SIZE, size / PAGE_SIZE, 0, Common::PageType::Unmapped);
if (Settings::IsFastmemEnabled()) {
system.DeviceMemory().buffer.Unmap(base, size);
}
} }
bool IsValidVirtualAddress(const Kernel::KProcess& process, const VAddr vaddr) const { bool IsValidVirtualAddress(const Kernel::KProcess& process, const VAddr vaddr) const {
@ -466,6 +478,12 @@ struct Memory::Impl {
if (vaddr == 0) { if (vaddr == 0) {
return; return;
} }
if (Settings::IsFastmemEnabled()) {
const bool is_read_enable = Settings::IsGPULevelHigh() || !cached;
system.DeviceMemory().buffer.Protect(vaddr, size, is_read_enable, !cached);
}
// Iterate over a contiguous CPU address space, which corresponds to the specified GPU // Iterate over a contiguous CPU address space, which corresponds to the specified GPU
// address space, marking the region as un/cached. The region is marked un/cached at a // address space, marking the region as un/cached. The region is marked un/cached at a
// granularity of CPU pages, hence why we iterate on a CPU page basis (note: GPU page size // granularity of CPU pages, hence why we iterate on a CPU page basis (note: GPU page size

View file

@ -2,6 +2,7 @@ add_executable(tests
common/bit_field.cpp common/bit_field.cpp
common/cityhash.cpp common/cityhash.cpp
common/fibers.cpp common/fibers.cpp
common/host_memory.cpp
common/param_package.cpp common/param_package.cpp
common/ring_buffer.cpp common/ring_buffer.cpp
core/core_timing.cpp core/core_timing.cpp

183
src/tests/common/host_memory.cpp Executable file
View file

@ -0,0 +1,183 @@
// Copyright 2021 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <catch2/catch.hpp>
#include "common/host_memory.h"
using Common::HostMemory;
// Virtual size handed to every HostMemory constructed in this file: 2^39 bytes (512 GiB).
static constexpr size_t VIRTUAL_SIZE = 1ULL << 39;
// Backing size handed to every HostMemory constructed in this file: 4 GiB.
static constexpr size_t BACKING_SIZE = 4ULL * 1024 * 1024 * 1024;
// Constructs and destroys a HostMemory twice in a row, so the second instance is created only
// after the first one has been torn down. There are no assertions; the test only exercises
// construction and destruction.
TEST_CASE("HostMemory: Initialize and deinitialize", "[common]") {
    for (int round = 0; round < 2; ++round) {
        HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
    }
}
// Maps a single 0x1000-byte range and checks that a byte written through the virtual view can be
// read back.
TEST_CASE("HostMemory: Simple map", "[common]") {
    constexpr size_t virtual_offset = 0x5000;
    constexpr size_t backing_offset = 0x8000;
    constexpr size_t length = 0x1000;

    HostMemory host_memory(BACKING_SIZE, VIRTUAL_SIZE);
    host_memory.Map(virtual_offset, backing_offset, length);

    // volatile keeps the compiler from folding the store/load pair away.
    volatile u8* const data = host_memory.VirtualBasePointer() + virtual_offset;
    data[0] = 50;
    REQUIRE(data[0] == 50);
}
// Maps two virtual ranges that overlap in backing memory and checks that a write through one
// view is visible through the other.
TEST_CASE("HostMemory: Simple mirror map", "[common]") {
    HostMemory host_memory(BACKING_SIZE, VIRTUAL_SIZE);

    // Virtual [0x5000, 0x7000) -> backing [0x3000, 0x5000)
    host_memory.Map(0x5000, 0x3000, 0x2000);
    // Virtual [0x8000, 0x9000) -> backing [0x4000, 0x5000)
    host_memory.Map(0x8000, 0x4000, 0x1000);

    volatile u8* const mirror_a = host_memory.VirtualBasePointer() + 0x5000;
    volatile u8* const mirror_b = host_memory.VirtualBasePointer() + 0x8000;

    // Backing offset 0x4000 is reachable as mirror_b[0] and as mirror_a[0x1000].
    mirror_b[0] = 76;
    REQUIRE(mirror_a[0x1000] == 76);
}
// Maps a range, verifies it is writable and readable, then unmaps exactly the same range.
// Nothing is asserted after the unmap.
TEST_CASE("HostMemory: Simple unmap", "[common]") {
    constexpr size_t virtual_offset = 0x5000;
    constexpr size_t length = 0x2000;

    HostMemory host_memory(BACKING_SIZE, VIRTUAL_SIZE);
    host_memory.Map(virtual_offset, 0x3000, length);

    volatile u8* const data = host_memory.VirtualBasePointer() + virtual_offset;
    data[75] = 50;
    REQUIRE(data[75] == 50);

    host_memory.Unmap(virtual_offset, length);
}
// Checks that backing contents survive an unmap/remap cycle and are visible through a second,
// later mapping of the same backing bytes.
TEST_CASE("HostMemory: Simple unmap and remap", "[common]") {
    constexpr size_t virtual_offset = 0x5000;
    constexpr size_t backing_offset = 0x3000;
    constexpr size_t length = 0x2000;

    HostMemory host_memory(BACKING_SIZE, VIRTUAL_SIZE);
    host_memory.Map(virtual_offset, backing_offset, length);

    volatile u8* const data = host_memory.VirtualBasePointer() + virtual_offset;
    data[0] = 50;
    REQUIRE(data[0] == 50);

    // Drop the view and recreate it over the same backing range; the byte must still be there.
    host_memory.Unmap(virtual_offset, length);
    host_memory.Map(virtual_offset, backing_offset, length);
    REQUIRE(data[0] == 50);

    // Virtual [0x7000, 0xC000) -> backing [0x2000, 0x7000). data[0x3000] is virtual 0x8000,
    // i.e. backing 0x3000, which is the byte written above.
    host_memory.Map(0x7000, 0x2000, 0x5000);
    REQUIRE(data[0x3000] == 50);
}
// Opens a 0x4000-byte hole at the start of a larger mapping and refills it with three smaller
// mappings issued out of address order. There are no assertions; the test only exercises the
// call sequence.
TEST_CASE("HostMemory: Nieche allocation", "[common]") {
    HostMemory host_memory(BACKING_SIZE, VIRTUAL_SIZE);

    host_memory.Map(0x0000, 0, 0x20000);  // virtual [0x0000, 0x20000)
    host_memory.Unmap(0x0000, 0x4000);    // hole at [0x0000, 0x4000)

    host_memory.Map(0x1000, 0, 0x2000);   // fills [0x1000, 0x3000)
    host_memory.Map(0x3000, 0, 0x1000);   // fills [0x3000, 0x4000)
    host_memory.Map(0, 0, 0x1000);        // fills [0x0000, 0x1000)
}
// Maps and fully unmaps one range, then maps a larger range that contains the freed one.
// There are no assertions; the test only exercises the call sequence.
TEST_CASE("HostMemory: Full unmap", "[common]") {
    constexpr size_t virtual_offset = 0x8000;
    constexpr size_t length = 0x4000;

    HostMemory host_memory(BACKING_SIZE, VIRTUAL_SIZE);
    host_memory.Map(virtual_offset, 0, length);
    host_memory.Unmap(virtual_offset, length);

    // Virtual [0x6000, 0x1C000) starts before and ends after the range released above.
    host_memory.Map(0x6000, 0, 0x16000);
}
// Unmaps a range that starts inside a mapping and runs past its end, then maps that same range
// again. There are no assertions; the test only exercises the call sequence.
TEST_CASE("HostMemory: Right out of bounds unmap", "[common]") {
    HostMemory host_memory(BACKING_SIZE, VIRTUAL_SIZE);

    host_memory.Map(0x0000, 0, 0x4000);        // virtual [0x0000, 0x4000)
    host_memory.Unmap(0x2000, 0x4000);         // [0x2000, 0x6000): 0x2000 bytes past the end
    host_memory.Map(0x2000, 0x80000, 0x4000);  // remap [0x2000, 0x6000) from backing 0x80000
}
// Unmaps a range that starts before a mapping and ends inside it, then maps the released part
// of the original range again. There are no assertions; the test only exercises the call
// sequence.
TEST_CASE("HostMemory: Left out of bounds unmap", "[common]") {
    HostMemory host_memory(BACKING_SIZE, VIRTUAL_SIZE);

    host_memory.Map(0x8000, 0, 0x4000);  // virtual [0x8000, 0xC000)
    host_memory.Unmap(0x6000, 0x4000);   // [0x6000, 0xA000): begins 0x2000 bytes too early
    host_memory.Map(0x8000, 0, 0x2000);  // remap [0x8000, 0xA000)
}
// Unmaps one range that covers the tail of a first mapping and all of a second, adjacent
// mapping, then maps a larger range starting at the same address. There are no assertions; the
// test only exercises the call sequence.
TEST_CASE("HostMemory: Multiple placeholder unmap", "[common]") {
    HostMemory host_memory(BACKING_SIZE, VIRTUAL_SIZE);

    host_memory.Map(0x0000, 0, 0x4000);   // virtual [0x0000, 0x4000)
    host_memory.Map(0x4000, 0, 0x1b000);  // virtual [0x4000, 0x1F000)

    host_memory.Unmap(0x3000, 0x1c000);   // [0x3000, 0x1F000) touches both mappings
    host_memory.Map(0x3000, 0, 0x20000);  // virtual [0x3000, 0x23000)
}
// Unmaps a range straddling the boundary between two adjacent mappings, then maps that same
// range again. There are no assertions; the test only exercises the call sequence.
TEST_CASE("HostMemory: Unmap between placeholders", "[common]") {
    HostMemory host_memory(BACKING_SIZE, VIRTUAL_SIZE);

    host_memory.Map(0x0000, 0, 0x4000);  // virtual [0x0000, 0x4000)
    host_memory.Map(0x4000, 0, 0x4000);  // virtual [0x4000, 0x8000)

    host_memory.Unmap(0x2000, 0x4000);   // [0x2000, 0x6000): second half of one, first of other
    host_memory.Map(0x2000, 0, 0x4000);  // remap [0x2000, 0x6000)
}
// Releases the lower of two adjacent mappings, then maps the never-mapped range below it
// (starting at virtual offset 0) followed by the released range itself. There are no
// assertions; the test only exercises the call sequence.
TEST_CASE("HostMemory: Unmap to origin", "[common]") {
    constexpr size_t length = 0x4000;

    HostMemory host_memory(BACKING_SIZE, VIRTUAL_SIZE);
    host_memory.Map(0x4000, 0, length);  // virtual [0x4000, 0x8000)
    host_memory.Map(0x8000, 0, length);  // virtual [0x8000, 0xC000)

    host_memory.Unmap(0x4000, length);   // release [0x4000, 0x8000)

    host_memory.Map(0, 0, length);       // virtual [0x0000, 0x4000)
    host_memory.Map(0x4000, 0, length);  // virtual [0x4000, 0x8000)
}
// Releases the upper of two adjacent mappings and maps it again. There are no assertions; the
// test only exercises the call sequence.
TEST_CASE("HostMemory: Unmap to right", "[common]") {
    constexpr size_t length = 0x4000;
    constexpr size_t upper_offset = 0x8000;

    HostMemory host_memory(BACKING_SIZE, VIRTUAL_SIZE);
    host_memory.Map(0x4000, 0, length);        // virtual [0x4000, 0x8000)
    host_memory.Map(upper_offset, 0, length);  // virtual [0x8000, 0xC000)

    host_memory.Unmap(upper_offset, length);
    host_memory.Map(upper_offset, 0, length);
}
// Unmaps the upper half of a mapping and checks that a byte in the lower half is still readable
// with its previous value.
TEST_CASE("HostMemory: Partial right unmap check bindings", "[common]") {
    constexpr size_t virtual_offset = 0x4000;

    HostMemory host_memory(BACKING_SIZE, VIRTUAL_SIZE);
    // Virtual [0x4000, 0x8000) -> backing [0x10000, 0x14000)
    host_memory.Map(virtual_offset, 0x10000, 0x4000);

    volatile u8* const ptr = host_memory.VirtualBasePointer() + virtual_offset;
    ptr[0x1000] = 17;  // virtual 0x5000, below the range unmapped next

    host_memory.Unmap(0x6000, 0x2000);  // release [0x6000, 0x8000)
    REQUIRE(ptr[0x1000] == 17);
}
// Unmaps the lower half of a mapping and checks that bytes in the upper half are still readable
// with their previous values.
TEST_CASE("HostMemory: Partial left unmap check bindings", "[common]") {
    constexpr size_t virtual_offset = 0x4000;

    HostMemory host_memory(BACKING_SIZE, VIRTUAL_SIZE);
    // Virtual [0x4000, 0x8000) -> backing [0x10000, 0x14000)
    host_memory.Map(virtual_offset, 0x10000, 0x4000);

    volatile u8* const ptr = host_memory.VirtualBasePointer() + virtual_offset;
    ptr[0x3000] = 19;  // virtual 0x7000
    ptr[0x3fff] = 12;  // virtual 0x7FFF, last byte of the mapping

    host_memory.Unmap(virtual_offset, 0x2000);  // release [0x4000, 0x6000)
    REQUIRE(ptr[0x3000] == 19);
    REQUIRE(ptr[0x3fff] == 12);
}
// Unmaps the interior of a mapping and checks that the bytes on both surviving sides are still
// readable with their previous values.
TEST_CASE("HostMemory: Partial middle unmap check bindings", "[common]") {
    HostMemory mem(BACKING_SIZE, VIRTUAL_SIZE);
    // Virtual [0x4000, 0x8000) -> backing [0x10000, 0x14000)
    mem.Map(0x4000, 0x10000, 0x4000);

    volatile u8* const ptr = mem.VirtualBasePointer() + 0x4000;
    ptr[0x0000] = 19;  // virtual 0x4000, first byte of the mapping
    ptr[0x3fff] = 12;  // virtual 0x7FFF, last byte of the mapping

    // Release [0x5000, 0x7000), i.e. the middle of the mapping. The previous offset of 0x1000
    // targeted [0x1000, 0x3000), which lies entirely below the mapping, so no partial unmap of
    // the mapped range was being exercised.
    mem.Unmap(0x5000, 0x2000);
    REQUIRE(ptr[0x0000] == 19);
    REQUIRE(ptr[0x3fff] == 12);
}
// Unmaps a range that straddles two adjacent mappings backed by different memory, and checks
// that the untouched outer bytes of both mappings keep their values.
TEST_CASE("HostMemory: Partial sparse middle unmap and check bindings", "[common]") {
    constexpr size_t virtual_offset = 0x4000;
    constexpr size_t length = 0x2000;

    HostMemory host_memory(BACKING_SIZE, VIRTUAL_SIZE);
    host_memory.Map(virtual_offset, 0x10000, length);  // virtual [0x4000, 0x6000)
    host_memory.Map(0x6000, 0x20000, length);          // virtual [0x6000, 0x8000)

    volatile u8* const ptr = host_memory.VirtualBasePointer() + virtual_offset;
    ptr[0x0000] = 19;  // virtual 0x4000, in the first mapping
    ptr[0x3fff] = 12;  // virtual 0x7FFF, in the second mapping

    host_memory.Unmap(0x5000, length);  // release [0x5000, 0x7000)
    REQUIRE(ptr[0x0000] == 19);
    REQUIRE(ptr[0x3fff] == 12);
}

View file

@ -99,25 +99,13 @@ void ThreadManager::FlushRegion(VAddr addr, u64 size) {
PushCommand(FlushRegionCommand(addr, size)); PushCommand(FlushRegionCommand(addr, size));
return; return;
} }
if (!Settings::IsGPULevelExtreme()) {
// Asynchronous GPU mode return;
switch (Settings::values.gpu_accuracy.GetValue()) {
case Settings::GPUAccuracy::Normal:
PushCommand(FlushRegionCommand(addr, size));
break;
case Settings::GPUAccuracy::High:
// TODO(bunnei): Is this right? Preserving existing behavior for now
break;
case Settings::GPUAccuracy::Extreme: {
auto& gpu = system.GPU();
u64 fence = gpu.RequestFlush(addr, size);
PushCommand(GPUTickCommand(), true);
ASSERT(fence <= gpu.CurrentFlushRequestFence());
break;
}
default:
UNIMPLEMENTED_MSG("Unsupported gpu_accuracy {}", Settings::values.gpu_accuracy.GetValue());
} }
auto& gpu = system.GPU();
u64 fence = gpu.RequestFlush(addr, size);
PushCommand(GPUTickCommand(), true);
ASSERT(fence <= gpu.CurrentFlushRequestFence());
} }
void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {

View file

@ -2,6 +2,8 @@
// Licensed under GPLv2 or any later version // Licensed under GPLv2 or any later version
// Refer to the license.txt file included. // Refer to the license.txt file included.
#include <atomic>
#include "common/assert.h" #include "common/assert.h"
#include "common/common_types.h" #include "common/common_types.h"
#include "common/div_ceil.h" #include "common/div_ceil.h"
@ -10,35 +12,59 @@
namespace VideoCore { namespace VideoCore {
RasterizerAccelerated::RasterizerAccelerated(Core::Memory::Memory& cpu_memory_) using namespace Core::Memory;
: cpu_memory{cpu_memory_} {}
RasterizerAccelerated::RasterizerAccelerated(Memory& cpu_memory_) : cpu_memory{cpu_memory_} {}
RasterizerAccelerated::~RasterizerAccelerated() = default; RasterizerAccelerated::~RasterizerAccelerated() = default;
void RasterizerAccelerated::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { void RasterizerAccelerated::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
const auto page_end = Common::DivCeil(addr + size, Core::Memory::PAGE_SIZE); u64 uncache_begin = 0;
for (auto page = addr >> Core::Memory::PAGE_BITS; page != page_end; ++page) { u64 cache_begin = 0;
auto& count = cached_pages.at(page >> 2).Count(page); u64 uncache_bytes = 0;
u64 cache_bytes = 0;
std::atomic_thread_fence(std::memory_order_acquire);
const u64 page_end = Common::DivCeil(addr + size, PAGE_SIZE);
for (u64 page = addr >> PAGE_BITS; page != page_end; ++page) {
std::atomic_uint16_t& count = cached_pages.at(page >> 2).Count(page);
if (delta > 0) { if (delta > 0) {
ASSERT_MSG(count < UINT16_MAX, "Count may overflow!"); ASSERT_MSG(count.load(std::memory_order::relaxed) < UINT16_MAX, "Count may overflow!");
} else if (delta < 0) { } else if (delta < 0) {
ASSERT_MSG(count > 0, "Count may underflow!"); ASSERT_MSG(count.load(std::memory_order::relaxed) > 0, "Count may underflow!");
} else { } else {
ASSERT_MSG(true, "Delta must be non-zero!"); ASSERT_MSG(false, "Delta must be non-zero!");
} }
// Adds or subtracts 1, as count is an unsigned 16-bit value // Adds or subtracts 1, as count is an unsigned 16-bit value
count += static_cast<u16>(delta); count.fetch_add(static_cast<u16>(delta), std::memory_order_release);
// Assume delta is either -1 or 1 // Assume delta is either -1 or 1
if (count == 0) { if (count.load(std::memory_order::relaxed) == 0) {
cpu_memory.RasterizerMarkRegionCached(page << Core::Memory::PAGE_BITS, if (uncache_bytes == 0) {
Core::Memory::PAGE_SIZE, false); uncache_begin = page;
} else if (count == 1 && delta > 0) { }
cpu_memory.RasterizerMarkRegionCached(page << Core::Memory::PAGE_BITS, uncache_bytes += PAGE_SIZE;
Core::Memory::PAGE_SIZE, true); } else if (uncache_bytes > 0) {
cpu_memory.RasterizerMarkRegionCached(uncache_begin << PAGE_BITS, uncache_bytes, false);
uncache_bytes = 0;
} }
if (count.load(std::memory_order::relaxed) == 1 && delta > 0) {
if (cache_bytes == 0) {
cache_begin = page;
}
cache_bytes += PAGE_SIZE;
} else if (cache_bytes > 0) {
cpu_memory.RasterizerMarkRegionCached(cache_begin << PAGE_BITS, cache_bytes, true);
cache_bytes = 0;
}
}
if (uncache_bytes > 0) {
cpu_memory.RasterizerMarkRegionCached(uncache_begin << PAGE_BITS, uncache_bytes, false);
}
if (cache_bytes > 0) {
cpu_memory.RasterizerMarkRegionCached(cache_begin << PAGE_BITS, cache_bytes, true);
} }
} }

View file

@ -756,6 +756,8 @@ void Config::ReadCpuValues() {
QStringLiteral("cpuopt_unsafe_reduce_fp_error"), true); QStringLiteral("cpuopt_unsafe_reduce_fp_error"), true);
ReadSettingGlobal(Settings::values.cpuopt_unsafe_inaccurate_nan, ReadSettingGlobal(Settings::values.cpuopt_unsafe_inaccurate_nan,
QStringLiteral("cpuopt_unsafe_inaccurate_nan"), true); QStringLiteral("cpuopt_unsafe_inaccurate_nan"), true);
ReadSettingGlobal(Settings::values.cpuopt_unsafe_fastmem_check,
QStringLiteral("cpuopt_unsafe_fastmem_check"), true);
if (global) { if (global) {
Settings::values.cpuopt_page_tables = Settings::values.cpuopt_page_tables =
@ -774,6 +776,8 @@ void Config::ReadCpuValues() {
ReadSetting(QStringLiteral("cpuopt_misc_ir"), true).toBool(); ReadSetting(QStringLiteral("cpuopt_misc_ir"), true).toBool();
Settings::values.cpuopt_reduce_misalign_checks = Settings::values.cpuopt_reduce_misalign_checks =
ReadSetting(QStringLiteral("cpuopt_reduce_misalign_checks"), true).toBool(); ReadSetting(QStringLiteral("cpuopt_reduce_misalign_checks"), true).toBool();
Settings::values.cpuopt_fastmem =
ReadSetting(QStringLiteral("cpuopt_fastmem"), true).toBool();
} }
qt_config->endGroup(); qt_config->endGroup();
@ -1332,6 +1336,8 @@ void Config::SaveCpuValues() {
Settings::values.cpuopt_unsafe_reduce_fp_error, true); Settings::values.cpuopt_unsafe_reduce_fp_error, true);
WriteSettingGlobal(QStringLiteral("cpuopt_unsafe_inaccurate_nan"), WriteSettingGlobal(QStringLiteral("cpuopt_unsafe_inaccurate_nan"),
Settings::values.cpuopt_unsafe_inaccurate_nan, true); Settings::values.cpuopt_unsafe_inaccurate_nan, true);
WriteSettingGlobal(QStringLiteral("cpuopt_unsafe_fastmem_check"),
Settings::values.cpuopt_unsafe_fastmem_check, true);
if (global) { if (global) {
WriteSetting(QStringLiteral("cpuopt_page_tables"), Settings::values.cpuopt_page_tables, WriteSetting(QStringLiteral("cpuopt_page_tables"), Settings::values.cpuopt_page_tables,
@ -1348,6 +1354,7 @@ void Config::SaveCpuValues() {
WriteSetting(QStringLiteral("cpuopt_misc_ir"), Settings::values.cpuopt_misc_ir, true); WriteSetting(QStringLiteral("cpuopt_misc_ir"), Settings::values.cpuopt_misc_ir, true);
WriteSetting(QStringLiteral("cpuopt_reduce_misalign_checks"), WriteSetting(QStringLiteral("cpuopt_reduce_misalign_checks"),
Settings::values.cpuopt_reduce_misalign_checks, true); Settings::values.cpuopt_reduce_misalign_checks, true);
WriteSetting(QStringLiteral("cpuopt_fastmem"), Settings::values.cpuopt_fastmem, true);
} }
qt_config->endGroup(); qt_config->endGroup();

View file

@ -35,12 +35,15 @@ void ConfigureCpu::SetConfiguration() {
ui->cpuopt_unsafe_unfuse_fma->setEnabled(runtime_lock); ui->cpuopt_unsafe_unfuse_fma->setEnabled(runtime_lock);
ui->cpuopt_unsafe_reduce_fp_error->setEnabled(runtime_lock); ui->cpuopt_unsafe_reduce_fp_error->setEnabled(runtime_lock);
ui->cpuopt_unsafe_inaccurate_nan->setEnabled(runtime_lock); ui->cpuopt_unsafe_inaccurate_nan->setEnabled(runtime_lock);
ui->cpuopt_unsafe_fastmem_check->setEnabled(runtime_lock);
ui->cpuopt_unsafe_unfuse_fma->setChecked(Settings::values.cpuopt_unsafe_unfuse_fma.GetValue()); ui->cpuopt_unsafe_unfuse_fma->setChecked(Settings::values.cpuopt_unsafe_unfuse_fma.GetValue());
ui->cpuopt_unsafe_reduce_fp_error->setChecked( ui->cpuopt_unsafe_reduce_fp_error->setChecked(
Settings::values.cpuopt_unsafe_reduce_fp_error.GetValue()); Settings::values.cpuopt_unsafe_reduce_fp_error.GetValue());
ui->cpuopt_unsafe_inaccurate_nan->setChecked( ui->cpuopt_unsafe_inaccurate_nan->setChecked(
Settings::values.cpuopt_unsafe_inaccurate_nan.GetValue()); Settings::values.cpuopt_unsafe_inaccurate_nan.GetValue());
ui->cpuopt_unsafe_fastmem_check->setChecked(
Settings::values.cpuopt_unsafe_fastmem_check.GetValue());
if (Settings::IsConfiguringGlobal()) { if (Settings::IsConfiguringGlobal()) {
ui->accuracy->setCurrentIndex(static_cast<int>(Settings::values.cpu_accuracy.GetValue())); ui->accuracy->setCurrentIndex(static_cast<int>(Settings::values.cpu_accuracy.GetValue()));
@ -84,6 +87,9 @@ void ConfigureCpu::ApplyConfiguration() {
ConfigurationShared::ApplyPerGameSetting(&Settings::values.cpuopt_unsafe_inaccurate_nan, ConfigurationShared::ApplyPerGameSetting(&Settings::values.cpuopt_unsafe_inaccurate_nan,
ui->cpuopt_unsafe_inaccurate_nan, ui->cpuopt_unsafe_inaccurate_nan,
cpuopt_unsafe_inaccurate_nan); cpuopt_unsafe_inaccurate_nan);
ConfigurationShared::ApplyPerGameSetting(&Settings::values.cpuopt_unsafe_fastmem_check,
ui->cpuopt_unsafe_fastmem_check,
cpuopt_unsafe_fastmem_check);
if (Settings::IsConfiguringGlobal()) { if (Settings::IsConfiguringGlobal()) {
// Guard if during game and set to game-specific value // Guard if during game and set to game-specific value
@ -134,4 +140,7 @@ void ConfigureCpu::SetupPerGameUI() {
ConfigurationShared::SetColoredTristate(ui->cpuopt_unsafe_inaccurate_nan, ConfigurationShared::SetColoredTristate(ui->cpuopt_unsafe_inaccurate_nan,
Settings::values.cpuopt_unsafe_inaccurate_nan, Settings::values.cpuopt_unsafe_inaccurate_nan,
cpuopt_unsafe_inaccurate_nan); cpuopt_unsafe_inaccurate_nan);
ConfigurationShared::SetColoredTristate(ui->cpuopt_unsafe_fastmem_check,
Settings::values.cpuopt_unsafe_fastmem_check,
cpuopt_unsafe_fastmem_check);
} }

View file

@ -41,4 +41,5 @@ private:
ConfigurationShared::CheckState cpuopt_unsafe_unfuse_fma; ConfigurationShared::CheckState cpuopt_unsafe_unfuse_fma;
ConfigurationShared::CheckState cpuopt_unsafe_reduce_fp_error; ConfigurationShared::CheckState cpuopt_unsafe_reduce_fp_error;
ConfigurationShared::CheckState cpuopt_unsafe_inaccurate_nan; ConfigurationShared::CheckState cpuopt_unsafe_inaccurate_nan;
ConfigurationShared::CheckState cpuopt_unsafe_fastmem_check;
}; };

View file

@ -123,6 +123,18 @@
</property> </property>
</widget> </widget>
</item> </item>
<item>
<widget class="QCheckBox" name="cpuopt_unsafe_fastmem_check">
<property name="toolTip">
<string>
&lt;div&gt;This option improves speed by eliminating a safety check before every memory read/write in guest. Disabling this check may allow a game to read/write the emulator's memory.&lt;/div&gt;
</string>
</property>
<property name="text">
<string>Disable address space checks</string>
</property>
</widget>
</item>
</layout> </layout>
</widget> </widget>
</item> </item>

View file

@ -39,6 +39,8 @@ void ConfigureCpuDebug::SetConfiguration() {
ui->cpuopt_misc_ir->setChecked(Settings::values.cpuopt_misc_ir); ui->cpuopt_misc_ir->setChecked(Settings::values.cpuopt_misc_ir);
ui->cpuopt_reduce_misalign_checks->setEnabled(runtime_lock); ui->cpuopt_reduce_misalign_checks->setEnabled(runtime_lock);
ui->cpuopt_reduce_misalign_checks->setChecked(Settings::values.cpuopt_reduce_misalign_checks); ui->cpuopt_reduce_misalign_checks->setChecked(Settings::values.cpuopt_reduce_misalign_checks);
ui->cpuopt_fastmem->setEnabled(runtime_lock);
ui->cpuopt_fastmem->setChecked(Settings::values.cpuopt_fastmem);
} }
void ConfigureCpuDebug::ApplyConfiguration() { void ConfigureCpuDebug::ApplyConfiguration() {
@ -50,6 +52,7 @@ void ConfigureCpuDebug::ApplyConfiguration() {
Settings::values.cpuopt_const_prop = ui->cpuopt_const_prop->isChecked(); Settings::values.cpuopt_const_prop = ui->cpuopt_const_prop->isChecked();
Settings::values.cpuopt_misc_ir = ui->cpuopt_misc_ir->isChecked(); Settings::values.cpuopt_misc_ir = ui->cpuopt_misc_ir->isChecked();
Settings::values.cpuopt_reduce_misalign_checks = ui->cpuopt_reduce_misalign_checks->isChecked(); Settings::values.cpuopt_reduce_misalign_checks = ui->cpuopt_reduce_misalign_checks->isChecked();
Settings::values.cpuopt_fastmem = ui->cpuopt_fastmem->isChecked();
} }
void ConfigureCpuDebug::changeEvent(QEvent* event) { void ConfigureCpuDebug::changeEvent(QEvent* event) {

View file

@ -139,6 +139,20 @@
</property> </property>
</widget> </widget>
</item> </item>
<item>
<widget class="QCheckBox" name="cpuopt_fastmem">
<property name="text">
<string>Enable Host MMU Emulation</string>
</property>
<property name="toolTip">
<string>
&lt;div style="white-space: nowrap"&gt;This optimization speeds up memory accesses by the guest program.&lt;/div&gt;
&lt;div style="white-space: nowrap"&gt;Enabling it causes guest memory reads/writes to be done directly into memory and make use of Host's MMU.&lt;/div&gt;
&lt;div style="white-space: nowrap"&gt;Disabling this forces all memory accesses to use Software MMU Emulation.&lt;/div&gt;
</string>
</property>
</widget>
</item>
</layout> </layout>
</widget> </widget>
</item> </item>

View file

@ -150,6 +150,10 @@ cpuopt_misc_ir =
# 0: Disabled, 1 (default): Enabled # 0: Disabled, 1 (default): Enabled
cpuopt_reduce_misalign_checks = cpuopt_reduce_misalign_checks =
# Enable Host MMU Emulation (faster guest memory access)
# 0: Disabled, 1 (default): Enabled
cpuopt_fastmem =
[Renderer] [Renderer]
# Which backend API to use. # Which backend API to use.
# 0 (default): OpenGL, 1: Vulkan # 0 (default): OpenGL, 1: Vulkan