From 9bede4eeed523f9707a989f1297279c006086e76 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 18 Jul 2019 18:15:53 -0400 Subject: [PATCH] VM_Manager: Align allocated memory to 256bytes This commit ensures that all backing memory allocated for the Guest CPU is aligned to 256 bytes. This due to how gpu memory works and the heavy constraints it has in the alignment of physical memory. --- src/common/alignment.h | 79 +++++++++++++++++++++++++ src/core/hle/kernel/code_set.h | 3 +- src/core/hle/kernel/physical_memory.h | 13 ++++ src/core/hle/kernel/process.cpp | 6 +- src/core/hle/kernel/shared_memory.cpp | 4 +- src/core/hle/kernel/shared_memory.h | 13 ++-- src/core/hle/kernel/transfer_memory.cpp | 2 +- src/core/hle/kernel/transfer_memory.h | 3 +- src/core/hle/kernel/vm_manager.cpp | 15 ++--- src/core/hle/kernel/vm_manager.h | 9 +-- src/core/hle/service/ns/pl_u.cpp | 12 ++-- src/core/loader/elf.cpp | 2 +- src/core/loader/kip.cpp | 2 +- src/core/loader/nro.cpp | 2 +- src/core/loader/nso.cpp | 2 +- 15 files changed, 131 insertions(+), 36 deletions(-) create mode 100644 src/core/hle/kernel/physical_memory.h diff --git a/src/common/alignment.h b/src/common/alignment.h index 617b14d9b..b3fbdfe20 100644 --- a/src/common/alignment.h +++ b/src/common/alignment.h @@ -3,7 +3,10 @@ #pragma once #include +#include #include +#include +#include namespace Common { @@ -37,4 +40,80 @@ constexpr bool IsWordAligned(T value) { return (value & 0b11) == 0; } +template +class AlignmentAllocator { +public: + typedef T value_type; + typedef std::size_t size_type; + typedef std::ptrdiff_t difference_type; + + typedef T* pointer; + typedef const T* const_pointer; + + typedef T& reference; + typedef const T& const_reference; + +public: + inline AlignmentAllocator() throw() {} + + template + inline AlignmentAllocator(const AlignmentAllocator&) throw() {} + + inline ~AlignmentAllocator() throw() {} + + inline pointer adress(reference r) { + return &r; + } + + inline const_pointer adress(const_reference r) const { + return &r; + } + +#if (defined _MSC_VER) + inline pointer allocate(size_type n) { + return (pointer)_aligned_malloc(n * sizeof(value_type), Align); + } + + inline void deallocate(pointer p, size_type) { + _aligned_free(p); + } +#else + inline pointer allocate(size_type n) { + return (pointer)std::aligned_alloc(Align, n * sizeof(value_type)); + } + + inline void deallocate(pointer p, size_type) { + std::free(p); + } +#endif + + inline void construct(pointer p, const value_type& wert) { + new (p) value_type(wert); + } + + inline void destroy(pointer p) { + p->~value_type(); + } + + inline size_type max_size() const throw() { + return size_type(-1) / sizeof(value_type); + } + + template + struct rebind { + typedef AlignmentAllocator other; + }; + + bool operator!=(const AlignmentAllocator& other) const { + return !(*this == other); + } + + // Returns true if and only if storage allocated from *this + // can be deallocated from other, and vice versa. + // Always returns true for stateless allocators. + bool operator==(const AlignmentAllocator& other) const { + return true; + } +}; + } // namespace Common diff --git a/src/core/hle/kernel/code_set.h b/src/core/hle/kernel/code_set.h index 879957dcb..d8ad54030 100644 --- a/src/core/hle/kernel/code_set.h +++ b/src/core/hle/kernel/code_set.h @@ -8,6 +8,7 @@ #include #include "common/common_types.h" +#include "core/hle/kernel/physical_memory.h" namespace Kernel { @@ -77,7 +78,7 @@ struct CodeSet final { } /// The overall data that backs this code set. - std::vector memory; + Kernel::PhysicalMemory memory; /// The segments that comprise this code set. std::array segments; diff --git a/src/core/hle/kernel/physical_memory.h b/src/core/hle/kernel/physical_memory.h new file mode 100644 index 000000000..dd49c75a2 --- /dev/null +++ b/src/core/hle/kernel/physical_memory.h @@ -0,0 +1,13 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/alignment.h" + +namespace Kernel { + +using PhysicalMemory = std::vector>; + +} diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp index 92169a97b..e80a12ac3 100644 --- a/src/core/hle/kernel/process.cpp +++ b/src/core/hle/kernel/process.cpp @@ -247,7 +247,7 @@ VAddr Process::CreateTLSRegion() { ASSERT(region_address.Succeeded()); const auto map_result = vm_manager.MapMemoryBlock( - *region_address, std::make_shared>(Memory::PAGE_SIZE), 0, + *region_address, std::make_shared(Memory::PAGE_SIZE), 0, Memory::PAGE_SIZE, MemoryState::ThreadLocal); ASSERT(map_result.Succeeded()); @@ -277,7 +277,7 @@ void Process::FreeTLSRegion(VAddr tls_address) { } void Process::LoadModule(CodeSet module_, VAddr base_addr) { - const auto memory = std::make_shared>(std::move(module_.memory)); + const auto memory = std::make_shared(std::move(module_.memory)); const auto MapSegment = [&](const CodeSet::Segment& segment, VMAPermission permissions, MemoryState memory_state) { @@ -327,7 +327,7 @@ void Process::AllocateMainThreadStack(u64 stack_size) { // Allocate and map the main thread stack const VAddr mapping_address = vm_manager.GetTLSIORegionEndAddress() - main_thread_stack_size; vm_manager - .MapMemoryBlock(mapping_address, std::make_shared>(main_thread_stack_size), + .MapMemoryBlock(mapping_address, std::make_shared(main_thread_stack_size), 0, main_thread_stack_size, MemoryState::Stack) .Unwrap(); } diff --git a/src/core/hle/kernel/shared_memory.cpp b/src/core/hle/kernel/shared_memory.cpp index f15c5ee36..45a9e1942 100644 --- a/src/core/hle/kernel/shared_memory.cpp +++ b/src/core/hle/kernel/shared_memory.cpp @@ -28,7 +28,7 @@ SharedPtr SharedMemory::Create(KernelCore& kernel, Process* owner_ shared_memory->other_permissions = other_permissions; if (address == 0) { - shared_memory->backing_block = std::make_shared>(size); + shared_memory->backing_block = std::make_shared(size); shared_memory->backing_block_offset = 0; // Refresh the address mappings for the current process. @@ -59,7 +59,7 @@ SharedPtr SharedMemory::Create(KernelCore& kernel, Process* owner_ } SharedPtr SharedMemory::CreateForApplet( - KernelCore& kernel, std::shared_ptr> heap_block, std::size_t offset, u64 size, + KernelCore& kernel, std::shared_ptr heap_block, std::size_t offset, u64 size, MemoryPermission permissions, MemoryPermission other_permissions, std::string name) { SharedPtr shared_memory(new SharedMemory(kernel)); diff --git a/src/core/hle/kernel/shared_memory.h b/src/core/hle/kernel/shared_memory.h index c2b6155e1..01ca6dcd2 100644 --- a/src/core/hle/kernel/shared_memory.h +++ b/src/core/hle/kernel/shared_memory.h @@ -10,6 +10,7 @@ #include "common/common_types.h" #include "core/hle/kernel/object.h" +#include "core/hle/kernel/physical_memory.h" #include "core/hle/kernel/process.h" #include "core/hle/result.h" @@ -62,12 +63,10 @@ public: * block. * @param name Optional object name, used for debugging purposes. */ - static SharedPtr CreateForApplet(KernelCore& kernel, - std::shared_ptr> heap_block, - std::size_t offset, u64 size, - MemoryPermission permissions, - MemoryPermission other_permissions, - std::string name = "Unknown Applet"); + static SharedPtr CreateForApplet( + KernelCore& kernel, std::shared_ptr heap_block, std::size_t offset, + u64 size, MemoryPermission permissions, MemoryPermission other_permissions, + std::string name = "Unknown Applet"); std::string GetTypeName() const override { return "SharedMemory"; @@ -135,7 +134,7 @@ private: ~SharedMemory() override; /// Backing memory for this shared memory block. - std::shared_ptr> backing_block; + std::shared_ptr backing_block; /// Offset into the backing block for this shared memory. std::size_t backing_block_offset = 0; /// Size of the memory block. Page-aligned. diff --git a/src/core/hle/kernel/transfer_memory.cpp b/src/core/hle/kernel/transfer_memory.cpp index 26c4e5e67..1113c815e 100644 --- a/src/core/hle/kernel/transfer_memory.cpp +++ b/src/core/hle/kernel/transfer_memory.cpp @@ -47,7 +47,7 @@ ResultCode TransferMemory::MapMemory(VAddr address, u64 size, MemoryPermission p return ERR_INVALID_STATE; } - backing_block = std::make_shared>(size); + backing_block = std::make_shared(size); const auto map_state = owner_permissions == MemoryPermission::None ? MemoryState::TransferMemoryIsolated diff --git a/src/core/hle/kernel/transfer_memory.h b/src/core/hle/kernel/transfer_memory.h index a140b1e2b..6be9dc094 100644 --- a/src/core/hle/kernel/transfer_memory.h +++ b/src/core/hle/kernel/transfer_memory.h @@ -8,6 +8,7 @@ #include #include "core/hle/kernel/object.h" +#include "core/hle/kernel/physical_memory.h" union ResultCode; @@ -82,7 +83,7 @@ private: ~TransferMemory() override; /// Memory block backing this instance. - std::shared_ptr> backing_block; + std::shared_ptr backing_block; /// The base address for the memory managed by this instance. VAddr base_address = 0; diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp index 4f45fb03b..40cea1e7c 100644 --- a/src/core/hle/kernel/vm_manager.cpp +++ b/src/core/hle/kernel/vm_manager.cpp @@ -5,6 +5,7 @@ #include #include #include +#include "common/alignment.h" #include "common/assert.h" #include "common/logging/log.h" #include "common/memory_hook.h" @@ -103,7 +104,7 @@ bool VMManager::IsValidHandle(VMAHandle handle) const { } ResultVal VMManager::MapMemoryBlock(VAddr target, - std::shared_ptr> block, + std::shared_ptr block, std::size_t offset, u64 size, MemoryState state, VMAPermission perm) { ASSERT(block != nullptr); @@ -260,7 +261,7 @@ ResultVal VMManager::SetHeapSize(u64 size) { if (heap_memory == nullptr) { // Initialize heap - heap_memory = std::make_shared>(size); + heap_memory = std::make_shared(size); heap_end = heap_region_base + size; } else { UnmapRange(heap_region_base, GetCurrentHeapSize()); @@ -341,7 +342,7 @@ ResultCode VMManager::MapPhysicalMemory(VAddr target, u64 size) { const auto map_size = std::min(end_addr - cur_addr, vma_end - cur_addr); if (vma.state == MemoryState::Unmapped) { const auto map_res = - MapMemoryBlock(cur_addr, std::make_shared>(map_size, 0), 0, + MapMemoryBlock(cur_addr, std::make_shared(map_size, 0), 0, map_size, MemoryState::Heap, VMAPermission::ReadWrite); result = map_res.Code(); if (result.IsError()) { @@ -442,7 +443,7 @@ ResultCode VMManager::UnmapPhysicalMemory(VAddr target, u64 size) { if (result.IsError()) { for (const auto [map_address, map_size] : unmapped_regions) { const auto remap_res = - MapMemoryBlock(map_address, std::make_shared>(map_size, 0), 0, + MapMemoryBlock(map_address, std::make_shared(map_size, 0), 0, map_size, MemoryState::Heap, VMAPermission::None); ASSERT_MSG(remap_res.Succeeded(), "UnmapPhysicalMemory re-map on error"); } @@ -593,7 +594,7 @@ ResultCode VMManager::MirrorMemory(VAddr dst_addr, VAddr src_addr, u64 size, Mem ASSERT_MSG(vma_offset + size <= vma->second.size, "Shared memory exceeds bounds of mapped block"); - const std::shared_ptr>& backing_block = vma->second.backing_block; + const std::shared_ptr& backing_block = vma->second.backing_block; const std::size_t backing_block_offset = vma->second.offset + vma_offset; CASCADE_RESULT(auto new_vma, @@ -606,7 +607,7 @@ ResultCode VMManager::MirrorMemory(VAddr dst_addr, VAddr src_addr, u64 size, Mem return RESULT_SUCCESS; } -void VMManager::RefreshMemoryBlockMappings(const std::vector* block) { +void VMManager::RefreshMemoryBlockMappings(const PhysicalMemory* block) { // If this ever proves to have a noticeable performance impact, allow users of the function to // specify a specific range of addresses to limit the scan to. for (const auto& p : vma_map) { @@ -764,7 +765,7 @@ void VMManager::MergeAdjacentVMA(VirtualMemoryArea& left, const VirtualMemoryAre right.backing_block->begin() + right.offset + right.size); } else { // Slow case: make a new memory block for left and right. - auto new_memory = std::make_shared>(); + auto new_memory = std::make_shared(); new_memory->insert(new_memory->end(), left.backing_block->begin() + left.offset, left.backing_block->begin() + left.offset + left.size); new_memory->insert(new_memory->end(), right.backing_block->begin() + right.offset, diff --git a/src/core/hle/kernel/vm_manager.h b/src/core/hle/kernel/vm_manager.h index 0aecb7499..b18cde619 100644 --- a/src/core/hle/kernel/vm_manager.h +++ b/src/core/hle/kernel/vm_manager.h @@ -11,6 +11,7 @@ #include "common/common_types.h" #include "common/memory_hook.h" #include "common/page_table.h" +#include "core/hle/kernel/physical_memory.h" #include "core/hle/result.h" #include "core/memory.h" @@ -290,7 +291,7 @@ struct VirtualMemoryArea { // Settings for type = AllocatedMemoryBlock /// Memory block backing this VMA. - std::shared_ptr> backing_block = nullptr; + std::shared_ptr backing_block = nullptr; /// Offset into the backing_memory the mapping starts from. std::size_t offset = 0; @@ -348,7 +349,7 @@ public: * @param size Size of the mapping. * @param state MemoryState tag to attach to the VMA. */ - ResultVal MapMemoryBlock(VAddr target, std::shared_ptr> block, + ResultVal MapMemoryBlock(VAddr target, std::shared_ptr block, std::size_t offset, u64 size, MemoryState state, VMAPermission perm = VMAPermission::ReadWrite); @@ -547,7 +548,7 @@ public: * Scans all VMAs and updates the page table range of any that use the given vector as backing * memory. This should be called after any operation that causes reallocation of the vector. */ - void RefreshMemoryBlockMappings(const std::vector* block); + void RefreshMemoryBlockMappings(const PhysicalMemory* block); /// Dumps the address space layout to the log, for debugging void LogLayout() const; @@ -777,7 +778,7 @@ private: // the entire virtual address space extents that bound the allocations, including any holes. // This makes deallocation and reallocation of holes fast and keeps process memory contiguous // in the emulator address space, allowing Memory::GetPointer to be reasonably safe. - std::shared_ptr> heap_memory; + std::shared_ptr heap_memory; // The end of the currently allocated heap. This is not an inclusive // end of the range. This is essentially 'base_address + current_size'. diff --git a/src/core/hle/service/ns/pl_u.cpp b/src/core/hle/service/ns/pl_u.cpp index ad176f89d..2a522136d 100644 --- a/src/core/hle/service/ns/pl_u.cpp +++ b/src/core/hle/service/ns/pl_u.cpp @@ -77,7 +77,7 @@ enum class LoadState : u32 { Done = 1, }; -static void DecryptSharedFont(const std::vector& input, std::vector& output, +static void DecryptSharedFont(const std::vector& input, Kernel::PhysicalMemory& output, std::size_t& offset) { ASSERT_MSG(offset + (input.size() * sizeof(u32)) < SHARED_FONT_MEM_SIZE, "Shared fonts exceeds 17mb!"); @@ -94,7 +94,7 @@ static void DecryptSharedFont(const std::vector& input, std::vector& ou offset += transformed_font.size() * sizeof(u32); } -static void EncryptSharedFont(const std::vector& input, std::vector& output, +static void EncryptSharedFont(const std::vector& input, Kernel::PhysicalMemory& output, std::size_t& offset) { ASSERT_MSG(offset + input.size() + 8 < SHARED_FONT_MEM_SIZE, "Shared fonts exceeds 17mb!"); const u32 KEY = EXPECTED_MAGIC ^ EXPECTED_RESULT; @@ -121,7 +121,7 @@ struct PL_U::Impl { return shared_font_regions.at(index); } - void BuildSharedFontsRawRegions(const std::vector& input) { + void BuildSharedFontsRawRegions(const Kernel::PhysicalMemory& input) { // As we can derive the xor key we can just populate the offsets // based on the shared memory dump unsigned cur_offset = 0; @@ -144,7 +144,7 @@ struct PL_U::Impl { Kernel::SharedPtr shared_font_mem; /// Backing memory for the shared font data - std::shared_ptr> shared_font; + std::shared_ptr shared_font; // Automatically populated based on shared_fonts dump or system archives. std::vector shared_font_regions; @@ -166,7 +166,7 @@ PL_U::PL_U() : ServiceFramework("pl:u"), impl{std::make_unique()} { // Rebuild shared fonts from data ncas if (nand->HasEntry(static_cast(FontArchives::Standard), FileSys::ContentRecordType::Data)) { - impl->shared_font = std::make_shared>(SHARED_FONT_MEM_SIZE); + impl->shared_font = std::make_shared(SHARED_FONT_MEM_SIZE); for (auto font : SHARED_FONTS) { const auto nca = nand->GetEntry(static_cast(font.first), FileSys::ContentRecordType::Data); @@ -207,7 +207,7 @@ PL_U::PL_U() : ServiceFramework("pl:u"), impl{std::make_unique()} { } } else { - impl->shared_font = std::make_shared>( + impl->shared_font = std::make_shared( SHARED_FONT_MEM_SIZE); // Shared memory needs to always be allocated and a fixed size const std::string user_path = FileUtil::GetUserPath(FileUtil::UserPath::SysDataDir); diff --git a/src/core/loader/elf.cpp b/src/core/loader/elf.cpp index 6d4b02375..f1795fdd6 100644 --- a/src/core/loader/elf.cpp +++ b/src/core/loader/elf.cpp @@ -295,7 +295,7 @@ Kernel::CodeSet ElfReader::LoadInto(VAddr vaddr) { } } - std::vector program_image(total_image_size); + Kernel::PhysicalMemory program_image(total_image_size); std::size_t current_image_position = 0; Kernel::CodeSet codeset; diff --git a/src/core/loader/kip.cpp b/src/core/loader/kip.cpp index 70051c13a..474b55cb1 100644 --- a/src/core/loader/kip.cpp +++ b/src/core/loader/kip.cpp @@ -69,7 +69,7 @@ AppLoader::LoadResult AppLoader_KIP::Load(Kernel::Process& process) { const VAddr base_address = process.VMManager().GetCodeRegionBaseAddress(); Kernel::CodeSet codeset; - std::vector program_image; + Kernel::PhysicalMemory program_image; const auto load_segment = [&program_image](Kernel::CodeSet::Segment& segment, const std::vector& data, u32 offset) { diff --git a/src/core/loader/nro.cpp b/src/core/loader/nro.cpp index 6a0ca389b..e92e2e06e 100644 --- a/src/core/loader/nro.cpp +++ b/src/core/loader/nro.cpp @@ -143,7 +143,7 @@ static bool LoadNroImpl(Kernel::Process& process, const std::vector& data, } // Build program image - std::vector program_image(PageAlignSize(nro_header.file_size)); + Kernel::PhysicalMemory program_image(PageAlignSize(nro_header.file_size)); std::memcpy(program_image.data(), data.data(), program_image.size()); if (program_image.size() != PageAlignSize(nro_header.file_size)) { return {}; diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp index 29311404a..70c90109f 100644 --- a/src/core/loader/nso.cpp +++ b/src/core/loader/nso.cpp @@ -89,7 +89,7 @@ std::optional AppLoader_NSO::LoadModule(Kernel::Process& process, // Build program image Kernel::CodeSet codeset; - std::vector program_image; + Kernel::PhysicalMemory program_image; for (std::size_t i = 0; i < nso_header.segments.size(); ++i) { std::vector data = file.ReadBytes(nso_header.segments_compressed_size[i], nso_header.segments[i].offset);