From b4f39e2cad90bdcb2fceba02bde116885b8ad3c3 Mon Sep 17 00:00:00 2001 From: pineappleEA Date: Sat, 3 Jul 2021 15:26:41 +0200 Subject: [PATCH] early-access version 1841 --- README.md | 2 +- src/core/hle/service/hid/controllers/npad.cpp | 1 - src/video_core/engines/fermi_2d.cpp | 12 +- src/video_core/memory_manager.cpp | 109 +---- src/video_core/memory_manager.h | 22 +- src/video_core/rasterizer_interface.h | 3 - .../renderer_opengl/gl_rasterizer.cpp | 7 - .../renderer_opengl/gl_rasterizer.h | 1 - .../renderer_vulkan/vk_rasterizer.cpp | 7 - .../renderer_vulkan/vk_rasterizer.h | 1 - src/video_core/texture_cache/image_base.cpp | 3 - src/video_core/texture_cache/image_base.h | 39 +- src/video_core/texture_cache/texture_cache.h | 417 ++---------------- src/video_core/texture_cache/types.h | 1 - src/video_core/texture_cache/util.cpp | 24 +- src/video_core/texture_cache/util.h | 4 +- 16 files changed, 66 insertions(+), 587 deletions(-) diff --git a/README.md b/README.md index 5a5eb80a2..485604290 100755 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ yuzu emulator early access ============= -This is the source code for early-access 1840. +This is the source code for early-access 1841. ## Legal Notice diff --git a/src/core/hle/service/hid/controllers/npad.cpp b/src/core/hle/service/hid/controllers/npad.cpp index dcbb13660..9e7aa086b 100755 --- a/src/core/hle/service/hid/controllers/npad.cpp +++ b/src/core/hle/service/hid/controllers/npad.cpp @@ -254,7 +254,6 @@ void Controller_NPad::InitNewlyAddedController(std::size_t controller_idx) { } void Controller_NPad::OnInit() { - auto& kernel = system.Kernel(); for (std::size_t i = 0; i < styleset_changed_events.size(); ++i) { styleset_changed_events[i] = service_context.CreateEvent(fmt::format("npad:NpadStyleSetChanged_{}", i)); diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index 5f075e20b..8107caad4 100755 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp @@ -66,18 +66,18 @@ void Fermi2D::Blit() { .src_y1 = static_cast((args.dv_dy * args.dst_height + args.src_y0) >> 32), }; + Surface src = regs.src; s32 src_address_offset = 0; - const auto bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(regs.src.format)); - if (regs.src.linear == Tegra::Engines::Fermi2D::MemoryLayout::Pitch && - regs.src.width == config.src_x1 && - config.src_x1 > static_cast(regs.src.pitch / bytes_per_pixel) && config.src_x0 > 0) { + const auto bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format)); + if (src.linear == Tegra::Engines::Fermi2D::MemoryLayout::Pitch && src.width == config.src_x1 && + config.src_x1 > static_cast(src.pitch / bytes_per_pixel) && config.src_x0 > 0) { src_address_offset = config.src_x0 * bytes_per_pixel; - regs.src.width -= config.src_x0; + src.width -= config.src_x0; config.src_x1 -= config.src_x0; config.src_x0 = 0; } - if (!rasterizer->AccelerateSurfaceCopy(regs.src, src_address_offset, regs.dst, config)) { + if (!rasterizer->AccelerateSurfaceCopy(src, src_address_offset, regs.dst, config)) { UNIMPLEMENTED(); } } diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 9deea9a26..02385384c 100755 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -69,16 +69,11 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) { } else { UNREACHABLE_MSG("Unmapping non-existent GPU address=0x{:x}", gpu_addr); } + // Flush and invalidate through the GPU interface, to be asynchronous if possible. + const std::optional cpu_addr = GpuToCpuAddress(gpu_addr); + ASSERT(cpu_addr); - const auto submapped_ranges = GetSubmappedRange(gpu_addr, size); - - for (const auto& map : submapped_ranges) { - // Flush and invalidate through the GPU interface, to be asynchronous if possible. - const std::optional cpu_addr = GpuToCpuAddress(map.first); - ASSERT(cpu_addr); - - rasterizer->UnmapMemory(*cpu_addr, map.second); - } + rasterizer->UnmapMemory(*cpu_addr, size); UpdateRange(gpu_addr, PageEntry::State::Unmapped, size); } @@ -151,14 +146,8 @@ void MemoryManager::SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::s //// Lock the new page // TryLockPage(page_entry, size); - auto& current_page = page_table[PageEntryIndex(gpu_addr)]; - if ((!current_page.IsValid() && page_entry.IsValid()) || - current_page.ToAddress() != page_entry.ToAddress()) { - rasterizer->ModifyGPUMemory(gpu_addr, size); - } - - current_page = page_entry; + page_table[PageEntryIndex(gpu_addr)] = page_entry; } std::optional MemoryManager::FindFreeRange(std::size_t size, std::size_t align, @@ -204,19 +193,6 @@ std::optional MemoryManager::GpuToCpuAddress(GPUVAddr gpu_addr) const { return page_entry.ToAddress() + (gpu_addr & page_mask); } -std::optional MemoryManager::GpuToCpuAddress(GPUVAddr addr, std::size_t size) const { - size_t page_index{addr >> page_bits}; - const size_t page_last{(addr + size + page_size - 1) >> page_bits}; - while (page_index < page_last) { - const auto page_addr{GpuToCpuAddress(page_index << page_bits)}; - if (page_addr && *page_addr != 0) { - return page_addr; - } - ++page_index; - } - return std::nullopt; -} - template T MemoryManager::Read(GPUVAddr addr) const { if (auto page_pointer{GetPointer(addr)}; page_pointer) { @@ -413,79 +389,4 @@ bool MemoryManager::IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const { return page <= Core::Memory::PAGE_SIZE; } -bool MemoryManager::IsContinousRange(GPUVAddr gpu_addr, std::size_t size) const { - size_t page_index{gpu_addr >> page_bits}; - const size_t page_last{(gpu_addr + size + page_size - 1) >> page_bits}; - std::optional old_page_addr{}; - while (page_index != page_last) { - const auto page_addr{GpuToCpuAddress(page_index << page_bits)}; - if (!page_addr || *page_addr == 0) { - return false; - } - if (old_page_addr) { - if (*old_page_addr + page_size != *page_addr) { - return false; - } - } - old_page_addr = page_addr; - ++page_index; - } - return true; -} - -bool MemoryManager::IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) const { - size_t page_index{gpu_addr >> page_bits}; - const size_t page_last{(gpu_addr + size + page_size - 1) >> page_bits}; - while (page_index < page_last) { - if (!page_table[page_index].IsValid() || page_table[page_index].ToAddress() == 0) { - return false; - } - ++page_index; - } - return true; -} - -std::vector> MemoryManager::GetSubmappedRange( - GPUVAddr gpu_addr, std::size_t size) const { - std::vector> result{}; - size_t page_index{gpu_addr >> page_bits}; - size_t remaining_size{size}; - size_t page_offset{gpu_addr & page_mask}; - std::optional> last_segment{}; - std::optional old_page_addr{}; - const auto extend_size = [this, &last_segment, &page_index](std::size_t bytes) { - if (!last_segment) { - GPUVAddr new_base_addr = page_index << page_bits; - last_segment = {new_base_addr, bytes}; - } else { - last_segment->second += bytes; - } - }; - const auto split = [this, &last_segment, &result] { - if (last_segment) { - result.push_back(*last_segment); - last_segment = std::nullopt; - } - }; - while (remaining_size > 0) { - const size_t num_bytes{std::min(page_size - page_offset, remaining_size)}; - const auto page_addr{GpuToCpuAddress(page_index << page_bits)}; - if (!page_addr) { - split(); - } else if (old_page_addr) { - if (*old_page_addr + page_size != *page_addr) { - split(); - } - extend_size(num_bytes); - } else { - extend_size(num_bytes); - } - ++page_index; - page_offset = 0; - remaining_size -= num_bytes; - } - split(); - return result; -} - } // namespace Tegra diff --git a/src/video_core/memory_manager.h b/src/video_core/memory_manager.h index 509f14f26..5d6c196fa 100755 --- a/src/video_core/memory_manager.h +++ b/src/video_core/memory_manager.h @@ -76,8 +76,6 @@ public: [[nodiscard]] std::optional GpuToCpuAddress(GPUVAddr addr) const; - [[nodiscard]] std::optional GpuToCpuAddress(GPUVAddr addr, std::size_t size) const; - template [[nodiscard]] T Read(GPUVAddr addr) const; @@ -114,28 +112,10 @@ public: void WriteBlockUnsafe(GPUVAddr gpu_dest_addr, const void* src_buffer, std::size_t size); /** - * Checks if a gpu region can be simply read with a pointer. + * IsGranularRange checks if a gpu region can be simply read with a pointer. */ [[nodiscard]] bool IsGranularRange(GPUVAddr gpu_addr, std::size_t size) const; - /** - * Checks if a gpu region is mapped by a single range of cpu addresses. - */ - [[nodiscard]] bool IsContinousRange(GPUVAddr gpu_addr, std::size_t size) const; - - /** - * Checks if a gpu region is mapped entirely. - */ - [[nodiscard]] bool IsFullyMappedRange(GPUVAddr gpu_addr, std::size_t size) const; - - /** - * Returns a vector with all the subranges of cpu addresses mapped beneath. - * if the region is continous, a single pair will be returned. If it's unmapped, an empty vector - * will be returned; - */ - std::vector> GetSubmappedRange(GPUVAddr gpu_addr, - std::size_t size) const; - [[nodiscard]] GPUVAddr Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size); [[nodiscard]] GPUVAddr MapAllocate(VAddr cpu_addr, std::size_t size, std::size_t align); [[nodiscard]] GPUVAddr MapAllocate32(VAddr cpu_addr, std::size_t size); diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 922dc4095..c1c636ceb 100755 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -93,9 +93,6 @@ public: /// Unmap memory range virtual void UnmapMemory(VAddr addr, u64 size) = 0; - /// Remap GPU memory range. This means underneath backing memory changed - virtual void ModifyGPUMemory(GPUVAddr addr, u64 size) = 0; - /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory /// and invalidated virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 3bc39b09b..ecf67e5ce 100755 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -631,13 +631,6 @@ void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) { shader_cache.OnCPUWrite(addr, size); } -void RasterizerOpenGL::ModifyGPUMemory(GPUVAddr addr, u64 size) { - { - std::scoped_lock lock{texture_cache.mutex}; - texture_cache.UnmapGPUMemory(addr, size); - } -} - void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) { if (!gpu.IsAsync()) { gpu_memory.Write(addr, value); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 87d69a91c..7444ebd8d 100755 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -82,7 +82,6 @@ public: void OnCPUWrite(VAddr addr, u64 size) override; void SyncGuestHost() override; void UnmapMemory(VAddr addr, u64 size) override; - void ModifyGPUMemory(GPUVAddr addr, u64 size) override; void SignalSemaphore(GPUVAddr addr, u32 value) override; void SignalSyncPoint(u32 value) override; void ReleaseFences() override; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 37aba5a4a..524fb2ad4 100755 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -577,13 +577,6 @@ void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) { pipeline_cache.OnCPUWrite(addr, size); } -void RasterizerVulkan::ModifyGPUMemory(GPUVAddr addr, u64 size) { - { - std::scoped_lock lock{texture_cache.mutex}; - texture_cache.UnmapGPUMemory(addr, size); - } -} - void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) { if (!gpu.IsAsync()) { gpu_memory.Write(addr, value); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 01d7eec5d..d4a4ee58e 100755 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -74,7 +74,6 @@ public: void OnCPUWrite(VAddr addr, u64 size) override; void SyncGuestHost() override; void UnmapMemory(VAddr addr, u64 size) override; - void ModifyGPUMemory(GPUVAddr addr, u64 size) override; void SignalSemaphore(GPUVAddr addr, u32 value) override; void SignalSyncPoint(u32 value) override; void ReleaseFences() override; diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp index 2aae338b6..ad69d32d1 100755 --- a/src/video_core/texture_cache/image_base.cpp +++ b/src/video_core/texture_cache/image_base.cpp @@ -69,9 +69,6 @@ ImageBase::ImageBase(const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_ } } -ImageMapView::ImageMapView(GPUVAddr gpu_addr_, VAddr cpu_addr_, size_t size_, ImageId image_id_) - : gpu_addr{gpu_addr_}, cpu_addr{cpu_addr_}, size{size_}, image_id{image_id_} {} - std::optional ImageBase::TryFindBase(GPUVAddr other_addr) const noexcept { if (other_addr < gpu_addr) { // Subresource address can't be lower than the base diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index ff1feda9b..e326cab71 100755 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h @@ -25,14 +25,12 @@ enum class ImageFlagBits : u32 { Strong = 1 << 5, ///< Exists in the image table, the dimensions are can be trusted Registered = 1 << 6, ///< True when the image is registered Picked = 1 << 7, ///< Temporary flag to mark the image as picked - Remapped = 1 << 8, ///< Image has been remapped. - Sparse = 1 << 9, ///< Image has non continous submemory. // Garbage Collection Flags - BadOverlap = 1 << 10, ///< This image overlaps other but doesn't fit, has higher - ///< garbage collection priority - Alias = 1 << 11, ///< This image has aliases and has priority on garbage - ///< collection + BadOverlap = 1 << 8, ///< This image overlaps other but doesn't fit, has higher + ///< garbage collection priority + Alias = 1 << 9, ///< This image has aliases and has priority on garbage + ///< collection }; DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) @@ -59,12 +57,6 @@ struct ImageBase { return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end; } - [[nodiscard]] bool OverlapsGPU(GPUVAddr overlap_gpu_addr, size_t overlap_size) const noexcept { - const VAddr overlap_end = overlap_gpu_addr + overlap_size; - const GPUVAddr gpu_addr_end = gpu_addr + guest_size_bytes; - return gpu_addr < overlap_end && overlap_gpu_addr < gpu_addr_end; - } - void CheckBadOverlapState(); void CheckAliasState(); @@ -92,29 +84,6 @@ struct ImageBase { std::vector aliased_images; std::vector overlapping_images; - ImageMapId map_view_id{}; -}; - -struct ImageMapView { - explicit ImageMapView(GPUVAddr gpu_addr, VAddr cpu_addr, size_t size, ImageId image_id); - - [[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept { - const VAddr overlap_end = overlap_cpu_addr + overlap_size; - const VAddr cpu_addr_end = cpu_addr + size; - return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end; - } - - [[nodiscard]] bool OverlapsGPU(GPUVAddr overlap_gpu_addr, size_t overlap_size) const noexcept { - const GPUVAddr overlap_end = overlap_gpu_addr + overlap_size; - const GPUVAddr gpu_addr_end = gpu_addr + size; - return gpu_addr < overlap_end && overlap_gpu_addr < gpu_addr_end; - } - - GPUVAddr gpu_addr; - VAddr cpu_addr; - size_t size; - ImageId image_id; - bool picked{}; }; struct ImageAllocBase { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index a3120a25e..917184df8 100755 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -13,7 +13,6 @@ #include #include #include -#include #include #include @@ -153,9 +152,6 @@ public: /// Remove images in a region void UnmapMemory(VAddr cpu_addr, size_t size); - /// Remove images in a region - void UnmapGPUMemory(GPUVAddr gpu_addr, size_t size); - /// Blit an image with the given parameters void BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src, s32 src_address_offset, @@ -192,22 +188,7 @@ public: private: /// Iterate over all page indices in a range template - static void ForEachCPUPage(VAddr addr, size_t size, Func&& func) { - static constexpr bool RETURNS_BOOL = std::is_same_v, bool>; - const u64 page_end = (addr + size - 1) >> PAGE_BITS; - for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) { - if constexpr (RETURNS_BOOL) { - if (func(page)) { - break; - } - } else { - func(page); - } - } - } - - template - static void ForEachGPUPage(GPUVAddr addr, size_t size, Func&& func) { + static void ForEachPage(VAddr addr, size_t size, Func&& func) { static constexpr bool RETURNS_BOOL = std::is_same_v, bool>; const u64 page_end = (addr + size - 1) >> PAGE_BITS; for (u64 page = addr >> PAGE_BITS; page <= page_end; ++page) { @@ -237,7 +218,7 @@ private: FramebufferId GetFramebufferId(const RenderTargets& key); /// Refresh the contents (pixel data) of an image - void RefreshContents(Image& image, ImageId image_id); + void RefreshContents(Image& image); /// Upload data from guest to an image template @@ -287,16 +268,6 @@ private: template void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func); - template - void ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func); - - template - void ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func); - - /// Iterates over all the images in a region calling func - template - void ForEachSparseSegment(ImageBase& image, Func&& func); - /// Find or create an image view in the given image with the passed parameters [[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info); @@ -307,10 +278,10 @@ private: void UnregisterImage(ImageId image); /// Track CPU reads and writes for image - void TrackImage(ImageBase& image, ImageId image_id); + void TrackImage(ImageBase& image); /// Stop tracking CPU reads and writes for image - void UntrackImage(ImageBase& image, ImageId image_id); + void UntrackImage(ImageBase& image); /// Delete image from the cache void DeleteImage(ImageId image); @@ -368,13 +339,7 @@ private: std::unordered_map samplers; std::unordered_map framebuffers; - std::unordered_map, IdentityHash> page_table; - std::unordered_map, IdentityHash> gpu_page_table; - std::unordered_map, IdentityHash> sparse_page_table; - - std::unordered_map> sparse_views; - - VAddr virtual_invalid_space{}; + std::unordered_map, IdentityHash> page_table; bool has_deleted_images = false; u64 total_used_memory = 0; @@ -383,7 +348,6 @@ private: u64 critical_memory; SlotVector slot_images; - SlotVector slot_map_views; SlotVector slot_image_views; SlotVector slot_image_allocs; SlotVector slot_samplers; @@ -494,7 +458,7 @@ void TextureCache

::RunGarbageCollector() { } } if (True(image->flags & ImageFlagBits::Tracked)) { - UntrackImage(*image, image_id); + UntrackImage(*image); } UnregisterImage(image_id); DeleteImage(image_id); @@ -693,9 +657,7 @@ void TextureCache

::WriteMemory(VAddr cpu_addr, size_t size) { return; } image.flags |= ImageFlagBits::CpuModified; - if (True(image.flags & ImageFlagBits::Tracked)) { - UntrackImage(image, image_id); - } + UntrackImage(image); }); } @@ -732,30 +694,13 @@ void TextureCache

::UnmapMemory(VAddr cpu_addr, size_t size) { for (const ImageId id : deleted_images) { Image& image = slot_images[id]; if (True(image.flags & ImageFlagBits::Tracked)) { - UntrackImage(image, id); + UntrackImage(image); } UnregisterImage(id); DeleteImage(id); } } -template -void TextureCache

::UnmapGPUMemory(GPUVAddr gpu_addr, size_t size) { - std::vector deleted_images; - ForEachImageInRegionGPU(gpu_addr, size, - [&](ImageId id, Image&) { deleted_images.push_back(id); }); - for (const ImageId id : deleted_images) { - Image& image = slot_images[id]; - if (True(image.flags & ImageFlagBits::Remapped)) { - continue; - } - image.flags |= ImageFlagBits::Remapped; - if (True(image.flags & ImageFlagBits::Tracked)) { - UntrackImage(image, id); - } - } -} - template void TextureCache

::BlitImage(const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src, s32 src_address_offset, @@ -848,10 +793,9 @@ typename P::ImageView* TextureCache

::TryFindFramebufferImageView(VAddr cpu_ad if (it == page_table.end()) { return nullptr; } - const auto& image_map_ids = it->second; - for (const ImageMapId map_id : image_map_ids) { - const ImageMapView& map = slot_map_views[map_id]; - const ImageBase& image = slot_images[map.image_id]; + const auto& image_ids = it->second; + for (const ImageId image_id : image_ids) { + const ImageBase& image = slot_images[image_id]; if (image.cpu_addr != cpu_addr) { continue; } @@ -931,13 +875,13 @@ bool TextureCache

::IsRegionGpuModified(VAddr addr, size_t size) { } template -void TextureCache

::RefreshContents(Image& image, ImageId image_id) { +void TextureCache

::RefreshContents(Image& image) { if (False(image.flags & ImageFlagBits::CpuModified)) { // Only upload modified images return; } image.flags &= ~ImageFlagBits::CpuModified; - TrackImage(image, image_id); + TrackImage(image); if (image.info.num_samples > 1) { LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); @@ -974,7 +918,7 @@ void TextureCache

::UploadImageContents(Image& image, StagingBuffer& staging) template ImageViewId TextureCache

::FindImageView(const TICEntry& config) { - if (!IsValidEntry(gpu_memory, config)) { + if (!IsValidAddress(gpu_memory, config)) { return NULL_IMAGE_VIEW_ID; } const auto [pair, is_new] = image_views.try_emplace(config); @@ -1016,20 +960,14 @@ ImageId TextureCache

::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_a template ImageId TextureCache

::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, RelaxedOptions options) { - std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); + const std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); if (!cpu_addr) { - cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info)); - if (!cpu_addr) { - return ImageId{}; - } + return ImageId{}; } const bool broken_views = runtime.HasBrokenTextureViewFormats(); const bool native_bgr = runtime.HasNativeBgr(); ImageId image_id; const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { - if (True(existing_image.flags & ImageFlagBits::Remapped)) { - return false; - } if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) { const bool strict_size = False(options & RelaxedOptions::Size) && True(existing_image.flags & ImageFlagBits::Strong); @@ -1055,16 +993,7 @@ ImageId TextureCache

::FindImage(const ImageInfo& info, GPUVAddr gpu_addr, template ImageId TextureCache

::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr, RelaxedOptions options) { - std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - if (!cpu_addr) { - const auto size = CalculateGuestSizeInBytes(info); - cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, size); - if (!cpu_addr) { - const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space; - virtual_invalid_space += Common::AlignUp(size, 32); - cpu_addr = std::optional(fake_addr); - } - } + const std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); ASSERT_MSG(cpu_addr, "Tried to insert an image to an invalid gpu_addr=0x{:x}", gpu_addr); const ImageId image_id = JoinImages(info, gpu_addr, *cpu_addr); const Image& image = slot_images[image_id]; @@ -1084,19 +1013,10 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA const bool broken_views = runtime.HasBrokenTextureViewFormats(); const bool native_bgr = runtime.HasNativeBgr(); std::vector overlap_ids; - std::unordered_set overlaps_found; std::vector left_aliased_ids; std::vector right_aliased_ids; - std::unordered_set ignore_textures; std::vector bad_overlap_ids; - const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) { - if (True(overlap.flags & ImageFlagBits::Remapped)) { - ignore_textures.insert(overlap_id); - return; - } - if (info.type != overlap.info.type) { - return; - } + ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) { if (info.type == ImageType::Linear) { if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) { // Alias linear images with the same pitch @@ -1104,7 +1024,6 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA } return; } - overlaps_found.insert(overlap_id); static constexpr bool strict_size = true; const std::optional solution = ResolveOverlap( new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr); @@ -1128,35 +1047,12 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA bad_overlap_ids.push_back(overlap_id); overlap.flags |= ImageFlagBits::BadOverlap; } - }; - ForEachImageInRegion(cpu_addr, size_bytes, region_check); - const auto region_check_gpu = [&](ImageId overlap_id, ImageBase& overlap) { - if (!overlaps_found.contains(overlap_id)) { - ignore_textures.insert(overlap_id); - } - }; - ForEachSparseImageInRegion(gpu_addr, size_bytes, region_check_gpu); + }); const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); Image& new_image = slot_images[new_image_id]; - if (!gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) { - new_image.flags |= ImageFlagBits::Sparse; - } - - for (const ImageId overlap_id : ignore_textures) { - Image& overlap = slot_images[overlap_id]; - if (True(overlap.flags & ImageFlagBits::GpuModified)) { - UNIMPLEMENTED(); - } - if (True(overlap.flags & ImageFlagBits::Tracked)) { - UntrackImage(overlap, overlap_id); - } - UnregisterImage(overlap_id); - DeleteImage(overlap_id); - } - // TODO: Only upload what we need - RefreshContents(new_image, new_image_id); + RefreshContents(new_image); for (const ImageId overlap_id : overlap_ids) { Image& overlap = slot_images[overlap_id]; @@ -1168,7 +1064,7 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA runtime.CopyImage(new_image, overlap, copies); } if (True(overlap.flags & ImageFlagBits::Tracked)) { - UntrackImage(overlap, overlap_id); + UntrackImage(overlap); } UnregisterImage(overlap_id); DeleteImage(overlap_id); @@ -1304,8 +1200,7 @@ void TextureCache

::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f using FuncReturn = typename std::invoke_result::type; static constexpr bool BOOL_BREAK = std::is_same_v; boost::container::small_vector images; - boost::container::small_vector maps; - ForEachCPUPage(cpu_addr, size, [this, &images, &maps, cpu_addr, size, func](u64 page) { + ForEachPage(cpu_addr, size, [this, &images, cpu_addr, size, func](u64 page) { const auto it = page_table.find(page); if (it == page_table.end()) { if constexpr (BOOL_BREAK) { @@ -1314,63 +1209,12 @@ void TextureCache

::ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& f return; } } - for (const ImageMapId map_id : it->second) { - ImageMapView& map = slot_map_views[map_id]; - if (map.picked) { - continue; - } - if (!map.Overlaps(cpu_addr, size)) { - continue; - } - map.picked = true; - maps.push_back(map_id); - Image& image = slot_images[map.image_id]; - if (True(image.flags & ImageFlagBits::Picked)) { - continue; - } - image.flags |= ImageFlagBits::Picked; - images.push_back(map.image_id); - if constexpr (BOOL_BREAK) { - if (func(map.image_id, image)) { - return true; - } - } else { - func(map.image_id, image); - } - } - if constexpr (BOOL_BREAK) { - return false; - } - }); - for (const ImageId image_id : images) { - slot_images[image_id].flags &= ~ImageFlagBits::Picked; - } - for (const ImageMapId map_id : maps) { - slot_map_views[map_id].picked = false; - } -} - -template -template -void TextureCache

::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Func&& func) { - using FuncReturn = typename std::invoke_result::type; - static constexpr bool BOOL_BREAK = std::is_same_v; - boost::container::small_vector images; - ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) { - const auto it = gpu_page_table.find(page); - if (it == gpu_page_table.end()) { - if constexpr (BOOL_BREAK) { - return false; - } else { - return; - } - } for (const ImageId image_id : it->second) { Image& image = slot_images[image_id]; if (True(image.flags & ImageFlagBits::Picked)) { continue; } - if (!image.OverlapsGPU(gpu_addr, size)) { + if (!image.Overlaps(cpu_addr, size)) { continue; } image.flags |= ImageFlagBits::Picked; @@ -1392,69 +1236,6 @@ void TextureCache

::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Fu } } -template -template -void TextureCache

::ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func) { - using FuncReturn = typename std::invoke_result::type; - static constexpr bool BOOL_BREAK = std::is_same_v; - boost::container::small_vector images; - ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) { - const auto it = sparse_page_table.find(page); - if (it == sparse_page_table.end()) { - if constexpr (BOOL_BREAK) { - return false; - } else { - return; - } - } - for (const ImageId image_id : it->second) { - Image& image = slot_images[image_id]; - if (True(image.flags & ImageFlagBits::Picked)) { - continue; - } - if (!image.OverlapsGPU(gpu_addr, size)) { - continue; - } - image.flags |= ImageFlagBits::Picked; - images.push_back(image_id); - if constexpr (BOOL_BREAK) { - if (func(image_id, image)) { - return true; - } - } else { - func(image_id, image); - } - } - if constexpr (BOOL_BREAK) { - return false; - } - }); - for (const ImageId image_id : images) { - slot_images[image_id].flags &= ~ImageFlagBits::Picked; - } -} - -template -template -void TextureCache

::ForEachSparseSegment(ImageBase& image, Func&& func) { - using FuncReturn = typename std::invoke_result::type; - static constexpr bool RETURNS_BOOL = std::is_same_v; - const auto segments = gpu_memory.GetSubmappedRange(image.gpu_addr, image.guest_size_bytes); - for (auto& segment : segments) { - const auto gpu_addr = segment.first; - const auto size = segment.second; - std::optional cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr); - ASSERT(cpu_addr); - if constexpr (RETURNS_BOOL) { - if (func(gpu_addr, *cpu_addr, size)) { - break; - } - } else { - func(gpu_addr, *cpu_addr, size); - } - } -} - template ImageViewId TextureCache

::FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info) { Image& image = slot_images[image_id]; @@ -1472,6 +1253,8 @@ void TextureCache

::RegisterImage(ImageId image_id) { ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Trying to register an already registered image"); image.flags |= ImageFlagBits::Registered; + ForEachPage(image.cpu_addr, image.guest_size_bytes, + [this, image_id](u64 page) { page_table[page].push_back(image_id); }); u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); if ((IsPixelFormatASTC(image.info.format) && True(image.flags & ImageFlagBits::AcceleratedUpload)) || @@ -1479,27 +1262,6 @@ void TextureCache

::RegisterImage(ImageId image_id) { tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); } total_used_memory += Common::AlignUp(tentative_size, 1024); - ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, - [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); }); - if (False(image.flags & ImageFlagBits::Sparse)) { - auto map_id = - slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id); - ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, - [this, map_id](u64 page) { page_table[page].push_back(map_id); }); - image.map_view_id = map_id; - return; - } - std::vector sparse_maps{}; - ForEachSparseSegment( - image, [this, image_id, &sparse_maps](GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { - auto map_id = slot_map_views.insert(gpu_addr, cpu_addr, size, image_id); - ForEachCPUPage(cpu_addr, size, - [this, map_id](u64 page) { page_table[page].push_back(map_id); }); - sparse_maps.push_back(map_id); - }); - sparse_views.emplace(image_id, std::move(sparse_maps)); - ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, - [this, image_id](u64 page) { sparse_page_table[page].push_back(image_id); }); } template @@ -1516,125 +1278,34 @@ void TextureCache

::UnregisterImage(ImageId image_id) { tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); } total_used_memory -= Common::AlignUp(tentative_size, 1024); - const auto& clear_page_table = - [this, image_id]( - u64 page, - std::unordered_map, IdentityHash>& selected_page_table) { - const auto page_it = selected_page_table.find(page); - if (page_it == selected_page_table.end()) { - UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); - return; - } - std::vector& image_ids = page_it->second; - const auto vector_it = std::ranges::find(image_ids, image_id); - if (vector_it == image_ids.end()) { - UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", - page << PAGE_BITS); - return; - } - image_ids.erase(vector_it); - }; - ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, - [this, &clear_page_table](u64 page) { clear_page_table(page, gpu_page_table); }); - if (False(image.flags & ImageFlagBits::Sparse)) { - const auto map_id = image.map_view_id; - ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) { - const auto page_it = page_table.find(page); - if (page_it == page_table.end()) { - UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); - return; - } - std::vector& image_map_ids = page_it->second; - const auto vector_it = std::ranges::find(image_map_ids, map_id); - if (vector_it == image_map_ids.end()) { - UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", - page << PAGE_BITS); - return; - } - image_map_ids.erase(vector_it); - }); - slot_map_views.erase(map_id); - return; - } - ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) { - clear_page_table(page, sparse_page_table); + ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { + const auto page_it = page_table.find(page); + if (page_it == page_table.end()) { + UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); + return; + } + std::vector& image_ids = page_it->second; + const auto vector_it = std::ranges::find(image_ids, image_id); + if (vector_it == image_ids.end()) { + UNREACHABLE_MSG("Unregistering unregistered image in page=0x{:x}", page << PAGE_BITS); + return; + } + image_ids.erase(vector_it); }); - auto it = sparse_views.find(image_id); - ASSERT(it != sparse_views.end()); - auto& sparse_maps = it->second; - for (auto& map_view_id : sparse_maps) { - const auto& map_range = slot_map_views[map_view_id]; - const VAddr cpu_addr = map_range.cpu_addr; - const std::size_t size = map_range.size; - ForEachCPUPage(cpu_addr, size, [this, image_id](u64 page) { - const auto page_it = page_table.find(page); - if (page_it == page_table.end()) { - UNREACHABLE_MSG("Unregistering unregistered page=0x{:x}", page << PAGE_BITS); - return; - } - std::vector& image_map_ids = page_it->second; - auto vector_it = image_map_ids.begin(); - while (vector_it != image_map_ids.end()) { - ImageMapView& map = slot_map_views[*vector_it]; - if (map.image_id != image_id) { - vector_it++; - continue; - } - if (!map.picked) { - map.picked = true; - } - vector_it = image_map_ids.erase(vector_it); - } - }); - slot_map_views.erase(map_view_id); - } - sparse_views.erase(it); } template -void TextureCache

::TrackImage(ImageBase& image, ImageId image_id) { +void TextureCache

::TrackImage(ImageBase& image) { ASSERT(False(image.flags & ImageFlagBits::Tracked)); image.flags |= ImageFlagBits::Tracked; - if (False(image.flags & ImageFlagBits::Sparse)) { - rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); - return; - } - if (True(image.flags & ImageFlagBits::Registered)) { - auto it = sparse_views.find(image_id); - ASSERT(it != sparse_views.end()); - auto& sparse_maps = it->second; - for (auto& map_view_id : sparse_maps) { - const auto& map = slot_map_views[map_view_id]; - const VAddr cpu_addr = map.cpu_addr; - const std::size_t size = map.size; - rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); - } - return; - } - ForEachSparseSegment(image, - [this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) { - rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); - }); + rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1); } template -void TextureCache

::UntrackImage(ImageBase& image, ImageId image_id) { +void TextureCache

::UntrackImage(ImageBase& image) { ASSERT(True(image.flags & ImageFlagBits::Tracked)); image.flags &= ~ImageFlagBits::Tracked; - if (False(image.flags & ImageFlagBits::Sparse)) { - rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); - return; - } - ASSERT(True(image.flags & ImageFlagBits::Registered)); - auto it = sparse_views.find(image_id); - ASSERT(it != sparse_views.end()); - auto& sparse_maps = it->second; - for (auto& map_view_id : sparse_maps) { - const auto& map = slot_map_views[map_view_id]; - const VAddr cpu_addr = map.cpu_addr; - const std::size_t size = map.size; - rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1); - } + rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1); } template @@ -1776,10 +1447,10 @@ void TextureCache

::PrepareImage(ImageId image_id, bool is_modification, bool if (invalidate) { image.flags &= ~(ImageFlagBits::CpuModified | ImageFlagBits::GpuModified); if (False(image.flags & ImageFlagBits::Tracked)) { - TrackImage(image, image_id); + TrackImage(image); } } else { - RefreshContents(image, image_id); + RefreshContents(image); SynchronizeAliases(image_id); } if (is_modification) { diff --git a/src/video_core/texture_cache/types.h b/src/video_core/texture_cache/types.h index 9fbdc1ac6..c9571f7e4 100755 --- a/src/video_core/texture_cache/types.h +++ b/src/video_core/texture_cache/types.h @@ -16,7 +16,6 @@ constexpr size_t MAX_MIP_LEVELS = 14; constexpr SlotId CORRUPT_ID{0xfffffffe}; using ImageId = SlotId; -using ImageMapId = SlotId; using ImageViewId = SlotId; using ImageAllocId = SlotId; using SamplerId = SlotId; diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 10093a11d..4efe042b6 100755 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -664,16 +664,6 @@ LevelArray CalculateMipLevelOffsets(const ImageInfo& info) noexcept { return offsets; } -LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept { - const u32 num_levels = info.resources.levels; - const LevelInfo level_info = MakeLevelInfo(info); - LevelArray sizes{}; - for (u32 level = 0; level < num_levels; ++level) { - sizes[level] = CalculateLevelSize(level_info, level); - } - return sizes; -} - std::vector CalculateSliceOffsets(const ImageInfo& info) { ASSERT(info.type == ImageType::e3D); std::vector offsets; @@ -786,20 +776,14 @@ std::vector MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn return copies; } -bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) { - const GPUVAddr address = config.Address(); - if (address == 0) { +bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) { + if (config.Address() == 0) { return false; } - if (address > (1ULL << 48)) { + if (config.Address() > (u64(1) << 48)) { return false; } - if (gpu_memory.GpuToCpuAddress(address).has_value()) { - return true; - } - const ImageInfo info{config}; - const size_t guest_size_bytes = CalculateGuestSizeInBytes(info); - return gpu_memory.GpuToCpuAddress(address, guest_size_bytes).has_value(); + return gpu_memory.GpuToCpuAddress(config.Address()).has_value(); } std::vector UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, diff --git a/src/video_core/texture_cache/util.h b/src/video_core/texture_cache/util.h index 766502908..cdc5cbc75 100755 --- a/src/video_core/texture_cache/util.h +++ b/src/video_core/texture_cache/util.h @@ -40,8 +40,6 @@ struct OverlapResult { [[nodiscard]] LevelArray CalculateMipLevelOffsets(const ImageInfo& info) noexcept; -[[nodiscard]] LevelArray CalculateMipLevelSizes(const ImageInfo& info) noexcept; - [[nodiscard]] std::vector CalculateSliceOffsets(const ImageInfo& info); [[nodiscard]] std::vector CalculateSliceSubresources(const ImageInfo& info); @@ -57,7 +55,7 @@ struct OverlapResult { const ImageInfo& src, SubresourceBase base); -[[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config); +[[nodiscard]] bool IsValidAddress(const Tegra::MemoryManager& gpu_memory, const TICEntry& config); [[nodiscard]] std::vector UnswizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const ImageInfo& info,