From b5cdabcc51365582b931a75607297eb3cf774b2a Mon Sep 17 00:00:00 2001 From: pineappleEA Date: Sat, 7 Oct 2023 20:35:47 +0200 Subject: [PATCH] early-access version 3913 --- README.md | 2 +- src/android/app/src/main/jni/native.cpp | 1 - src/video_core/renderer_base.h | 3 - src/video_core/renderer_vulkan/blit_image.cpp | 17 +++--- .../renderer_vulkan/renderer_vulkan.h | 4 -- .../renderer_vulkan/vk_present_manager.cpp | 59 ++++++++----------- .../renderer_vulkan/vk_present_manager.h | 10 ++-- .../renderer_vulkan/vk_rasterizer.cpp | 3 +- .../vk_staging_buffer_pool.cpp | 33 +++++++---- .../renderer_vulkan/vk_staging_buffer_pool.h | 5 ++ .../vulkan_common/vulkan_memory_allocator.cpp | 21 +++++-- .../vulkan_common/vulkan_memory_allocator.h | 14 +++++ src/video_core/vulkan_common/vulkan_wrapper.h | 3 + 13 files changed, 103 insertions(+), 72 deletions(-) diff --git a/README.md b/README.md index 9eb52baaf..84a8b26ad 100755 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ yuzu emulator early access ============= -This is the source code for early-access 3912. +This is the source code for early-access 3913. ## Legal Notice diff --git a/src/android/app/src/main/jni/native.cpp b/src/android/app/src/main/jni/native.cpp index 9cf71680c..598f4e8bf 100755 --- a/src/android/app/src/main/jni/native.cpp +++ b/src/android/app/src/main/jni/native.cpp @@ -218,7 +218,6 @@ public: return; } m_window->OnSurfaceChanged(m_native_window); - m_system.Renderer().NotifySurfaceChanged(); } void ConfigureFilesystemProvider(const std::string& filepath) { diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h index 93abc1677..c17067e7e 100755 --- a/src/video_core/renderer_base.h +++ b/src/video_core/renderer_base.h @@ -89,9 +89,6 @@ public: void RequestScreenshot(void* data, std::function callback, const Layout::FramebufferLayout& layout); - /// This is called to notify the rendering backend of a surface change - virtual void NotifySurfaceChanged() {} - protected: Core::Frontend::EmuWindow& render_window; ///< Reference to the render window handle. std::unique_ptr context; diff --git a/src/video_core/renderer_vulkan/blit_image.cpp b/src/video_core/renderer_vulkan/blit_image.cpp index f6a744ef0..abcbf83b1 100755 --- a/src/video_core/renderer_vulkan/blit_image.cpp +++ b/src/video_core/renderer_vulkan/blit_image.cpp @@ -618,13 +618,14 @@ void BlitImageHelper::ClearDepthStencil(const Framebuffer* dst_framebuffer, bool const VkPipeline pipeline = FindOrEmplaceClearStencilPipeline(key); const VkPipelineLayout layout = *clear_color_pipeline_layout; scheduler.RequestRenderpass(dst_framebuffer); - scheduler.Record( - [pipeline, layout, clear_depth, dst_region](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { - cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); - BindBlitState(cmdbuf, dst_region); - cmdbuf.PushConstants(layout, VK_SHADER_STAGE_FRAGMENT_BIT, clear_depth); - cmdbuf.Draw(3, 1, 0, 0); - }); + scheduler.Record([pipeline, layout, clear_depth, dst_region](vk::CommandBuffer cmdbuf, vk::CommandBuffer) { + constexpr std::array blend_constants{0.0f, 0.0f, 0.0f, 0.0f}; + cmdbuf.SetBlendConstants(blend_constants.data()); + cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + BindBlitState(cmdbuf, dst_region); + cmdbuf.PushConstants(layout, VK_SHADER_STAGE_FRAGMENT_BIT, clear_depth); + cmdbuf.Draw(3, 1, 0, 0); + }); scheduler.InvalidateState(); } @@ -877,7 +878,7 @@ VkPipeline BlitImageHelper::FindOrEmplaceClearStencilPipeline( .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, .pNext = nullptr, .flags = 0, - .depthTestEnable = VK_FALSE, + .depthTestEnable = key.depth_clear, .depthWriteEnable = key.depth_clear, .depthCompareOp = VK_COMPARE_OP_ALWAYS, .depthBoundsTestEnable = VK_FALSE, diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h index c82b8fea2..606f2aea2 100755 --- a/src/video_core/renderer_vulkan/renderer_vulkan.h +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -56,10 +56,6 @@ public: return device.GetDriverName(); } - void NotifySurfaceChanged() override { - present_manager.NotifySurfaceChanged(); - } - private: void Report() const; diff --git a/src/video_core/renderer_vulkan/vk_present_manager.cpp b/src/video_core/renderer_vulkan/vk_present_manager.cpp index 752723352..5ba2b3bb3 100755 --- a/src/video_core/renderer_vulkan/vk_present_manager.cpp +++ b/src/video_core/renderer_vulkan/vk_present_manager.cpp @@ -103,8 +103,7 @@ PresentManager::PresentManager(const vk::Instance& instance_, surface{surface_}, blit_supported{CanBlitToSwapchain(device.GetPhysical(), swapchain.GetImageViewFormat())}, use_present_thread{Settings::values.async_presentation.GetValue()}, - image_count{swapchain.GetImageCount()}, last_render_surface{ - render_window_.GetWindowInfo().render_surface} { + image_count{swapchain.GetImageCount()} { auto& dld = device.GetLogical(); cmdpool = dld.CreateCommandPool({ @@ -289,44 +288,36 @@ void PresentManager::PresentThread(std::stop_token token) { } } -void PresentManager::NotifySurfaceChanged() { -#ifdef ANDROID - std::scoped_lock lock{recreate_surface_mutex}; - recreate_surface_cv.notify_one(); -#endif +void PresentManager::RecreateSwapchain(Frame* frame) { + swapchain.Create(*surface, frame->width, frame->height, frame->is_srgb); + image_count = swapchain.GetImageCount(); } void PresentManager::CopyToSwapchain(Frame* frame) { - MICROPROFILE_SCOPE(Vulkan_CopyToSwapchain); + bool requires_recreation = false; - const auto recreate_swapchain = [&] { - swapchain.Create(*surface, frame->width, frame->height, frame->is_srgb); - image_count = swapchain.GetImageCount(); - }; + while (true) { + try { + // Recreate surface and swapchain if needed. + if (requires_recreation) { + surface = CreateSurface(instance, render_window.GetWindowInfo()); + RecreateSwapchain(frame); + } -#ifdef ANDROID - std::unique_lock lock{recreate_surface_mutex}; + // Draw to swapchain. + return CopyToSwapchainImpl(frame); + } catch (const vk::Exception& except) { + if (except.GetResult() != VK_ERROR_SURFACE_LOST_KHR) { + throw; + } - const auto needs_recreation = [&] { - if (last_render_surface != render_window.GetWindowInfo().render_surface) { - return true; + requires_recreation = true; } - if (swapchain.NeedsRecreation(frame->is_srgb)) { - return true; - } - return false; - }; - - recreate_surface_cv.wait_for(lock, std::chrono::milliseconds(400), - [&]() { return !needs_recreation(); }); - - // If the frontend recreated the surface, recreate the renderer surface and swapchain. - if (last_render_surface != render_window.GetWindowInfo().render_surface) { - last_render_surface = render_window.GetWindowInfo().render_surface; - surface = CreateSurface(instance, render_window.GetWindowInfo()); - recreate_swapchain(); } -#endif +} + +void PresentManager::CopyToSwapchainImpl(Frame* frame) { + MICROPROFILE_SCOPE(Vulkan_CopyToSwapchain); // If the size or colorspace of the incoming frames has changed, recreate the swapchain // to account for that. @@ -334,11 +325,11 @@ void PresentManager::CopyToSwapchain(Frame* frame) { const bool size_changed = swapchain.GetWidth() != frame->width || swapchain.GetHeight() != frame->height; if (srgb_changed || size_changed) { - recreate_swapchain(); + RecreateSwapchain(frame); } while (swapchain.AcquireNextImage()) { - recreate_swapchain(); + RecreateSwapchain(frame); } const vk::CommandBuffer cmdbuf{frame->cmdbuf}; diff --git a/src/video_core/renderer_vulkan/vk_present_manager.h b/src/video_core/renderer_vulkan/vk_present_manager.h index 83e859416..a3d825fe6 100755 --- a/src/video_core/renderer_vulkan/vk_present_manager.h +++ b/src/video_core/renderer_vulkan/vk_present_manager.h @@ -54,14 +54,15 @@ public: /// Waits for the present thread to finish presenting all queued frames. void WaitPresent(); - /// This is called to notify the rendering backend of a surface change - void NotifySurfaceChanged(); - private: void PresentThread(std::stop_token token); void CopyToSwapchain(Frame* frame); + void CopyToSwapchainImpl(Frame* frame); + + void RecreateSwapchain(Frame* frame); + private: const vk::Instance& instance; Core::Frontend::EmuWindow& render_window; @@ -76,16 +77,13 @@ private: std::queue free_queue; std::condition_variable_any frame_cv; std::condition_variable free_cv; - std::condition_variable recreate_surface_cv; std::mutex swapchain_mutex; - std::mutex recreate_surface_mutex; std::mutex queue_mutex; std::mutex free_mutex; std::jthread present_thread; bool blit_supported; bool use_present_thread; std::size_t image_count{}; - void* last_render_surface{}; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 3cdd2721d..169cc7e83 100755 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -424,7 +424,8 @@ void RasterizerVulkan::Clear(u32 layer_count) { return; } - if (use_stencil && regs.stencil_front_mask != 0xFF && regs.stencil_front_mask != 0) { + if (use_stencil && framebuffer->HasAspectStencilBit() && regs.stencil_front_mask != 0xFF && + regs.stencil_front_mask != 0) { Region2D dst_region = { Offset2D{.x = clear_rect.rect.offset.x, .y = clear_rect.rect.offset.y}, Offset2D{.x = clear_rect.rect.offset.x + static_cast(clear_rect.rect.extent.width), diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp index c5a18ee62..bdd02e16c 100755 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp @@ -24,25 +24,38 @@ using namespace Common::Literals; // Maximum potential alignment of a Vulkan buffer constexpr VkDeviceSize MAX_ALIGNMENT = 256; -// Maximum size to put elements in the stream buffer -constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8_MiB; // Stream buffer size in bytes -constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128_MiB; -constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS; +constexpr VkDeviceSize MAX_STREAM_BUFFER_SIZE = 128_MiB; -size_t Region(size_t iterator) noexcept { - return iterator / REGION_SIZE; +size_t GetStreamBufferSize(const Device& device) { + VkDeviceSize size{0}; + if (device.HasDebuggingToolAttached()) { + ForEachDeviceLocalHostVisibleHeap(device, [&size](size_t index, VkMemoryHeap& heap) { + size = std::max(size, heap.size); + }); + // If rebar is not supported, cut the max heap size to 40%. This will allow 2 captures to be + // loaded at the same time in RenderDoc. If rebar is supported, this shouldn't be an issue + // as the heap will be much larger. + if (size <= 256_MiB) { + size = size * 40 / 100; + } + } else { + size = MAX_STREAM_BUFFER_SIZE; + } + return std::min(Common::AlignUp(size, MAX_ALIGNMENT), MAX_STREAM_BUFFER_SIZE); } } // Anonymous namespace StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_, Scheduler& scheduler_) - : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} { + : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_}, + stream_buffer_size{GetStreamBufferSize(device)}, region_size{stream_buffer_size / + StagingBufferPool::NUM_SYNCS} { VkBufferCreateInfo stream_ci = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .pNext = nullptr, .flags = 0, - .size = STREAM_BUFFER_SIZE, + .size = stream_buffer_size, .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, .sharingMode = VK_SHARING_MODE_EXCLUSIVE, @@ -63,7 +76,7 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem StagingBufferPool::~StagingBufferPool() = default; StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage, bool deferred) { - if (!deferred && usage == MemoryUsage::Upload && size <= MAX_STREAM_BUFFER_REQUEST_SIZE) { + if (!deferred && usage == MemoryUsage::Upload && size <= region_size) { return GetStreamBuffer(size); } return GetStagingBuffer(size, usage, deferred); @@ -101,7 +114,7 @@ StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) { used_iterator = iterator; free_iterator = std::max(free_iterator, iterator + size); - if (iterator + size >= STREAM_BUFFER_SIZE) { + if (iterator + size >= stream_buffer_size) { std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + NUM_SYNCS, current_tick); used_iterator = 0; diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h index 087d7bb75..d1aed7f99 100755 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h @@ -94,6 +94,9 @@ private: void ReleaseCache(MemoryUsage usage); void ReleaseLevel(StagingBuffersCache& cache, size_t log2); + size_t Region(size_t iter) const noexcept { + return iter / region_size; + } const Device& device; MemoryAllocator& memory_allocator; @@ -101,6 +104,8 @@ private: vk::Buffer stream_buffer; std::span stream_pointer; + VkDeviceSize stream_buffer_size; + VkDeviceSize region_size; size_t iterator = 0; size_t used_iterator = 0; diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp index 59849a85f..4b52563c7 100755 --- a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp +++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp @@ -9,6 +9,7 @@ #include "common/alignment.h" #include "common/assert.h" #include "common/common_types.h" +#include "common/literals.h" #include "common/logging/log.h" #include "common/polyfill_ranges.h" #include "video_core/vulkan_common/vma.h" @@ -69,8 +70,7 @@ struct Range { case MemoryUsage::Download: return VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT; case MemoryUsage::DeviceLocal: - return VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT | - VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT; + return {}; } return {}; } @@ -212,7 +212,20 @@ MemoryAllocator::MemoryAllocator(const Device& device_) : device{device_}, allocator{device.GetAllocator()}, properties{device_.GetPhysical().GetMemoryProperties().memoryProperties}, buffer_image_granularity{ - device_.GetPhysical().GetProperties().limits.bufferImageGranularity} {} + device_.GetPhysical().GetProperties().limits.bufferImageGranularity} { + // GPUs not supporting rebar may only have a region with less than 256MB host visible/device + // local memory. In that case, opening 2 RenderDoc captures side-by-side is not possible due to + // the heap running out of memory. With RenderDoc attached and only a small host/device region, + // only allow the stream buffer in this memory heap. + if (device.HasDebuggingToolAttached()) { + using namespace Common::Literals; + ForEachDeviceLocalHostVisibleHeap(device, [this](size_t index, VkMemoryHeap& heap) { + if (heap.size <= 256_MiB) { + valid_memory_types &= ~(1u << index); + } + }); + } +} MemoryAllocator::~MemoryAllocator() = default; @@ -244,7 +257,7 @@ vk::Buffer MemoryAllocator::CreateBuffer(const VkBufferCreateInfo& ci, MemoryUsa .usage = MemoryUsageVma(usage), .requiredFlags = 0, .preferredFlags = MemoryUsagePreferedVmaFlags(usage), - .memoryTypeBits = 0, + .memoryTypeBits = usage == MemoryUsage::Stream ? 0u : valid_memory_types, .pool = VK_NULL_HANDLE, .pUserData = nullptr, .priority = 0.f, diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.h b/src/video_core/vulkan_common/vulkan_memory_allocator.h index b60e8e346..b5e511f94 100755 --- a/src/video_core/vulkan_common/vulkan_memory_allocator.h +++ b/src/video_core/vulkan_common/vulkan_memory_allocator.h @@ -7,6 +7,7 @@ #include #include #include "common/common_types.h" +#include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" VK_DEFINE_HANDLE(VmaAllocator) @@ -26,6 +27,18 @@ enum class MemoryUsage { Stream, ///< Requests device local host visible buffer, falling back host memory. }; +template +void ForEachDeviceLocalHostVisibleHeap(const Device& device, F&& f) { + auto memory_props = device.GetPhysical().GetMemoryProperties().memoryProperties; + for (size_t i = 0; i < memory_props.memoryTypeCount; i++) { + auto& memory_type = memory_props.memoryTypes[i]; + if ((memory_type.propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) && + (memory_type.propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) { + f(memory_type.heapIndex, memory_props.memoryHeaps[memory_type.heapIndex]); + } + } +} + /// Ownership handle of a memory commitment. /// Points to a subregion of a memory allocation. class MemoryCommit { @@ -124,6 +137,7 @@ private: std::vector> allocations; ///< Current allocations. VkDeviceSize buffer_image_granularity; // The granularity for adjacent offsets between buffers // and optimal images + u32 valid_memory_types{~0u}; }; } // namespace Vulkan diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h index 56982f17b..38e47962c 100755 --- a/src/video_core/vulkan_common/vulkan_wrapper.h +++ b/src/video_core/vulkan_common/vulkan_wrapper.h @@ -117,6 +117,9 @@ public: virtual ~Exception() = default; const char* what() const noexcept override; + VkResult GetResult() const noexcept { + return result; + } private: VkResult result;