diff --git a/CMakeLists.txt b/CMakeLists.txt
index 857550e71..d98ba7767 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -496,7 +496,7 @@ endif()
 # Ensure libusb is properly configured (based on dolphin libusb include)
 if(NOT APPLE AND NOT YUZU_USE_BUNDLED_LIBUSB)
     include(FindPkgConfig)
-    if (PKG_CONFIG_FOUND)
+    if (PKG_CONFIG_FOUND AND NOT CMAKE_SYSTEM_NAME MATCHES "DragonFly|FreeBSD")
         pkg_check_modules(LIBUSB QUIET libusb-1.0>=1.0.24)
     else()
         find_package(LibUSB)
diff --git a/README.md b/README.md
index 893b613cf..13943dfc6 100755
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
 yuzu emulator early access
 =============
 
-This is the source code for early-access 1935.
+This is the source code for early-access 1936.
 
 ## Legal Notice
 
diff --git a/src/shader_recompiler/frontend/ir/value.h b/src/shader_recompiler/frontend/ir/value.h
index 795194d41..334bb47aa 100755
--- a/src/shader_recompiler/frontend/ir/value.h
+++ b/src/shader_recompiler/frontend/ir/value.h
@@ -57,6 +57,7 @@ public:
     [[nodiscard]] IR::Inst* Inst() const;
     [[nodiscard]] IR::Inst* InstRecursive() const;
+    [[nodiscard]] IR::Inst* TryInstRecursive() const;
     [[nodiscard]] IR::Value Resolve() const;
     [[nodiscard]] IR::Reg Reg() const;
     [[nodiscard]] IR::Pred Pred() const;
@@ -308,6 +309,13 @@ inline IR::Inst* Value::InstRecursive() const {
     return inst;
 }
 
+inline IR::Inst* Value::TryInstRecursive() const {
+    if (IsIdentity()) {
+        return inst->Arg(0).TryInstRecursive();
+    }
+    return type == Type::Opaque ? inst : nullptr;
+}
+
 inline IR::Value Value::Resolve() const {
     if (IsIdentity()) {
         return inst->Arg(0).Resolve();
diff --git a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
index 8dd6d6c2c..c403a5fae 100755
--- a/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
+++ b/src/shader_recompiler/ir_opt/constant_propagation_pass.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.
 
 #include <algorithm>
+#include <functional>
 #include <tuple>
 #include <type_traits>
 
@@ -88,6 +89,26 @@ bool FoldWhenAllImmediates(IR::Inst& inst, Func&& func) {
     return true;
 }
 
+/// Return true when all values in a range are equal or read the same constant buffer element
+template <typename Range>
+bool AreEqual(const Range& range) {
+    auto resolver{[](const auto& value) { return value.Resolve(); }};
+    auto equal{[](const IR::Value& lhs, const IR::Value& rhs) {
+        if (lhs == rhs) {
+            return true;
+        }
+        // Not equal, but try to match if they read the same constant buffer
+        if (!lhs.IsImmediate() && !rhs.IsImmediate() &&
+            lhs.Inst()->GetOpcode() == IR::Opcode::GetCbufU32 &&
+            rhs.Inst()->GetOpcode() == IR::Opcode::GetCbufU32 &&
+            lhs.Inst()->Arg(0) == rhs.Inst()->Arg(0) && lhs.Inst()->Arg(1) == rhs.Inst()->Arg(1)) {
+            return true;
+        }
+        return false;
+    }};
+    return std::ranges::adjacent_find(range, std::not_fn(equal), resolver) == std::end(range);
+}
+
 void FoldGetRegister(IR::Inst& inst) {
     if (inst.Arg(0).Reg() == IR::Reg::RZ) {
         inst.ReplaceUsesWith(IR::Value{u32{0}});
@@ -100,6 +121,157 @@ void FoldGetPred(IR::Inst& inst) {
     }
 }
 
+/// Replaces the XMAD pattern generated by an integer FMA
+bool FoldXmadMultiplyAdd(IR::Block& block, IR::Inst& inst) {
+    /*
+     * We are looking for this specific pattern:
+     *   %6 = BitFieldUExtract %op_b, #0, #16
+     *   %7 = BitFieldUExtract %op_a', #16, #16
+     *   %8 = IMul32 %6, %7
+     *   %10 = BitFieldUExtract %op_a', #0, #16
+     *   %11 = BitFieldInsert %8, %10, #16, #16
+     *   %15 = BitFieldUExtract %op_b, #0, #16
+     *   %16 = BitFieldUExtract %op_a, #0, #16
+     *   %17 = IMul32 %15, %16
+     *   %18 = IAdd32 %17, %op_c
+     *   %22 = BitFieldUExtract %op_b, #16, #16
+     *   %23 = BitFieldUExtract %11, #16, #16
+     *   %24 = IMul32 %22, %23
+     *   %25 = ShiftLeftLogical32 %24, #16
+     *   %26 = ShiftLeftLogical32 %11, #16
+     *   %27 = IAdd32 %26, %18
+     *   %result = IAdd32 %25, %27
+     *
+     * And replace it with:
+     *   %temp = IMul32 %op_a, %op_b
+     *   %result = IAdd32 %temp, %op_c
+     *
+     * This optimization has been proven safe by reversing Nvidia's compiler logic:
+     * if Nvidia generates this code from 'fma(a, b, c)', we can fold it back the same way.
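+     *
+     * The pattern is sound because, writing op_a = a_lo + (a_hi << 16) and
+     * op_b = b_lo + (b_hi << 16), the product modulo 2^32 is
+     *   op_a * op_b = a_lo * b_lo + ((a_hi * b_lo + a_lo * b_hi) << 16)
+     * since the a_hi * b_hi partial product is shifted entirely out of the low 32 bits.
+     * That is exactly the sum assembled above: %18 = a_lo * b_lo + op_c,
+     * %26 = (a_hi * b_lo) << 16, and %25 = (a_lo * b_hi) << 16.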
+     */
+    const IR::Value zero{0u};
+    const IR::Value sixteen{16u};
+    IR::Inst* const _25{inst.Arg(0).TryInstRecursive()};
+    IR::Inst* const _27{inst.Arg(1).TryInstRecursive()};
+    if (!_25 || !_27) {
+        return false;
+    }
+    if (_27->GetOpcode() != IR::Opcode::IAdd32) {
+        return false;
+    }
+    if (_25->GetOpcode() != IR::Opcode::ShiftLeftLogical32 || _25->Arg(1) != sixteen) {
+        return false;
+    }
+    IR::Inst* const _24{_25->Arg(0).TryInstRecursive()};
+    if (!_24 || _24->GetOpcode() != IR::Opcode::IMul32) {
+        return false;
+    }
+    IR::Inst* const _22{_24->Arg(0).TryInstRecursive()};
+    IR::Inst* const _23{_24->Arg(1).TryInstRecursive()};
+    if (!_22 || !_23) {
+        return false;
+    }
+    if (_22->GetOpcode() != IR::Opcode::BitFieldUExtract) {
+        return false;
+    }
+    if (_23->GetOpcode() != IR::Opcode::BitFieldUExtract) {
+        return false;
+    }
+    if (_22->Arg(1) != sixteen || _22->Arg(2) != sixteen) {
+        return false;
+    }
+    if (_23->Arg(1) != sixteen || _23->Arg(2) != sixteen) {
+        return false;
+    }
+    IR::Inst* const _11{_23->Arg(0).TryInstRecursive()};
+    if (!_11 || _11->GetOpcode() != IR::Opcode::BitFieldInsert) {
+        return false;
+    }
+    if (_11->Arg(2) != sixteen || _11->Arg(3) != sixteen) {
+        return false;
+    }
+    IR::Inst* const _8{_11->Arg(0).TryInstRecursive()};
+    IR::Inst* const _10{_11->Arg(1).TryInstRecursive()};
+    if (!_8 || !_10) {
+        return false;
+    }
+    if (_8->GetOpcode() != IR::Opcode::IMul32) {
+        return false;
+    }
+    if (_10->GetOpcode() != IR::Opcode::BitFieldUExtract) {
+        return false;
+    }
+    IR::Inst* const _6{_8->Arg(0).TryInstRecursive()};
+    IR::Inst* const _7{_8->Arg(1).TryInstRecursive()};
+    if (!_6 || !_7) {
+        return false;
+    }
+    if (_6->GetOpcode() != IR::Opcode::BitFieldUExtract) {
+        return false;
+    }
+    if (_7->GetOpcode() != IR::Opcode::BitFieldUExtract) {
+        return false;
+    }
+    if (_6->Arg(1) != zero || _6->Arg(2) != sixteen) {
+        return false;
+    }
+    if (_7->Arg(1) != sixteen || _7->Arg(2) != sixteen) {
+        return false;
+    }
+    IR::Inst* const _26{_27->Arg(0).TryInstRecursive()};
+    IR::Inst* const _18{_27->Arg(1).TryInstRecursive()};
+    if (!_26 || !_18) {
+        return false;
+    }
+    if (_26->GetOpcode() != IR::Opcode::ShiftLeftLogical32 || _26->Arg(1) != sixteen) {
+        return false;
+    }
+    if (_26->Arg(0).InstRecursive() != _11) {
+        return false;
+    }
+    if (_18->GetOpcode() != IR::Opcode::IAdd32) {
+        return false;
+    }
+    IR::Inst* const _17{_18->Arg(0).TryInstRecursive()};
+    if (!_17 || _17->GetOpcode() != IR::Opcode::IMul32) {
+        return false;
+    }
+    IR::Inst* const _15{_17->Arg(0).TryInstRecursive()};
+    IR::Inst* const _16{_17->Arg(1).TryInstRecursive()};
+    if (!_15 || !_16) {
+        return false;
+    }
+    if (_15->GetOpcode() != IR::Opcode::BitFieldUExtract) {
+        return false;
+    }
+    if (_16->GetOpcode() != IR::Opcode::BitFieldUExtract) {
+        return false;
+    }
+    if (_15->Arg(1) != zero || _16->Arg(1) != zero || _10->Arg(1) != zero) {
+        return false;
+    }
+    if (_15->Arg(2) != sixteen || _16->Arg(2) != sixteen || _10->Arg(2) != sixteen) {
+        return false;
+    }
+    const std::array op_as{
+        _7->Arg(0).Resolve(),
+        _16->Arg(0).Resolve(),
+        _10->Arg(0).Resolve(),
+    };
+    const std::array op_bs{
+        _22->Arg(0).Resolve(),
+        _6->Arg(0).Resolve(),
+        _15->Arg(0).Resolve(),
+    };
+    const IR::U32 op_c{_18->Arg(1)};
+    if (!AreEqual(op_as) || !AreEqual(op_bs)) {
+        return false;
+    }
+    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
+    inst.ReplaceUsesWith(ir.IAdd(ir.IMul(IR::U32{op_as[0]}, IR::U32{op_bs[1]}), op_c));
+    return true;
+}
+
 /// Replaces the pattern generated by two XMAD multiplications
 bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) {
     /*
@@ -116,33 +288,31 @@ bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) {
      *
      * This optimization has been proven safe by LLVM and MSVC.
      */
-    const IR::Value lhs_arg{inst.Arg(0)};
-    const IR::Value rhs_arg{inst.Arg(1)};
-    if (lhs_arg.IsImmediate() || rhs_arg.IsImmediate()) {
+    IR::Inst* const lhs_shl{inst.Arg(0).TryInstRecursive()};
+    IR::Inst* const rhs_mul{inst.Arg(1).TryInstRecursive()};
+    if (!lhs_shl || !rhs_mul) {
         return false;
     }
-    IR::Inst* const lhs_shl{lhs_arg.InstRecursive()};
     if (lhs_shl->GetOpcode() != IR::Opcode::ShiftLeftLogical32 ||
         lhs_shl->Arg(1) != IR::Value{16U}) {
         return false;
     }
-    if (lhs_shl->Arg(0).IsImmediate()) {
+    IR::Inst* const lhs_mul{lhs_shl->Arg(0).TryInstRecursive()};
+    if (!lhs_mul) {
         return false;
     }
-    IR::Inst* const lhs_mul{lhs_shl->Arg(0).InstRecursive()};
-    IR::Inst* const rhs_mul{rhs_arg.InstRecursive()};
     if (lhs_mul->GetOpcode() != IR::Opcode::IMul32 || rhs_mul->GetOpcode() != IR::Opcode::IMul32) {
         return false;
     }
-    if (lhs_mul->Arg(1).Resolve() != rhs_mul->Arg(1).Resolve()) {
-        return false;
-    }
     const IR::U32 factor_b{lhs_mul->Arg(1)};
-    if (lhs_mul->Arg(0).IsImmediate() || rhs_mul->Arg(0).IsImmediate()) {
+    if (factor_b.Resolve() != rhs_mul->Arg(1).Resolve()) {
+        return false;
+    }
+    IR::Inst* const lhs_bfe{lhs_mul->Arg(0).TryInstRecursive()};
+    IR::Inst* const rhs_bfe{rhs_mul->Arg(0).TryInstRecursive()};
+    if (!lhs_bfe || !rhs_bfe) {
         return false;
     }
-    IR::Inst* const lhs_bfe{lhs_mul->Arg(0).InstRecursive()};
-    IR::Inst* const rhs_bfe{rhs_mul->Arg(0).InstRecursive()};
     if (lhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) {
         return false;
     }
@@ -155,10 +325,10 @@ bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) {
     if (rhs_bfe->Arg(1) != IR::Value{0U} || rhs_bfe->Arg(2) != IR::Value{16U}) {
         return false;
     }
-    if (lhs_bfe->Arg(0).Resolve() != rhs_bfe->Arg(0).Resolve()) {
+    const IR::U32 factor_a{lhs_bfe->Arg(0)};
+    if (factor_a.Resolve() != rhs_bfe->Arg(0).Resolve()) {
         return false;
     }
-    const IR::U32 factor_a{lhs_bfe->Arg(0)};
     IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
     inst.ReplaceUsesWith(ir.IMul(factor_a, factor_b));
     return true;
@@ -181,6 +351,9 @@ void FoldAdd(IR::Block& block, IR::Inst& inst) {
         if (FoldXmadMultiply(block, inst)) {
             return;
         }
+        if (FoldXmadMultiplyAdd(block, inst)) {
+            return;
+        }
     }
 }
 
diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp
index 3ea72fda9..a99c33c37 100755
--- a/src/video_core/renderer_base.cpp
+++ b/src/video_core/renderer_base.cpp
@@ -27,7 +27,7 @@ void RendererBase::UpdateCurrentFramebufferLayout() {
     render_window.UpdateCurrentFramebufferLayout(layout.width, layout.height);
 }
 
-void RendererBase::RequestScreenshot(void* data, std::function<void()> callback,
+void RendererBase::RequestScreenshot(void* data, std::function<void(bool)> callback,
                                      const Layout::FramebufferLayout& layout) {
     if (renderer_settings.screenshot_requested) {
         LOG_ERROR(Render, "A screenshot is already requested or in progress, ignoring the request");
diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h
index 22b80c328..bb204454e 100755
--- a/src/video_core/renderer_base.h
+++ b/src/video_core/renderer_base.h
@@ -24,7 +24,7 @@ struct RendererSettings {
     // Screenshot
     std::atomic<bool> screenshot_requested{false};
     void* screenshot_bits{};
-    std::function<void()> screenshot_complete_callback;
+    std::function<void(bool)> screenshot_complete_callback;
     Layout::FramebufferLayout screenshot_framebuffer_layout;
 };
 
@@ -80,7 +80,7 @@ public:
     void RefreshBaseSettings();
 
     /// Request a screenshot of the next frame
-    void RequestScreenshot(void* data, std::function<void()> callback,
+    void RequestScreenshot(void* data, std::function<void(bool)> callback,
                            const Layout::FramebufferLayout& layout);
 
 protected:
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index f1b00c24c..7d7cba69c 100755
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -486,7 +486,7 @@ void RendererOpenGL::RenderScreenshot() {
     glBindFramebuffer(GL_READ_FRAMEBUFFER, old_read_fb);
     glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb);
 
-    renderer_settings.screenshot_complete_callback();
+    renderer_settings.screenshot_complete_callback(true);
     renderer_settings.screenshot_requested = false;
 }
 
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index a8d04dc61..7c9b0d6db 100755
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -138,6 +138,7 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
         const bool use_accelerated =
             rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
         const bool is_srgb = use_accelerated && screen_info.is_srgb;
+        RenderScreenshot(*framebuffer, use_accelerated);
 
         bool has_been_recreated = false;
         const auto recreate_swapchain = [&] {
@@ -162,7 +163,7 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
         if (has_been_recreated) {
             blit_screen.Recreate();
         }
-        const VkSemaphore render_semaphore = blit_screen.Draw(*framebuffer, use_accelerated);
+        const VkSemaphore render_semaphore = blit_screen.DrawToSwapchain(*framebuffer, use_accelerated);
         scheduler.Flush(render_semaphore);
         scheduler.WaitWorker();
         swapchain.Present(render_semaphore);
@@ -193,4 +194,153 @@ void RendererVulkan::Report() const {
     telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions);
 }
 
+void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& framebuffer,
+                                              bool use_accelerated) {
+    if (!renderer_settings.screenshot_requested) {
+        return;
+    }
+    const Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout};
+    vk::Image staging_image = device.GetLogical().CreateImage(VkImageCreateInfo{
+        .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT,
+        .imageType = VK_IMAGE_TYPE_2D,
+        .format = VK_FORMAT_B8G8R8A8_UNORM,
+        .extent =
+            {
+                .width = layout.width,
+                .height = layout.height,
+                .depth = 1,
+            },
+        .mipLevels = 1,
+        .arrayLayers = 1,
+        .samples = VK_SAMPLE_COUNT_1_BIT,
+        .tiling = VK_IMAGE_TILING_OPTIMAL,
+        .usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT |
+                 VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
+        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+        .queueFamilyIndexCount = 0,
+        .pQueueFamilyIndices = nullptr,
+        .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+    });
+    const auto image_commit = memory_allocator.Commit(staging_image, MemoryUsage::DeviceLocal);
+
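+    // The staging image is created as UNORM with VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT so the view
+    // below can reinterpret it as sRGB when the presented image is sRGB, keeping the screenshot
+    // colors consistent with the swapchain output.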
+    const vk::ImageView dst_view = device.GetLogical().CreateImageView(VkImageViewCreateInfo{
+        .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .image = *staging_image,
+        .viewType = VK_IMAGE_VIEW_TYPE_2D,
+        .format = screen_info.is_srgb ? VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM,
+        .components{
+            .r = VK_COMPONENT_SWIZZLE_IDENTITY,
+            .g = VK_COMPONENT_SWIZZLE_IDENTITY,
+            .b = VK_COMPONENT_SWIZZLE_IDENTITY,
+            .a = VK_COMPONENT_SWIZZLE_IDENTITY,
+        },
+        .subresourceRange{
+            .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+            .baseMipLevel = 0,
+            .levelCount = 1,
+            .baseArrayLayer = 0,
+            .layerCount = VK_REMAINING_ARRAY_LAYERS,
+        },
+    });
+    const VkExtent2D render_area{.width = layout.width, .height = layout.height};
+    const vk::Framebuffer screenshot_fb = blit_screen.CreateFramebuffer(*dst_view, render_area);
+    // Since we're not rendering to the screen, ignore the render semaphore.
+    void(blit_screen.Draw(framebuffer, *screenshot_fb, layout, render_area, use_accelerated));
+
+    const auto buffer_size = static_cast<VkDeviceSize>(layout.width * layout.height * 4);
+    const VkBufferCreateInfo dst_buffer_info{
+        .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .size = buffer_size,
+        .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT,
+        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
+        .queueFamilyIndexCount = 0,
+        .pQueueFamilyIndices = nullptr,
+    };
+    const vk::Buffer dst_buffer = device.GetLogical().CreateBuffer(dst_buffer_info);
+    MemoryCommit dst_buffer_memory = memory_allocator.Commit(dst_buffer, MemoryUsage::Download);
+
+    scheduler.RequestOutsideRenderPassOperationContext();
+    scheduler.Record([&](vk::CommandBuffer cmdbuf) {
+        const VkImageMemoryBarrier read_barrier{
+            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+            .pNext = nullptr,
+            .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
+            .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
+            .oldLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
+            .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .image = *staging_image,
+            .subresourceRange{
+                .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+                .baseMipLevel = 0,
+                .levelCount = VK_REMAINING_MIP_LEVELS,
+                .baseArrayLayer = 0,
+                .layerCount = VK_REMAINING_ARRAY_LAYERS,
+            },
+        };
+        const VkImageMemoryBarrier image_write_barrier{
+            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+            .pNext = nullptr,
+            .srcAccessMask = 0,
+            .dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
+            .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+            .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+            .image = *staging_image,
+            .subresourceRange{
+                .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+                .baseMipLevel = 0,
+                .levelCount = VK_REMAINING_MIP_LEVELS,
+                .baseArrayLayer = 0,
+                .layerCount = VK_REMAINING_ARRAY_LAYERS,
+            },
+        };
+        static constexpr VkMemoryBarrier memory_write_barrier{
+            .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
+            .pNext = nullptr,
+            .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
+            .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
+        };
+        const VkBufferImageCopy copy{
+            .bufferOffset = 0,
+            .bufferRowLength = 0,
+            .bufferImageHeight = 0,
+            .imageSubresource{
+                .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+                .mipLevel = 0,
+                .baseArrayLayer = 0,
+                .layerCount = 1,
+            },
+            .imageOffset{.x = 0, .y = 0, .z = 0},
+            .imageExtent{
+                .width = layout.width,
+                .height = layout.height,
+                .depth = 1,
+            },
+        };
+        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+                               0, read_barrier);
+        cmdbuf.CopyImageToBuffer(*staging_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *dst_buffer,
+                                 copy);
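+        // Make the transfer results visible to subsequent accesses and leave the staging image
+        // in the GENERAL layout.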
+        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+                               0, memory_write_barrier, nullptr, image_write_barrier);
+    });
+    // Ensure the copy is fully completed before saving the screenshot
+    scheduler.Finish();
+
+    // Copy backing image data to the QImage screenshot buffer
+    const auto dst_memory_map = dst_buffer_memory.Map();
+    std::memcpy(renderer_settings.screenshot_bits, dst_memory_map.data(), dst_memory_map.size());
+    renderer_settings.screenshot_complete_callback(false);
+    renderer_settings.screenshot_requested = false;
+}
+
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h
index d7d17e110..6dc985109 100755
--- a/src/video_core/renderer_vulkan/renderer_vulkan.h
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.h
@@ -54,6 +54,8 @@ public:
 private:
     void Report() const;
 
+    void RenderScreenshot(const Tegra::FramebufferConfig& framebuffer, bool use_accelerated);
+
     Core::TelemetrySession& telemetry_session;
     Core::Memory::Memory& cpu_memory;
    Tegra::GPU& gpu;
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
index 516f428e7..01a2b2b78 100755
--- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp
@@ -130,7 +130,10 @@ void VKBlitScreen::Recreate() {
     CreateDynamicResources();
 }
 
-VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool use_accelerated) {
+VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
+                               const VkFramebuffer& host_framebuffer,
+                               const Layout::FramebufferLayout layout, VkExtent2D render_area,
+                               bool use_accelerated) {
     RefreshResources(framebuffer);
 
     // Finish any pending renderpass
@@ -145,8 +148,8 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
                        use_accelerated ? screen_info.image_view : *raw_image_views[image_index]);
 
     BufferData data;
-    SetUniformData(data, framebuffer);
-    SetVertexData(data, framebuffer);
+    SetUniformData(data, layout);
+    SetVertexData(data, framebuffer, layout);
 
     const std::span<u8> mapped_span = buffer_commit.Map();
     std::memcpy(mapped_span.data(), &data, sizeof(data));
@@ -220,52 +223,75 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
                                VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier);
         });
     }
-    scheduler.Record([this, image_index, size = swapchain.GetSize()](vk::CommandBuffer cmdbuf) {
-        const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f;
-        const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f;
-        const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f;
-        const VkClearValue clear_color{
-            .color = {.float32 = {bg_red, bg_green, bg_blue, 1.0f}},
-        };
-        const VkRenderPassBeginInfo renderpass_bi{
-            .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
-            .pNext = nullptr,
-            .renderPass = *renderpass,
-            .framebuffer = *framebuffers[image_index],
-            .renderArea =
-                {
-                    .offset = {0, 0},
-                    .extent = size,
-                },
-            .clearValueCount = 1,
-            .pClearValues = &clear_color,
-        };
-        const VkViewport viewport{
-            .x = 0.0f,
-            .y = 0.0f,
-            .width = static_cast<f32>(size.width),
-            .height = static_cast<f32>(size.height),
-            .minDepth = 0.0f,
-            .maxDepth = 1.0f,
-        };
-        const VkRect2D scissor{
-            .offset = {0, 0},
-            .extent = size,
-        };
-        cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
-        cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
-        cmdbuf.SetViewport(0, viewport);
-        cmdbuf.SetScissor(0, scissor);
+    scheduler.Record(
+        [this, host_framebuffer, image_index, size = render_area](vk::CommandBuffer cmdbuf) {
+            const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f;
+            const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f;
+            const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f;
+            const VkClearValue clear_color{
+                .color = {.float32 = {bg_red, bg_green, bg_blue, 1.0f}},
+            };
+            const VkRenderPassBeginInfo renderpass_bi{
+                .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+                .pNext = nullptr,
+                .renderPass = *renderpass,
+                .framebuffer = host_framebuffer,
+                .renderArea =
+                    {
+                        .offset = {0, 0},
+                        .extent = size,
+                    },
+                .clearValueCount = 1,
+                .pClearValues = &clear_color,
+            };
+            const VkViewport viewport{
+                .x = 0.0f,
+                .y = 0.0f,
+                .width = static_cast<f32>(size.width),
+                .height = static_cast<f32>(size.height),
+                .minDepth = 0.0f,
+                .maxDepth = 1.0f,
+            };
+            const VkRect2D scissor{
+                .offset = {0, 0},
+                .extent = size,
+            };
+            cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
+            cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
+            cmdbuf.SetViewport(0, viewport);
+            cmdbuf.SetScissor(0, scissor);
 
-        cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices));
-        cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0,
-                                  descriptor_sets[image_index], {});
-        cmdbuf.Draw(4, 1, 0, 0);
-        cmdbuf.EndRenderPass();
-    });
+            cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices));
+            cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0,
+                                      descriptor_sets[image_index], {});
+            cmdbuf.Draw(4, 1, 0, 0);
+            cmdbuf.EndRenderPass();
+        });
     return *semaphores[image_index];
 }
 
+VkSemaphore VKBlitScreen::DrawToSwapchain(const Tegra::FramebufferConfig& framebuffer,
+                                          bool use_accelerated) {
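+    // Presumably the common path: draw with the window's own layout into the framebuffer that
+    // belongs to the current swapchain image.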
+    const std::size_t image_index = swapchain.GetImageIndex();
+    const VkExtent2D render_area = swapchain.GetSize();
+    const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout();
+    return Draw(framebuffer, *framebuffers[image_index], layout, render_area, use_accelerated);
+}
+
+vk::Framebuffer VKBlitScreen::CreateFramebuffer(const VkImageView& image_view, VkExtent2D extent) {
+    return device.GetLogical().CreateFramebuffer(VkFramebufferCreateInfo{
+        .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+        .pNext = nullptr,
+        .flags = 0,
+        .renderPass = *renderpass,
+        .attachmentCount = 1,
+        .pAttachments = &image_view,
+        .width = extent.width,
+        .height = extent.height,
+        .layers = 1,
+    });
+}
+
 void VKBlitScreen::CreateStaticResources() {
     CreateShaders();
     CreateSemaphores();
@@ -752,15 +778,13 @@ void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const {
     device.GetLogical().UpdateDescriptorSets(std::array{ubo_write, sampler_write}, {});
 }
 
-void VKBlitScreen::SetUniformData(BufferData& data,
-                                  const Tegra::FramebufferConfig& framebuffer) const {
-    const auto& layout = render_window.GetFramebufferLayout();
+void VKBlitScreen::SetUniformData(BufferData& data, const Layout::FramebufferLayout layout) const {
     data.uniform.modelview_matrix =
         MakeOrthographicMatrix(static_cast<f32>(layout.width), static_cast<f32>(layout.height));
 }
 
-void VKBlitScreen::SetVertexData(BufferData& data,
-                                 const Tegra::FramebufferConfig& framebuffer) const {
+void VKBlitScreen::SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer,
+                                 const Layout::FramebufferLayout layout) const {
     const auto& framebuffer_transform_flags = framebuffer.transform_flags;
     const auto& framebuffer_crop_rect = framebuffer.crop_rect;
 
@@ -798,7 +822,7 @@ void VKBlitScreen::SetVertexData(BufferData& data,
             static_cast<f32>(screen_info.height);
     }
 
-    const auto& screen = render_window.GetFramebufferLayout().screen;
+    const auto& screen = layout.screen;
     const auto x = static_cast<f32>(screen.left);
     const auto y = static_cast<f32>(screen.top);
     const auto w = static_cast<f32>(screen.GetWidth());
diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h
index 5e3177685..430bcfbca 100755
--- a/src/video_core/renderer_vulkan/vk_blit_screen.h
+++ b/src/video_core/renderer_vulkan/vk_blit_screen.h
@@ -56,8 +56,16 @@ public:
     void Recreate();
 
     [[nodiscard]] VkSemaphore Draw(const Tegra::FramebufferConfig& framebuffer,
+                                   const VkFramebuffer& host_framebuffer,
+                                   const Layout::FramebufferLayout layout, VkExtent2D render_area,
                                    bool use_accelerated);
 
+    [[nodiscard]] VkSemaphore DrawToSwapchain(const Tegra::FramebufferConfig& framebuffer,
+                                              bool use_accelerated);
+
+    [[nodiscard]] vk::Framebuffer CreateFramebuffer(const VkImageView& image_view,
+                                                    VkExtent2D extent);
+
 private:
     struct BufferData;
 
@@ -81,8 +89,9 @@ private:
     void CreateRawImages(const Tegra::FramebufferConfig& framebuffer);
     void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const;
 
-    void SetUniformData(BufferData& data, const Tegra::FramebufferConfig& framebuffer) const;
-    void SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer) const;
+    void SetUniformData(BufferData& data, const Layout::FramebufferLayout layout) const;
+    void SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer,
+                       const Layout::FramebufferLayout layout) const;
 
     u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const;
     u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer,
diff --git a/src/yuzu/bootmanager.cpp b/src/yuzu/bootmanager.cpp
index 484b6d71b..1519a46ed 100755
--- a/src/yuzu/bootmanager.cpp
+++ b/src/yuzu/bootmanager.cpp
@@ -634,9 +634,9 @@ void GRenderWindow::CaptureScreenshot(u32 res_scale, const QString& screenshot_path) {
     screenshot_image = QImage(QSize(layout.width, layout.height), QImage::Format_RGB32);
     renderer.RequestScreenshot(
         screenshot_image.bits(),
-        [=, this] {
+        [=, this](bool invert_y) {
             const std::string std_screenshot_path = screenshot_path.toStdString();
-            if (screenshot_image.mirrored(false, true).save(screenshot_path)) {
+            if (screenshot_image.mirrored(false, invert_y).save(screenshot_path)) {
                 LOG_INFO(Frontend, "Screenshot saved to \"{}\"", std_screenshot_path);
             } else {
                 LOG_ERROR(Frontend, "Failed to save screenshot to \"{}\"", std_screenshot_path);