early-access version 1936

This commit is contained in:
pineappleEA 2021-07-28 04:31:09 +02:00
parent b6ccebf4de
commit 6ee725cc08
12 changed files with 443 additions and 77 deletions

View file

@ -496,7 +496,7 @@ endif()
# Ensure libusb is properly configured (based on dolphin libusb include) # Ensure libusb is properly configured (based on dolphin libusb include)
if(NOT APPLE AND NOT YUZU_USE_BUNDLED_LIBUSB) if(NOT APPLE AND NOT YUZU_USE_BUNDLED_LIBUSB)
include(FindPkgConfig) include(FindPkgConfig)
if (PKG_CONFIG_FOUND) if (PKG_CONFIG_FOUND AND NOT CMAKE_SYSTEM_NAME MATCHES "DragonFly|FreeBSD")
pkg_check_modules(LIBUSB QUIET libusb-1.0>=1.0.24) pkg_check_modules(LIBUSB QUIET libusb-1.0>=1.0.24)
else() else()
find_package(LibUSB) find_package(LibUSB)

View file

@ -1,7 +1,7 @@
yuzu emulator early access yuzu emulator early access
============= =============
This is the source code for early-access 1935. This is the source code for early-access 1936.
## Legal Notice ## Legal Notice

View file

@ -57,6 +57,7 @@ public:
[[nodiscard]] IR::Inst* Inst() const; [[nodiscard]] IR::Inst* Inst() const;
[[nodiscard]] IR::Inst* InstRecursive() const; [[nodiscard]] IR::Inst* InstRecursive() const;
[[nodiscard]] IR::Inst* TryInstRecursive() const;
[[nodiscard]] IR::Value Resolve() const; [[nodiscard]] IR::Value Resolve() const;
[[nodiscard]] IR::Reg Reg() const; [[nodiscard]] IR::Reg Reg() const;
[[nodiscard]] IR::Pred Pred() const; [[nodiscard]] IR::Pred Pred() const;
@ -308,6 +309,13 @@ inline IR::Inst* Value::InstRecursive() const {
return inst; return inst;
} }
inline IR::Inst* Value::TryInstRecursive() const {
if (IsIdentity()) {
return inst->Arg(0).TryInstRecursive();
}
return type == Type::Opaque ? inst : nullptr;
}
inline IR::Value Value::Resolve() const { inline IR::Value Value::Resolve() const {
if (IsIdentity()) { if (IsIdentity()) {
return inst->Arg(0).Resolve(); return inst->Arg(0).Resolve();

View file

@ -3,6 +3,7 @@
// Refer to the license.txt file included. // Refer to the license.txt file included.
#include <algorithm> #include <algorithm>
#include <functional>
#include <tuple> #include <tuple>
#include <type_traits> #include <type_traits>
@ -88,6 +89,26 @@ bool FoldWhenAllImmediates(IR::Inst& inst, Func&& func) {
return true; return true;
} }
/// Return true when all values in a range are equal
template <typename Range>
bool AreEqual(const Range& range) {
auto resolver{[](const auto& value) { return value.Resolve(); }};
auto equal{[](const IR::Value& lhs, const IR::Value& rhs) {
if (lhs == rhs) {
return true;
}
// Not equal, but try to match if they read the same constant buffer
if (!lhs.IsImmediate() && !rhs.IsImmediate() &&
lhs.Inst()->GetOpcode() == IR::Opcode::GetCbufU32 &&
rhs.Inst()->GetOpcode() == IR::Opcode::GetCbufU32 &&
lhs.Inst()->Arg(0) == rhs.Inst()->Arg(0) && lhs.Inst()->Arg(1) == rhs.Inst()->Arg(1)) {
return true;
}
return false;
}};
return std::ranges::adjacent_find(range, std::not_fn(equal), resolver) == std::end(range);
}
void FoldGetRegister(IR::Inst& inst) { void FoldGetRegister(IR::Inst& inst) {
if (inst.Arg(0).Reg() == IR::Reg::RZ) { if (inst.Arg(0).Reg() == IR::Reg::RZ) {
inst.ReplaceUsesWith(IR::Value{u32{0}}); inst.ReplaceUsesWith(IR::Value{u32{0}});
@ -100,6 +121,157 @@ void FoldGetPred(IR::Inst& inst) {
} }
} }
/// Replaces the XMAD pattern generated by an integer FMA
bool FoldXmadMultiplyAdd(IR::Block& block, IR::Inst& inst) {
/*
* We are looking for this specific pattern:
* %6 = BitFieldUExtract %op_b, #0, #16
* %7 = BitFieldUExtract %op_a', #16, #16
* %8 = IMul32 %6, %7
* %10 = BitFieldUExtract %op_a', #0, #16
* %11 = BitFieldInsert %8, %10, #16, #16
* %15 = BitFieldUExtract %op_b, #0, #16
* %16 = BitFieldUExtract %op_a, #0, #16
* %17 = IMul32 %15, %16
* %18 = IAdd32 %17, %op_c
* %22 = BitFieldUExtract %op_b, #16, #16
* %23 = BitFieldUExtract %11, #16, #16
* %24 = IMul32 %22, %23
* %25 = ShiftLeftLogical32 %24, #16
* %26 = ShiftLeftLogical32 %11, #16
* %27 = IAdd32 %26, %18
* %result = IAdd32 %25, %27
*
* And replace it with:
* %temp = IMul32 %op_a, %op_b
* %result = IAdd32 %temp, %op_c
*
* This optimization has been proven safe by Nvidia's compiler logic being reversed.
* (If Nvidia generates this code from 'fma(a, b, c)', we can do the same in the reverse order.)
*/
const IR::Value zero{0u};
const IR::Value sixteen{16u};
IR::Inst* const _25{inst.Arg(0).TryInstRecursive()};
IR::Inst* const _27{inst.Arg(1).TryInstRecursive()};
if (!_25 || !_27) {
return false;
}
if (_27->GetOpcode() != IR::Opcode::IAdd32) {
return false;
}
if (_25->GetOpcode() != IR::Opcode::ShiftLeftLogical32 || _25->Arg(1) != sixteen) {
return false;
}
IR::Inst* const _24{_25->Arg(0).TryInstRecursive()};
if (!_24 || _24->GetOpcode() != IR::Opcode::IMul32) {
return false;
}
IR::Inst* const _22{_24->Arg(0).TryInstRecursive()};
IR::Inst* const _23{_24->Arg(1).TryInstRecursive()};
if (!_22 || !_23) {
return false;
}
if (_22->GetOpcode() != IR::Opcode::BitFieldUExtract) {
return false;
}
if (_23->GetOpcode() != IR::Opcode::BitFieldUExtract) {
return false;
}
if (_22->Arg(1) != sixteen || _22->Arg(2) != sixteen) {
return false;
}
if (_23->Arg(1) != sixteen || _23->Arg(2) != sixteen) {
return false;
}
IR::Inst* const _11{_23->Arg(0).TryInstRecursive()};
if (!_11 || _11->GetOpcode() != IR::Opcode::BitFieldInsert) {
return false;
}
if (_11->Arg(2) != sixteen || _11->Arg(3) != sixteen) {
return false;
}
IR::Inst* const _8{_11->Arg(0).TryInstRecursive()};
IR::Inst* const _10{_11->Arg(1).TryInstRecursive()};
if (!_8 || !_10) {
return false;
}
if (_8->GetOpcode() != IR::Opcode::IMul32) {
return false;
}
if (_10->GetOpcode() != IR::Opcode::BitFieldUExtract) {
return false;
}
IR::Inst* const _6{_8->Arg(0).TryInstRecursive()};
IR::Inst* const _7{_8->Arg(1).TryInstRecursive()};
if (!_6 || !_7) {
return false;
}
if (_6->GetOpcode() != IR::Opcode::BitFieldUExtract) {
return false;
}
if (_7->GetOpcode() != IR::Opcode::BitFieldUExtract) {
return false;
}
if (_6->Arg(1) != zero || _6->Arg(2) != sixteen) {
return false;
}
if (_7->Arg(1) != sixteen || _7->Arg(2) != sixteen) {
return false;
}
IR::Inst* const _26{_27->Arg(0).TryInstRecursive()};
IR::Inst* const _18{_27->Arg(1).TryInstRecursive()};
if (!_26 || !_18) {
return false;
}
if (_26->GetOpcode() != IR::Opcode::ShiftLeftLogical32 || _26->Arg(1) != sixteen) {
return false;
}
if (_26->Arg(0).InstRecursive() != _11) {
return false;
}
if (_18->GetOpcode() != IR::Opcode::IAdd32) {
return false;
}
IR::Inst* const _17{_18->Arg(0).TryInstRecursive()};
if (!_17 || _17->GetOpcode() != IR::Opcode::IMul32) {
return false;
}
IR::Inst* const _15{_17->Arg(0).TryInstRecursive()};
IR::Inst* const _16{_17->Arg(1).TryInstRecursive()};
if (!_15 || !_16) {
return false;
}
if (_15->GetOpcode() != IR::Opcode::BitFieldUExtract) {
return false;
}
if (_16->GetOpcode() != IR::Opcode::BitFieldUExtract) {
return false;
}
if (_15->Arg(1) != zero || _16->Arg(1) != zero || _10->Arg(1) != zero) {
return false;
}
if (_15->Arg(2) != sixteen || _16->Arg(2) != sixteen || _10->Arg(2) != sixteen) {
return false;
}
const std::array<IR::Value, 3> op_as{
_7->Arg(0).Resolve(),
_16->Arg(0).Resolve(),
_10->Arg(0).Resolve(),
};
const std::array<IR::Value, 3> op_bs{
_22->Arg(0).Resolve(),
_6->Arg(0).Resolve(),
_15->Arg(0).Resolve(),
};
const IR::U32 op_c{_18->Arg(1)};
if (!AreEqual(op_as) || !AreEqual(op_bs)) {
return false;
}
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
inst.ReplaceUsesWith(ir.IAdd(ir.IMul(IR::U32{op_as[0]}, IR::U32{op_bs[1]}), op_c));
return true;
}
/// Replaces the pattern generated by two XMAD multiplications /// Replaces the pattern generated by two XMAD multiplications
bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) { bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) {
/* /*
@ -116,33 +288,31 @@ bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) {
* *
* This optimization has been proven safe by LLVM and MSVC. * This optimization has been proven safe by LLVM and MSVC.
*/ */
const IR::Value lhs_arg{inst.Arg(0)}; IR::Inst* const lhs_shl{inst.Arg(0).TryInstRecursive()};
const IR::Value rhs_arg{inst.Arg(1)}; IR::Inst* const rhs_mul{inst.Arg(1).TryInstRecursive()};
if (lhs_arg.IsImmediate() || rhs_arg.IsImmediate()) { if (!lhs_shl || !rhs_mul) {
return false; return false;
} }
IR::Inst* const lhs_shl{lhs_arg.InstRecursive()};
if (lhs_shl->GetOpcode() != IR::Opcode::ShiftLeftLogical32 || if (lhs_shl->GetOpcode() != IR::Opcode::ShiftLeftLogical32 ||
lhs_shl->Arg(1) != IR::Value{16U}) { lhs_shl->Arg(1) != IR::Value{16U}) {
return false; return false;
} }
if (lhs_shl->Arg(0).IsImmediate()) { IR::Inst* const lhs_mul{lhs_shl->Arg(0).TryInstRecursive()};
if (!lhs_mul) {
return false; return false;
} }
IR::Inst* const lhs_mul{lhs_shl->Arg(0).InstRecursive()};
IR::Inst* const rhs_mul{rhs_arg.InstRecursive()};
if (lhs_mul->GetOpcode() != IR::Opcode::IMul32 || rhs_mul->GetOpcode() != IR::Opcode::IMul32) { if (lhs_mul->GetOpcode() != IR::Opcode::IMul32 || rhs_mul->GetOpcode() != IR::Opcode::IMul32) {
return false; return false;
} }
if (lhs_mul->Arg(1).Resolve() != rhs_mul->Arg(1).Resolve()) {
return false;
}
const IR::U32 factor_b{lhs_mul->Arg(1)}; const IR::U32 factor_b{lhs_mul->Arg(1)};
if (lhs_mul->Arg(0).IsImmediate() || rhs_mul->Arg(0).IsImmediate()) { if (factor_b.Resolve() != rhs_mul->Arg(1).Resolve()) {
return false;
}
IR::Inst* const lhs_bfe{lhs_mul->Arg(0).TryInstRecursive()};
IR::Inst* const rhs_bfe{rhs_mul->Arg(0).TryInstRecursive()};
if (!lhs_bfe || !rhs_bfe) {
return false; return false;
} }
IR::Inst* const lhs_bfe{lhs_mul->Arg(0).InstRecursive()};
IR::Inst* const rhs_bfe{rhs_mul->Arg(0).InstRecursive()};
if (lhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) { if (lhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) {
return false; return false;
} }
@ -155,10 +325,10 @@ bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) {
if (rhs_bfe->Arg(1) != IR::Value{0U} || rhs_bfe->Arg(2) != IR::Value{16U}) { if (rhs_bfe->Arg(1) != IR::Value{0U} || rhs_bfe->Arg(2) != IR::Value{16U}) {
return false; return false;
} }
if (lhs_bfe->Arg(0).Resolve() != rhs_bfe->Arg(0).Resolve()) { const IR::U32 factor_a{lhs_bfe->Arg(0)};
if (factor_a.Resolve() != rhs_bfe->Arg(0).Resolve()) {
return false; return false;
} }
const IR::U32 factor_a{lhs_bfe->Arg(0)};
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)}; IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
inst.ReplaceUsesWith(ir.IMul(factor_a, factor_b)); inst.ReplaceUsesWith(ir.IMul(factor_a, factor_b));
return true; return true;
@ -181,6 +351,9 @@ void FoldAdd(IR::Block& block, IR::Inst& inst) {
if (FoldXmadMultiply(block, inst)) { if (FoldXmadMultiply(block, inst)) {
return; return;
} }
if (FoldXmadMultiplyAdd(block, inst)) {
return;
}
} }
} }

View file

@ -27,7 +27,7 @@ void RendererBase::UpdateCurrentFramebufferLayout() {
render_window.UpdateCurrentFramebufferLayout(layout.width, layout.height); render_window.UpdateCurrentFramebufferLayout(layout.width, layout.height);
} }
void RendererBase::RequestScreenshot(void* data, std::function<void()> callback, void RendererBase::RequestScreenshot(void* data, std::function<void(bool)> callback,
const Layout::FramebufferLayout& layout) { const Layout::FramebufferLayout& layout) {
if (renderer_settings.screenshot_requested) { if (renderer_settings.screenshot_requested) {
LOG_ERROR(Render, "A screenshot is already requested or in progress, ignoring the request"); LOG_ERROR(Render, "A screenshot is already requested or in progress, ignoring the request");

View file

@ -24,7 +24,7 @@ struct RendererSettings {
// Screenshot // Screenshot
std::atomic<bool> screenshot_requested{false}; std::atomic<bool> screenshot_requested{false};
void* screenshot_bits{}; void* screenshot_bits{};
std::function<void()> screenshot_complete_callback; std::function<void(bool)> screenshot_complete_callback;
Layout::FramebufferLayout screenshot_framebuffer_layout; Layout::FramebufferLayout screenshot_framebuffer_layout;
}; };
@ -80,7 +80,7 @@ public:
void RefreshBaseSettings(); void RefreshBaseSettings();
/// Request a screenshot of the next frame /// Request a screenshot of the next frame
void RequestScreenshot(void* data, std::function<void()> callback, void RequestScreenshot(void* data, std::function<void(bool)> callback,
const Layout::FramebufferLayout& layout); const Layout::FramebufferLayout& layout);
protected: protected:

View file

@ -486,7 +486,7 @@ void RendererOpenGL::RenderScreenshot() {
glBindFramebuffer(GL_READ_FRAMEBUFFER, old_read_fb); glBindFramebuffer(GL_READ_FRAMEBUFFER, old_read_fb);
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb); glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb);
renderer_settings.screenshot_complete_callback(); renderer_settings.screenshot_complete_callback(true);
renderer_settings.screenshot_requested = false; renderer_settings.screenshot_requested = false;
} }

View file

@ -138,6 +138,7 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
const bool use_accelerated = const bool use_accelerated =
rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
const bool is_srgb = use_accelerated && screen_info.is_srgb; const bool is_srgb = use_accelerated && screen_info.is_srgb;
RenderScreenshot(*framebuffer, use_accelerated);
bool has_been_recreated = false; bool has_been_recreated = false;
const auto recreate_swapchain = [&] { const auto recreate_swapchain = [&] {
@ -162,7 +163,7 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
if (has_been_recreated) { if (has_been_recreated) {
blit_screen.Recreate(); blit_screen.Recreate();
} }
const VkSemaphore render_semaphore = blit_screen.Draw(*framebuffer, use_accelerated); const VkSemaphore render_semaphore = blit_screen.DrawToSwapchain(*framebuffer, use_accelerated);
scheduler.Flush(render_semaphore); scheduler.Flush(render_semaphore);
scheduler.WaitWorker(); scheduler.WaitWorker();
swapchain.Present(render_semaphore); swapchain.Present(render_semaphore);
@ -193,4 +194,153 @@ void RendererVulkan::Report() const {
telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions); telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions);
} }
void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& framebuffer,
bool use_accelerated) {
if (!renderer_settings.screenshot_requested) {
return;
}
const Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout};
vk::Image staging_image = device.GetLogical().CreateImage(VkImageCreateInfo{
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.pNext = nullptr,
.flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT,
.imageType = VK_IMAGE_TYPE_2D,
.format = VK_FORMAT_B8G8R8A8_UNORM,
.extent =
{
.width = layout.width,
.height = layout.height,
.depth = 1,
},
.mipLevels = 1,
.arrayLayers = 1,
.samples = VK_SAMPLE_COUNT_1_BIT,
.tiling = VK_IMAGE_TILING_OPTIMAL,
.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT |
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
});
const auto image_commit = memory_allocator.Commit(staging_image, MemoryUsage::DeviceLocal);
const vk::ImageView dst_view = device.GetLogical().CreateImageView(VkImageViewCreateInfo{
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.image = *staging_image,
.viewType = VK_IMAGE_VIEW_TYPE_2D,
.format = screen_info.is_srgb ? VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM,
.components{
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
.b = VK_COMPONENT_SWIZZLE_IDENTITY,
.a = VK_COMPONENT_SWIZZLE_IDENTITY,
},
.subresourceRange{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
});
const VkExtent2D render_area{.width = layout.width, .height = layout.height};
const vk::Framebuffer screenshot_fb = blit_screen.CreateFramebuffer(*dst_view, render_area);
// Since we're not rendering to the screen, ignore the render semaphore.
void(blit_screen.Draw(framebuffer, *screenshot_fb, layout, render_area, use_accelerated));
const auto buffer_size = static_cast<VkDeviceSize>(layout.width * layout.height * 4);
const VkBufferCreateInfo dst_buffer_info{
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.size = buffer_size,
.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
};
const vk::Buffer dst_buffer = device.GetLogical().CreateBuffer(dst_buffer_info);
MemoryCommit dst_buffer_memory = memory_allocator.Commit(dst_buffer, MemoryUsage::Download);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([&](vk::CommandBuffer cmdbuf) {
const VkImageMemoryBarrier read_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
.oldLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = *staging_image,
.subresourceRange{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
const VkImageMemoryBarrier image_write_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = 0,
.dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = *staging_image,
.subresourceRange{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
static constexpr VkMemoryBarrier memory_write_barrier{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
};
const VkBufferImageCopy copy{
.bufferOffset = 0,
.bufferRowLength = 0,
.bufferImageHeight = 0,
.imageSubresource{
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.mipLevel = 0,
.baseArrayLayer = 0,
.layerCount = 1,
},
.imageOffset{.x = 0, .y = 0, .z = 0},
.imageExtent{
.width = layout.width,
.height = layout.height,
.depth = 1,
},
};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
0, read_barrier);
cmdbuf.CopyImageToBuffer(*staging_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *dst_buffer,
copy);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
0, memory_write_barrier, nullptr, image_write_barrier);
});
// Ensure the copy is fully completed before saving the screenshot
scheduler.Finish();
// Copy backing image data to the QImage screenshot buffer
const auto dst_memory_map = dst_buffer_memory.Map();
std::memcpy(renderer_settings.screenshot_bits, dst_memory_map.data(), dst_memory_map.size());
renderer_settings.screenshot_complete_callback(false);
renderer_settings.screenshot_requested = false;
}
} // namespace Vulkan } // namespace Vulkan

View file

@ -54,6 +54,8 @@ public:
private: private:
void Report() const; void Report() const;
void RenderScreenshot(const Tegra::FramebufferConfig& framebuffer, bool use_accelerated);
Core::TelemetrySession& telemetry_session; Core::TelemetrySession& telemetry_session;
Core::Memory::Memory& cpu_memory; Core::Memory::Memory& cpu_memory;
Tegra::GPU& gpu; Tegra::GPU& gpu;

View file

@ -130,7 +130,10 @@ void VKBlitScreen::Recreate() {
CreateDynamicResources(); CreateDynamicResources();
} }
VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool use_accelerated) { VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
const VkFramebuffer& host_framebuffer,
const Layout::FramebufferLayout layout, VkExtent2D render_area,
bool use_accelerated) {
RefreshResources(framebuffer); RefreshResources(framebuffer);
// Finish any pending renderpass // Finish any pending renderpass
@ -145,8 +148,8 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
use_accelerated ? screen_info.image_view : *raw_image_views[image_index]); use_accelerated ? screen_info.image_view : *raw_image_views[image_index]);
BufferData data; BufferData data;
SetUniformData(data, framebuffer); SetUniformData(data, layout);
SetVertexData(data, framebuffer); SetVertexData(data, framebuffer, layout);
const std::span<u8> mapped_span = buffer_commit.Map(); const std::span<u8> mapped_span = buffer_commit.Map();
std::memcpy(mapped_span.data(), &data, sizeof(data)); std::memcpy(mapped_span.data(), &data, sizeof(data));
@ -220,52 +223,75 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier); VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier);
}); });
} }
scheduler.Record([this, image_index, size = swapchain.GetSize()](vk::CommandBuffer cmdbuf) { scheduler.Record(
const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f; [this, host_framebuffer, image_index, size = render_area](vk::CommandBuffer cmdbuf) {
const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f; const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f;
const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f; const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f;
const VkClearValue clear_color{ const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f;
.color = {.float32 = {bg_red, bg_green, bg_blue, 1.0f}}, const VkClearValue clear_color{
}; .color = {.float32 = {bg_red, bg_green, bg_blue, 1.0f}},
const VkRenderPassBeginInfo renderpass_bi{ };
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, const VkRenderPassBeginInfo renderpass_bi{
.pNext = nullptr, .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
.renderPass = *renderpass, .pNext = nullptr,
.framebuffer = *framebuffers[image_index], .renderPass = *renderpass,
.renderArea = .framebuffer = host_framebuffer,
{ .renderArea =
.offset = {0, 0}, {
.extent = size, .offset = {0, 0},
}, .extent = size,
.clearValueCount = 1, },
.pClearValues = &clear_color, .clearValueCount = 1,
}; .pClearValues = &clear_color,
const VkViewport viewport{ };
.x = 0.0f, const VkViewport viewport{
.y = 0.0f, .x = 0.0f,
.width = static_cast<float>(size.width), .y = 0.0f,
.height = static_cast<float>(size.height), .width = static_cast<float>(size.width),
.minDepth = 0.0f, .height = static_cast<float>(size.height),
.maxDepth = 1.0f, .minDepth = 0.0f,
}; .maxDepth = 1.0f,
const VkRect2D scissor{ };
.offset = {0, 0}, const VkRect2D scissor{
.extent = size, .offset = {0, 0},
}; .extent = size,
cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE); };
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
cmdbuf.SetViewport(0, viewport); cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
cmdbuf.SetScissor(0, scissor); cmdbuf.SetViewport(0, viewport);
cmdbuf.SetScissor(0, scissor);
cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices)); cmdbuf.BindVertexBuffer(0, *buffer, offsetof(BufferData, vertices));
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0, cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline_layout, 0,
descriptor_sets[image_index], {}); descriptor_sets[image_index], {});
cmdbuf.Draw(4, 1, 0, 0); cmdbuf.Draw(4, 1, 0, 0);
cmdbuf.EndRenderPass(); cmdbuf.EndRenderPass();
}); });
return *semaphores[image_index]; return *semaphores[image_index];
} }
VkSemaphore VKBlitScreen::DrawToSwapchain(const Tegra::FramebufferConfig& framebuffer,
bool use_accelerated) {
const std::size_t image_index = swapchain.GetImageIndex();
const VkExtent2D render_area = swapchain.GetSize();
const Layout::FramebufferLayout layout = render_window.GetFramebufferLayout();
return Draw(framebuffer, *framebuffers[image_index], layout, render_area, use_accelerated);
}
vk::Framebuffer VKBlitScreen::CreateFramebuffer(const VkImageView& image_view, VkExtent2D extent) {
return device.GetLogical().CreateFramebuffer(VkFramebufferCreateInfo{
.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.renderPass = *renderpass,
.attachmentCount = 1,
.pAttachments = &image_view,
.width = extent.width,
.height = extent.height,
.layers = 1,
});
}
void VKBlitScreen::CreateStaticResources() { void VKBlitScreen::CreateStaticResources() {
CreateShaders(); CreateShaders();
CreateSemaphores(); CreateSemaphores();
@ -752,15 +778,13 @@ void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView imag
device.GetLogical().UpdateDescriptorSets(std::array{ubo_write, sampler_write}, {}); device.GetLogical().UpdateDescriptorSets(std::array{ubo_write, sampler_write}, {});
} }
void VKBlitScreen::SetUniformData(BufferData& data, void VKBlitScreen::SetUniformData(BufferData& data, const Layout::FramebufferLayout layout) const {
const Tegra::FramebufferConfig& framebuffer) const {
const auto& layout = render_window.GetFramebufferLayout();
data.uniform.modelview_matrix = data.uniform.modelview_matrix =
MakeOrthographicMatrix(static_cast<f32>(layout.width), static_cast<f32>(layout.height)); MakeOrthographicMatrix(static_cast<f32>(layout.width), static_cast<f32>(layout.height));
} }
void VKBlitScreen::SetVertexData(BufferData& data, void VKBlitScreen::SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer,
const Tegra::FramebufferConfig& framebuffer) const { const Layout::FramebufferLayout layout) const {
const auto& framebuffer_transform_flags = framebuffer.transform_flags; const auto& framebuffer_transform_flags = framebuffer.transform_flags;
const auto& framebuffer_crop_rect = framebuffer.crop_rect; const auto& framebuffer_crop_rect = framebuffer.crop_rect;
@ -798,7 +822,7 @@ void VKBlitScreen::SetVertexData(BufferData& data,
static_cast<f32>(screen_info.height); static_cast<f32>(screen_info.height);
} }
const auto& screen = render_window.GetFramebufferLayout().screen; const auto& screen = layout.screen;
const auto x = static_cast<f32>(screen.left); const auto x = static_cast<f32>(screen.left);
const auto y = static_cast<f32>(screen.top); const auto y = static_cast<f32>(screen.top);
const auto w = static_cast<f32>(screen.GetWidth()); const auto w = static_cast<f32>(screen.GetWidth());

View file

@ -56,8 +56,16 @@ public:
void Recreate(); void Recreate();
[[nodiscard]] VkSemaphore Draw(const Tegra::FramebufferConfig& framebuffer, [[nodiscard]] VkSemaphore Draw(const Tegra::FramebufferConfig& framebuffer,
const VkFramebuffer& host_framebuffer,
const Layout::FramebufferLayout layout, VkExtent2D render_area,
bool use_accelerated); bool use_accelerated);
[[nodiscard]] VkSemaphore DrawToSwapchain(const Tegra::FramebufferConfig& framebuffer,
bool use_accelerated);
[[nodiscard]] vk::Framebuffer CreateFramebuffer(const VkImageView& image_view,
VkExtent2D extent);
private: private:
struct BufferData; struct BufferData;
@ -81,8 +89,9 @@ private:
void CreateRawImages(const Tegra::FramebufferConfig& framebuffer); void CreateRawImages(const Tegra::FramebufferConfig& framebuffer);
void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const; void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const;
void SetUniformData(BufferData& data, const Tegra::FramebufferConfig& framebuffer) const; void SetUniformData(BufferData& data, const Layout::FramebufferLayout layout) const;
void SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer) const; void SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer,
const Layout::FramebufferLayout layout) const;
u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const; u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const;
u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer, u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer,

View file

@ -634,9 +634,9 @@ void GRenderWindow::CaptureScreenshot(u32 res_scale, const QString& screenshot_p
screenshot_image = QImage(QSize(layout.width, layout.height), QImage::Format_RGB32); screenshot_image = QImage(QSize(layout.width, layout.height), QImage::Format_RGB32);
renderer.RequestScreenshot( renderer.RequestScreenshot(
screenshot_image.bits(), screenshot_image.bits(),
[=, this] { [=, this](bool invert_y) {
const std::string std_screenshot_path = screenshot_path.toStdString(); const std::string std_screenshot_path = screenshot_path.toStdString();
if (screenshot_image.mirrored(false, true).save(screenshot_path)) { if (screenshot_image.mirrored(false, invert_y).save(screenshot_path)) {
LOG_INFO(Frontend, "Screenshot saved to \"{}\"", std_screenshot_path); LOG_INFO(Frontend, "Screenshot saved to \"{}\"", std_screenshot_path);
} else { } else {
LOG_ERROR(Frontend, "Failed to save screenshot to \"{}\"", std_screenshot_path); LOG_ERROR(Frontend, "Failed to save screenshot to \"{}\"", std_screenshot_path);