early-access version 1936
This commit is contained in:
parent
b6ccebf4de
commit
6ee725cc08
12 changed files with 443 additions and 77 deletions
|
@ -496,7 +496,7 @@ endif()
|
||||||
# Ensure libusb is properly configured (based on dolphin libusb include)
|
# Ensure libusb is properly configured (based on dolphin libusb include)
|
||||||
if(NOT APPLE AND NOT YUZU_USE_BUNDLED_LIBUSB)
|
if(NOT APPLE AND NOT YUZU_USE_BUNDLED_LIBUSB)
|
||||||
include(FindPkgConfig)
|
include(FindPkgConfig)
|
||||||
if (PKG_CONFIG_FOUND)
|
if (PKG_CONFIG_FOUND AND NOT CMAKE_SYSTEM_NAME MATCHES "DragonFly|FreeBSD")
|
||||||
pkg_check_modules(LIBUSB QUIET libusb-1.0>=1.0.24)
|
pkg_check_modules(LIBUSB QUIET libusb-1.0>=1.0.24)
|
||||||
else()
|
else()
|
||||||
find_package(LibUSB)
|
find_package(LibUSB)
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
yuzu emulator early access
|
yuzu emulator early access
|
||||||
=============
|
=============
|
||||||
|
|
||||||
This is the source code for early-access 1935.
|
This is the source code for early-access 1936.
|
||||||
|
|
||||||
## Legal Notice
|
## Legal Notice
|
||||||
|
|
||||||
|
|
|
@ -57,6 +57,7 @@ public:
|
||||||
|
|
||||||
[[nodiscard]] IR::Inst* Inst() const;
|
[[nodiscard]] IR::Inst* Inst() const;
|
||||||
[[nodiscard]] IR::Inst* InstRecursive() const;
|
[[nodiscard]] IR::Inst* InstRecursive() const;
|
||||||
|
[[nodiscard]] IR::Inst* TryInstRecursive() const;
|
||||||
[[nodiscard]] IR::Value Resolve() const;
|
[[nodiscard]] IR::Value Resolve() const;
|
||||||
[[nodiscard]] IR::Reg Reg() const;
|
[[nodiscard]] IR::Reg Reg() const;
|
||||||
[[nodiscard]] IR::Pred Pred() const;
|
[[nodiscard]] IR::Pred Pred() const;
|
||||||
|
@ -308,6 +309,13 @@ inline IR::Inst* Value::InstRecursive() const {
|
||||||
return inst;
|
return inst;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Follows identity instructions to the underlying value and returns its instruction.
/// Unlike a plain lookup, this returns nullptr instead of an instruction when the
/// underlying value is not of opaque type, so callers must null-check the result.
inline IR::Inst* Value::TryInstRecursive() const {
    if (!IsIdentity()) {
        // Only opaque values are backed by an instruction
        if (type != Type::Opaque) {
            return nullptr;
        }
        return inst;
    }
    // Identity: look through to the value it forwards
    return inst->Arg(0).TryInstRecursive();
}
|
||||||
|
|
||||||
inline IR::Value Value::Resolve() const {
|
inline IR::Value Value::Resolve() const {
|
||||||
if (IsIdentity()) {
|
if (IsIdentity()) {
|
||||||
return inst->Arg(0).Resolve();
|
return inst->Arg(0).Resolve();
|
||||||
|
|
|
@ -3,6 +3,7 @@
|
||||||
// Refer to the license.txt file included.
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <functional>
|
||||||
#include <tuple>
|
#include <tuple>
|
||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
|
|
||||||
|
@ -88,6 +89,26 @@ bool FoldWhenAllImmediates(IR::Inst& inst, Func&& func) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Return true when all values in a range are equal
|
||||||
|
template <typename Range>
|
||||||
|
bool AreEqual(const Range& range) {
|
||||||
|
auto resolver{[](const auto& value) { return value.Resolve(); }};
|
||||||
|
auto equal{[](const IR::Value& lhs, const IR::Value& rhs) {
|
||||||
|
if (lhs == rhs) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
// Not equal, but try to match if they read the same constant buffer
|
||||||
|
if (!lhs.IsImmediate() && !rhs.IsImmediate() &&
|
||||||
|
lhs.Inst()->GetOpcode() == IR::Opcode::GetCbufU32 &&
|
||||||
|
rhs.Inst()->GetOpcode() == IR::Opcode::GetCbufU32 &&
|
||||||
|
lhs.Inst()->Arg(0) == rhs.Inst()->Arg(0) && lhs.Inst()->Arg(1) == rhs.Inst()->Arg(1)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}};
|
||||||
|
return std::ranges::adjacent_find(range, std::not_fn(equal), resolver) == std::end(range);
|
||||||
|
}
|
||||||
|
|
||||||
void FoldGetRegister(IR::Inst& inst) {
|
void FoldGetRegister(IR::Inst& inst) {
|
||||||
if (inst.Arg(0).Reg() == IR::Reg::RZ) {
|
if (inst.Arg(0).Reg() == IR::Reg::RZ) {
|
||||||
inst.ReplaceUsesWith(IR::Value{u32{0}});
|
inst.ReplaceUsesWith(IR::Value{u32{0}});
|
||||||
|
@ -100,6 +121,157 @@ void FoldGetPred(IR::Inst& inst) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Replaces the XMAD pattern generated by an integer FMA
|
||||||
|
bool FoldXmadMultiplyAdd(IR::Block& block, IR::Inst& inst) {
|
||||||
|
/*
|
||||||
|
* We are looking for this specific pattern:
|
||||||
|
* %6 = BitFieldUExtract %op_b, #0, #16
|
||||||
|
* %7 = BitFieldUExtract %op_a', #16, #16
|
||||||
|
* %8 = IMul32 %6, %7
|
||||||
|
* %10 = BitFieldUExtract %op_a', #0, #16
|
||||||
|
* %11 = BitFieldInsert %8, %10, #16, #16
|
||||||
|
* %15 = BitFieldUExtract %op_b, #0, #16
|
||||||
|
* %16 = BitFieldUExtract %op_a, #0, #16
|
||||||
|
* %17 = IMul32 %15, %16
|
||||||
|
* %18 = IAdd32 %17, %op_c
|
||||||
|
* %22 = BitFieldUExtract %op_b, #16, #16
|
||||||
|
* %23 = BitFieldUExtract %11, #16, #16
|
||||||
|
* %24 = IMul32 %22, %23
|
||||||
|
* %25 = ShiftLeftLogical32 %24, #16
|
||||||
|
* %26 = ShiftLeftLogical32 %11, #16
|
||||||
|
* %27 = IAdd32 %26, %18
|
||||||
|
* %result = IAdd32 %25, %27
|
||||||
|
*
|
||||||
|
* And replace it with:
|
||||||
|
* %temp = IMul32 %op_a, %op_b
|
||||||
|
* %result = IAdd32 %temp, %op_c
|
||||||
|
*
|
||||||
|
* This optimization has been proven safe by Nvidia's compiler logic being reversed.
|
||||||
|
* (If Nvidia generates this code from 'fma(a, b, c)', we can do the same in the reverse order.)
|
||||||
|
*/
|
||||||
|
const IR::Value zero{0u};
|
||||||
|
const IR::Value sixteen{16u};
|
||||||
|
IR::Inst* const _25{inst.Arg(0).TryInstRecursive()};
|
||||||
|
IR::Inst* const _27{inst.Arg(1).TryInstRecursive()};
|
||||||
|
if (!_25 || !_27) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (_27->GetOpcode() != IR::Opcode::IAdd32) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (_25->GetOpcode() != IR::Opcode::ShiftLeftLogical32 || _25->Arg(1) != sixteen) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
IR::Inst* const _24{_25->Arg(0).TryInstRecursive()};
|
||||||
|
if (!_24 || _24->GetOpcode() != IR::Opcode::IMul32) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
IR::Inst* const _22{_24->Arg(0).TryInstRecursive()};
|
||||||
|
IR::Inst* const _23{_24->Arg(1).TryInstRecursive()};
|
||||||
|
if (!_22 || !_23) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (_22->GetOpcode() != IR::Opcode::BitFieldUExtract) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (_23->GetOpcode() != IR::Opcode::BitFieldUExtract) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (_22->Arg(1) != sixteen || _22->Arg(2) != sixteen) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (_23->Arg(1) != sixteen || _23->Arg(2) != sixteen) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
IR::Inst* const _11{_23->Arg(0).TryInstRecursive()};
|
||||||
|
if (!_11 || _11->GetOpcode() != IR::Opcode::BitFieldInsert) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (_11->Arg(2) != sixteen || _11->Arg(3) != sixteen) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
IR::Inst* const _8{_11->Arg(0).TryInstRecursive()};
|
||||||
|
IR::Inst* const _10{_11->Arg(1).TryInstRecursive()};
|
||||||
|
if (!_8 || !_10) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (_8->GetOpcode() != IR::Opcode::IMul32) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (_10->GetOpcode() != IR::Opcode::BitFieldUExtract) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
IR::Inst* const _6{_8->Arg(0).TryInstRecursive()};
|
||||||
|
IR::Inst* const _7{_8->Arg(1).TryInstRecursive()};
|
||||||
|
if (!_6 || !_7) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (_6->GetOpcode() != IR::Opcode::BitFieldUExtract) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (_7->GetOpcode() != IR::Opcode::BitFieldUExtract) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (_6->Arg(1) != zero || _6->Arg(2) != sixteen) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (_7->Arg(1) != sixteen || _7->Arg(2) != sixteen) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
IR::Inst* const _26{_27->Arg(0).TryInstRecursive()};
|
||||||
|
IR::Inst* const _18{_27->Arg(1).TryInstRecursive()};
|
||||||
|
if (!_26 || !_18) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (_26->GetOpcode() != IR::Opcode::ShiftLeftLogical32 || _26->Arg(1) != sixteen) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (_26->Arg(0).InstRecursive() != _11) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (_18->GetOpcode() != IR::Opcode::IAdd32) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
IR::Inst* const _17{_18->Arg(0).TryInstRecursive()};
|
||||||
|
if (!_17 || _17->GetOpcode() != IR::Opcode::IMul32) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
IR::Inst* const _15{_17->Arg(0).TryInstRecursive()};
|
||||||
|
IR::Inst* const _16{_17->Arg(1).TryInstRecursive()};
|
||||||
|
if (!_15 || !_16) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (_15->GetOpcode() != IR::Opcode::BitFieldUExtract) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (_16->GetOpcode() != IR::Opcode::BitFieldUExtract) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (_15->Arg(1) != zero || _16->Arg(1) != zero || _10->Arg(1) != zero) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (_15->Arg(2) != sixteen || _16->Arg(2) != sixteen || _10->Arg(2) != sixteen) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
const std::array<IR::Value, 3> op_as{
|
||||||
|
_7->Arg(0).Resolve(),
|
||||||
|
_16->Arg(0).Resolve(),
|
||||||
|
_10->Arg(0).Resolve(),
|
||||||
|
};
|
||||||
|
const std::array<IR::Value, 3> op_bs{
|
||||||
|
_22->Arg(0).Resolve(),
|
||||||
|
_6->Arg(0).Resolve(),
|
||||||
|
_15->Arg(0).Resolve(),
|
||||||
|
};
|
||||||
|
const IR::U32 op_c{_18->Arg(1)};
|
||||||
|
if (!AreEqual(op_as) || !AreEqual(op_bs)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||||
|
inst.ReplaceUsesWith(ir.IAdd(ir.IMul(IR::U32{op_as[0]}, IR::U32{op_bs[1]}), op_c));
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
/// Replaces the pattern generated by two XMAD multiplications
|
/// Replaces the pattern generated by two XMAD multiplications
|
||||||
bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) {
|
bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) {
|
||||||
/*
|
/*
|
||||||
|
@ -116,33 +288,31 @@ bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) {
|
||||||
*
|
*
|
||||||
* This optimization has been proven safe by LLVM and MSVC.
|
* This optimization has been proven safe by LLVM and MSVC.
|
||||||
*/
|
*/
|
||||||
const IR::Value lhs_arg{inst.Arg(0)};
|
IR::Inst* const lhs_shl{inst.Arg(0).TryInstRecursive()};
|
||||||
const IR::Value rhs_arg{inst.Arg(1)};
|
IR::Inst* const rhs_mul{inst.Arg(1).TryInstRecursive()};
|
||||||
if (lhs_arg.IsImmediate() || rhs_arg.IsImmediate()) {
|
if (!lhs_shl || !rhs_mul) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
IR::Inst* const lhs_shl{lhs_arg.InstRecursive()};
|
|
||||||
if (lhs_shl->GetOpcode() != IR::Opcode::ShiftLeftLogical32 ||
|
if (lhs_shl->GetOpcode() != IR::Opcode::ShiftLeftLogical32 ||
|
||||||
lhs_shl->Arg(1) != IR::Value{16U}) {
|
lhs_shl->Arg(1) != IR::Value{16U}) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (lhs_shl->Arg(0).IsImmediate()) {
|
IR::Inst* const lhs_mul{lhs_shl->Arg(0).TryInstRecursive()};
|
||||||
|
if (!lhs_mul) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
IR::Inst* const lhs_mul{lhs_shl->Arg(0).InstRecursive()};
|
|
||||||
IR::Inst* const rhs_mul{rhs_arg.InstRecursive()};
|
|
||||||
if (lhs_mul->GetOpcode() != IR::Opcode::IMul32 || rhs_mul->GetOpcode() != IR::Opcode::IMul32) {
|
if (lhs_mul->GetOpcode() != IR::Opcode::IMul32 || rhs_mul->GetOpcode() != IR::Opcode::IMul32) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (lhs_mul->Arg(1).Resolve() != rhs_mul->Arg(1).Resolve()) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
const IR::U32 factor_b{lhs_mul->Arg(1)};
|
const IR::U32 factor_b{lhs_mul->Arg(1)};
|
||||||
if (lhs_mul->Arg(0).IsImmediate() || rhs_mul->Arg(0).IsImmediate()) {
|
if (factor_b.Resolve() != rhs_mul->Arg(1).Resolve()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
IR::Inst* const lhs_bfe{lhs_mul->Arg(0).TryInstRecursive()};
|
||||||
|
IR::Inst* const rhs_bfe{rhs_mul->Arg(0).TryInstRecursive()};
|
||||||
|
if (!lhs_bfe || !rhs_bfe) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
IR::Inst* const lhs_bfe{lhs_mul->Arg(0).InstRecursive()};
|
|
||||||
IR::Inst* const rhs_bfe{rhs_mul->Arg(0).InstRecursive()};
|
|
||||||
if (lhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) {
|
if (lhs_bfe->GetOpcode() != IR::Opcode::BitFieldUExtract) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -155,10 +325,10 @@ bool FoldXmadMultiply(IR::Block& block, IR::Inst& inst) {
|
||||||
if (rhs_bfe->Arg(1) != IR::Value{0U} || rhs_bfe->Arg(2) != IR::Value{16U}) {
|
if (rhs_bfe->Arg(1) != IR::Value{0U} || rhs_bfe->Arg(2) != IR::Value{16U}) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (lhs_bfe->Arg(0).Resolve() != rhs_bfe->Arg(0).Resolve()) {
|
const IR::U32 factor_a{lhs_bfe->Arg(0)};
|
||||||
|
if (factor_a.Resolve() != rhs_bfe->Arg(0).Resolve()) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
const IR::U32 factor_a{lhs_bfe->Arg(0)};
|
|
||||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||||
inst.ReplaceUsesWith(ir.IMul(factor_a, factor_b));
|
inst.ReplaceUsesWith(ir.IMul(factor_a, factor_b));
|
||||||
return true;
|
return true;
|
||||||
|
@ -181,6 +351,9 @@ void FoldAdd(IR::Block& block, IR::Inst& inst) {
|
||||||
if (FoldXmadMultiply(block, inst)) {
|
if (FoldXmadMultiply(block, inst)) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
if (FoldXmadMultiplyAdd(block, inst)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -27,7 +27,7 @@ void RendererBase::UpdateCurrentFramebufferLayout() {
|
||||||
render_window.UpdateCurrentFramebufferLayout(layout.width, layout.height);
|
render_window.UpdateCurrentFramebufferLayout(layout.width, layout.height);
|
||||||
}
|
}
|
||||||
|
|
||||||
void RendererBase::RequestScreenshot(void* data, std::function<void()> callback,
|
void RendererBase::RequestScreenshot(void* data, std::function<void(bool)> callback,
|
||||||
const Layout::FramebufferLayout& layout) {
|
const Layout::FramebufferLayout& layout) {
|
||||||
if (renderer_settings.screenshot_requested) {
|
if (renderer_settings.screenshot_requested) {
|
||||||
LOG_ERROR(Render, "A screenshot is already requested or in progress, ignoring the request");
|
LOG_ERROR(Render, "A screenshot is already requested or in progress, ignoring the request");
|
||||||
|
|
|
@ -24,7 +24,7 @@ struct RendererSettings {
|
||||||
// Screenshot
|
// Screenshot
|
||||||
std::atomic<bool> screenshot_requested{false};
|
std::atomic<bool> screenshot_requested{false};
|
||||||
void* screenshot_bits{};
|
void* screenshot_bits{};
|
||||||
std::function<void()> screenshot_complete_callback;
|
std::function<void(bool)> screenshot_complete_callback;
|
||||||
Layout::FramebufferLayout screenshot_framebuffer_layout;
|
Layout::FramebufferLayout screenshot_framebuffer_layout;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -80,7 +80,7 @@ public:
|
||||||
void RefreshBaseSettings();
|
void RefreshBaseSettings();
|
||||||
|
|
||||||
/// Request a screenshot of the next frame
|
/// Request a screenshot of the next frame
|
||||||
void RequestScreenshot(void* data, std::function<void()> callback,
|
void RequestScreenshot(void* data, std::function<void(bool)> callback,
|
||||||
const Layout::FramebufferLayout& layout);
|
const Layout::FramebufferLayout& layout);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
|
|
@ -486,7 +486,7 @@ void RendererOpenGL::RenderScreenshot() {
|
||||||
glBindFramebuffer(GL_READ_FRAMEBUFFER, old_read_fb);
|
glBindFramebuffer(GL_READ_FRAMEBUFFER, old_read_fb);
|
||||||
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb);
|
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb);
|
||||||
|
|
||||||
renderer_settings.screenshot_complete_callback();
|
renderer_settings.screenshot_complete_callback(true);
|
||||||
renderer_settings.screenshot_requested = false;
|
renderer_settings.screenshot_requested = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -138,6 +138,7 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
|
||||||
const bool use_accelerated =
|
const bool use_accelerated =
|
||||||
rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
|
rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride);
|
||||||
const bool is_srgb = use_accelerated && screen_info.is_srgb;
|
const bool is_srgb = use_accelerated && screen_info.is_srgb;
|
||||||
|
RenderScreenshot(*framebuffer, use_accelerated);
|
||||||
|
|
||||||
bool has_been_recreated = false;
|
bool has_been_recreated = false;
|
||||||
const auto recreate_swapchain = [&] {
|
const auto recreate_swapchain = [&] {
|
||||||
|
@ -162,7 +163,7 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
|
||||||
if (has_been_recreated) {
|
if (has_been_recreated) {
|
||||||
blit_screen.Recreate();
|
blit_screen.Recreate();
|
||||||
}
|
}
|
||||||
const VkSemaphore render_semaphore = blit_screen.Draw(*framebuffer, use_accelerated);
|
const VkSemaphore render_semaphore = blit_screen.DrawToSwapchain(*framebuffer, use_accelerated);
|
||||||
scheduler.Flush(render_semaphore);
|
scheduler.Flush(render_semaphore);
|
||||||
scheduler.WaitWorker();
|
scheduler.WaitWorker();
|
||||||
swapchain.Present(render_semaphore);
|
swapchain.Present(render_semaphore);
|
||||||
|
@ -193,4 +194,153 @@ void RendererVulkan::Report() const {
|
||||||
telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions);
|
telemetry_session.AddField(field, "GPU_Vulkan_Extensions", extensions);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& framebuffer,
|
||||||
|
bool use_accelerated) {
|
||||||
|
if (!renderer_settings.screenshot_requested) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout};
|
||||||
|
vk::Image staging_image = device.GetLogical().CreateImage(VkImageCreateInfo{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT,
|
||||||
|
.imageType = VK_IMAGE_TYPE_2D,
|
||||||
|
.format = VK_FORMAT_B8G8R8A8_UNORM,
|
||||||
|
.extent =
|
||||||
|
{
|
||||||
|
.width = layout.width,
|
||||||
|
.height = layout.height,
|
||||||
|
.depth = 1,
|
||||||
|
},
|
||||||
|
.mipLevels = 1,
|
||||||
|
.arrayLayers = 1,
|
||||||
|
.samples = VK_SAMPLE_COUNT_1_BIT,
|
||||||
|
.tiling = VK_IMAGE_TILING_OPTIMAL,
|
||||||
|
.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT |
|
||||||
|
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT,
|
||||||
|
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||||||
|
.queueFamilyIndexCount = 0,
|
||||||
|
.pQueueFamilyIndices = nullptr,
|
||||||
|
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
|
||||||
|
});
|
||||||
|
const auto image_commit = memory_allocator.Commit(staging_image, MemoryUsage::DeviceLocal);
|
||||||
|
|
||||||
|
const vk::ImageView dst_view = device.GetLogical().CreateImageView(VkImageViewCreateInfo{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.flags = 0,
|
||||||
|
.image = *staging_image,
|
||||||
|
.viewType = VK_IMAGE_VIEW_TYPE_2D,
|
||||||
|
.format = screen_info.is_srgb ? VK_FORMAT_B8G8R8A8_SRGB : VK_FORMAT_B8G8R8A8_UNORM,
|
||||||
|
.components{
|
||||||
|
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||||||
|
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||||||
|
.b = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||||||
|
.a = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||||||
|
},
|
||||||
|
.subresourceRange{
|
||||||
|
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
||||||
|
.baseMipLevel = 0,
|
||||||
|
.levelCount = 1,
|
||||||
|
.baseArrayLayer = 0,
|
||||||
|
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
const VkExtent2D render_area{.width = layout.width, .height = layout.height};
|
||||||
|
const vk::Framebuffer screenshot_fb = blit_screen.CreateFramebuffer(*dst_view, render_area);
|
||||||
|
// Since we're not rendering to the screen, ignore the render semaphore.
|
||||||
|
void(blit_screen.Draw(framebuffer, *screenshot_fb, layout, render_area, use_accelerated));
|
||||||
|
|
||||||
|
const auto buffer_size = static_cast<VkDeviceSize>(layout.width * layout.height * 4);
|
||||||
|
const VkBufferCreateInfo dst_buffer_info{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.flags = 0,
|
||||||
|
.size = buffer_size,
|
||||||
|
.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT,
|
||||||
|
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||||||
|
.queueFamilyIndexCount = 0,
|
||||||
|
.pQueueFamilyIndices = nullptr,
|
||||||
|
};
|
||||||
|
const vk::Buffer dst_buffer = device.GetLogical().CreateBuffer(dst_buffer_info);
|
||||||
|
MemoryCommit dst_buffer_memory = memory_allocator.Commit(dst_buffer, MemoryUsage::Download);
|
||||||
|
|
||||||
|
scheduler.RequestOutsideRenderPassOperationContext();
|
||||||
|
scheduler.Record([&](vk::CommandBuffer cmdbuf) {
|
||||||
|
const VkImageMemoryBarrier read_barrier{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
|
||||||
|
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
|
||||||
|
.oldLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
|
||||||
|
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
|
||||||
|
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||||
|
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||||
|
.image = *staging_image,
|
||||||
|
.subresourceRange{
|
||||||
|
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
||||||
|
.baseMipLevel = 0,
|
||||||
|
.levelCount = VK_REMAINING_MIP_LEVELS,
|
||||||
|
.baseArrayLayer = 0,
|
||||||
|
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
const VkImageMemoryBarrier image_write_barrier{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.srcAccessMask = 0,
|
||||||
|
.dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
|
||||||
|
.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
|
||||||
|
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||||
|
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||||
|
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||||
|
.image = *staging_image,
|
||||||
|
.subresourceRange{
|
||||||
|
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
||||||
|
.baseMipLevel = 0,
|
||||||
|
.levelCount = VK_REMAINING_MIP_LEVELS,
|
||||||
|
.baseArrayLayer = 0,
|
||||||
|
.layerCount = VK_REMAINING_ARRAY_LAYERS,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
static constexpr VkMemoryBarrier memory_write_barrier{
|
||||||
|
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
|
||||||
|
.pNext = nullptr,
|
||||||
|
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
|
||||||
|
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
|
||||||
|
};
|
||||||
|
const VkBufferImageCopy copy{
|
||||||
|
.bufferOffset = 0,
|
||||||
|
.bufferRowLength = 0,
|
||||||
|
.bufferImageHeight = 0,
|
||||||
|
.imageSubresource{
|
||||||
|
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
||||||
|
.mipLevel = 0,
|
||||||
|
.baseArrayLayer = 0,
|
||||||
|
.layerCount = 1,
|
||||||
|
},
|
||||||
|
.imageOffset{.x = 0, .y = 0, .z = 0},
|
||||||
|
.imageExtent{
|
||||||
|
.width = layout.width,
|
||||||
|
.height = layout.height,
|
||||||
|
.depth = 1,
|
||||||
|
},
|
||||||
|
};
|
||||||
|
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||||
|
0, read_barrier);
|
||||||
|
cmdbuf.CopyImageToBuffer(*staging_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *dst_buffer,
|
||||||
|
copy);
|
||||||
|
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
|
||||||
|
0, memory_write_barrier, nullptr, image_write_barrier);
|
||||||
|
});
|
||||||
|
// Ensure the copy is fully completed before saving the screenshot
|
||||||
|
scheduler.Finish();
|
||||||
|
|
||||||
|
// Copy backing image data to the QImage screenshot buffer
|
||||||
|
const auto dst_memory_map = dst_buffer_memory.Map();
|
||||||
|
std::memcpy(renderer_settings.screenshot_bits, dst_memory_map.data(), dst_memory_map.size());
|
||||||
|
renderer_settings.screenshot_complete_callback(false);
|
||||||
|
renderer_settings.screenshot_requested = false;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace Vulkan
|
} // namespace Vulkan
|
||||||
|
|
|
@ -54,6 +54,8 @@ public:
|
||||||
private:
|
private:
|
||||||
void Report() const;
|
void Report() const;
|
||||||
|
|
||||||
|
void RenderScreenshot(const Tegra::FramebufferConfig& framebuffer, bool use_accelerated);
|
||||||
|
|
||||||
Core::TelemetrySession& telemetry_session;
|
Core::TelemetrySession& telemetry_session;
|
||||||
Core::Memory::Memory& cpu_memory;
|
Core::Memory::Memory& cpu_memory;
|
||||||
Tegra::GPU& gpu;
|
Tegra::GPU& gpu;
|
||||||
|
|
|
@ -130,7 +130,10 @@ void VKBlitScreen::Recreate() {
|
||||||
CreateDynamicResources();
|
CreateDynamicResources();
|
||||||
}
|
}
|
||||||
|
|
||||||
VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool use_accelerated) {
|
VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
|
||||||
|
const VkFramebuffer& host_framebuffer,
|
||||||
|
const Layout::FramebufferLayout layout, VkExtent2D render_area,
|
||||||
|
bool use_accelerated) {
|
||||||
RefreshResources(framebuffer);
|
RefreshResources(framebuffer);
|
||||||
|
|
||||||
// Finish any pending renderpass
|
// Finish any pending renderpass
|
||||||
|
@ -145,8 +148,8 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
|
||||||
use_accelerated ? screen_info.image_view : *raw_image_views[image_index]);
|
use_accelerated ? screen_info.image_view : *raw_image_views[image_index]);
|
||||||
|
|
||||||
BufferData data;
|
BufferData data;
|
||||||
SetUniformData(data, framebuffer);
|
SetUniformData(data, layout);
|
||||||
SetVertexData(data, framebuffer);
|
SetVertexData(data, framebuffer, layout);
|
||||||
|
|
||||||
const std::span<u8> mapped_span = buffer_commit.Map();
|
const std::span<u8> mapped_span = buffer_commit.Map();
|
||||||
std::memcpy(mapped_span.data(), &data, sizeof(data));
|
std::memcpy(mapped_span.data(), &data, sizeof(data));
|
||||||
|
@ -220,7 +223,8 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
|
||||||
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier);
|
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
scheduler.Record([this, image_index, size = swapchain.GetSize()](vk::CommandBuffer cmdbuf) {
|
scheduler.Record(
|
||||||
|
[this, host_framebuffer, image_index, size = render_area](vk::CommandBuffer cmdbuf) {
|
||||||
const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f;
|
const f32 bg_red = Settings::values.bg_red.GetValue() / 255.0f;
|
||||||
const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f;
|
const f32 bg_green = Settings::values.bg_green.GetValue() / 255.0f;
|
||||||
const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f;
|
const f32 bg_blue = Settings::values.bg_blue.GetValue() / 255.0f;
|
||||||
|
@ -231,7 +235,7 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
|
||||||
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
|
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
|
||||||
.pNext = nullptr,
|
.pNext = nullptr,
|
||||||
.renderPass = *renderpass,
|
.renderPass = *renderpass,
|
||||||
.framebuffer = *framebuffers[image_index],
|
.framebuffer = host_framebuffer,
|
||||||
.renderArea =
|
.renderArea =
|
||||||
{
|
{
|
||||||
.offset = {0, 0},
|
.offset = {0, 0},
|
||||||
|
@ -266,6 +270,28 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
|
||||||
return *semaphores[image_index];
|
return *semaphores[image_index];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Draws the frame into the framebuffer of the current swapchain image, using the swapchain
/// size as render area and the render window's framebuffer layout.
/// Returns whatever semaphore Draw returns.
VkSemaphore VKBlitScreen::DrawToSwapchain(const Tegra::FramebufferConfig& framebuffer,
                                          bool use_accelerated) {
    // Collect the swapchain-specific parameters, then defer to Draw
    const std::size_t current_image = swapchain.GetImageIndex();
    const VkExtent2D swapchain_extent = swapchain.GetSize();
    const Layout::FramebufferLayout window_layout = render_window.GetFramebufferLayout();
    return Draw(framebuffer, *framebuffers[current_image], window_layout, swapchain_extent,
                use_accelerated);
}
|
||||||
|
|
||||||
|
/// Creates a single-layer framebuffer for this object's render pass with @p image_view as
/// its only attachment and @p extent as its dimensions.
vk::Framebuffer VKBlitScreen::CreateFramebuffer(const VkImageView& image_view, VkExtent2D extent) {
    const VkFramebufferCreateInfo framebuffer_ci{
        .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .renderPass = *renderpass,
        .attachmentCount = 1,
        // Points at the caller's handle; it only has to stay valid for the create call below
        .pAttachments = &image_view,
        .width = extent.width,
        .height = extent.height,
        .layers = 1,
    };
    return device.GetLogical().CreateFramebuffer(framebuffer_ci);
}
|
||||||
|
|
||||||
void VKBlitScreen::CreateStaticResources() {
|
void VKBlitScreen::CreateStaticResources() {
|
||||||
CreateShaders();
|
CreateShaders();
|
||||||
CreateSemaphores();
|
CreateSemaphores();
|
||||||
|
@ -752,15 +778,13 @@ void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView imag
|
||||||
device.GetLogical().UpdateDescriptorSets(std::array{ubo_write, sampler_write}, {});
|
device.GetLogical().UpdateDescriptorSets(std::array{ubo_write, sampler_write}, {});
|
||||||
}
|
}
|
||||||
|
|
||||||
void VKBlitScreen::SetUniformData(BufferData& data,
|
void VKBlitScreen::SetUniformData(BufferData& data, const Layout::FramebufferLayout layout) const {
|
||||||
const Tegra::FramebufferConfig& framebuffer) const {
|
|
||||||
const auto& layout = render_window.GetFramebufferLayout();
|
|
||||||
data.uniform.modelview_matrix =
|
data.uniform.modelview_matrix =
|
||||||
MakeOrthographicMatrix(static_cast<f32>(layout.width), static_cast<f32>(layout.height));
|
MakeOrthographicMatrix(static_cast<f32>(layout.width), static_cast<f32>(layout.height));
|
||||||
}
|
}
|
||||||
|
|
||||||
void VKBlitScreen::SetVertexData(BufferData& data,
|
void VKBlitScreen::SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer,
|
||||||
const Tegra::FramebufferConfig& framebuffer) const {
|
const Layout::FramebufferLayout layout) const {
|
||||||
const auto& framebuffer_transform_flags = framebuffer.transform_flags;
|
const auto& framebuffer_transform_flags = framebuffer.transform_flags;
|
||||||
const auto& framebuffer_crop_rect = framebuffer.crop_rect;
|
const auto& framebuffer_crop_rect = framebuffer.crop_rect;
|
||||||
|
|
||||||
|
@ -798,7 +822,7 @@ void VKBlitScreen::SetVertexData(BufferData& data,
|
||||||
static_cast<f32>(screen_info.height);
|
static_cast<f32>(screen_info.height);
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto& screen = render_window.GetFramebufferLayout().screen;
|
const auto& screen = layout.screen;
|
||||||
const auto x = static_cast<f32>(screen.left);
|
const auto x = static_cast<f32>(screen.left);
|
||||||
const auto y = static_cast<f32>(screen.top);
|
const auto y = static_cast<f32>(screen.top);
|
||||||
const auto w = static_cast<f32>(screen.GetWidth());
|
const auto w = static_cast<f32>(screen.GetWidth());
|
||||||
|
|
|
@ -56,8 +56,16 @@ public:
|
||||||
void Recreate();
|
void Recreate();
|
||||||
|
|
||||||
[[nodiscard]] VkSemaphore Draw(const Tegra::FramebufferConfig& framebuffer,
|
[[nodiscard]] VkSemaphore Draw(const Tegra::FramebufferConfig& framebuffer,
|
||||||
|
const VkFramebuffer& host_framebuffer,
|
||||||
|
const Layout::FramebufferLayout layout, VkExtent2D render_area,
|
||||||
bool use_accelerated);
|
bool use_accelerated);
|
||||||
|
|
||||||
|
[[nodiscard]] VkSemaphore DrawToSwapchain(const Tegra::FramebufferConfig& framebuffer,
|
||||||
|
bool use_accelerated);
|
||||||
|
|
||||||
|
[[nodiscard]] vk::Framebuffer CreateFramebuffer(const VkImageView& image_view,
|
||||||
|
VkExtent2D extent);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
struct BufferData;
|
struct BufferData;
|
||||||
|
|
||||||
|
@ -81,8 +89,9 @@ private:
|
||||||
void CreateRawImages(const Tegra::FramebufferConfig& framebuffer);
|
void CreateRawImages(const Tegra::FramebufferConfig& framebuffer);
|
||||||
|
|
||||||
void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const;
|
void UpdateDescriptorSet(std::size_t image_index, VkImageView image_view) const;
|
||||||
void SetUniformData(BufferData& data, const Tegra::FramebufferConfig& framebuffer) const;
|
void SetUniformData(BufferData& data, const Layout::FramebufferLayout layout) const;
|
||||||
void SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer) const;
|
void SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer,
|
||||||
|
const Layout::FramebufferLayout layout) const;
|
||||||
|
|
||||||
u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const;
|
u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const;
|
||||||
u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer,
|
u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer,
|
||||||
|
|
|
@ -634,9 +634,9 @@ void GRenderWindow::CaptureScreenshot(u32 res_scale, const QString& screenshot_p
|
||||||
screenshot_image = QImage(QSize(layout.width, layout.height), QImage::Format_RGB32);
|
screenshot_image = QImage(QSize(layout.width, layout.height), QImage::Format_RGB32);
|
||||||
renderer.RequestScreenshot(
|
renderer.RequestScreenshot(
|
||||||
screenshot_image.bits(),
|
screenshot_image.bits(),
|
||||||
[=, this] {
|
[=, this](bool invert_y) {
|
||||||
const std::string std_screenshot_path = screenshot_path.toStdString();
|
const std::string std_screenshot_path = screenshot_path.toStdString();
|
||||||
if (screenshot_image.mirrored(false, true).save(screenshot_path)) {
|
if (screenshot_image.mirrored(false, invert_y).save(screenshot_path)) {
|
||||||
LOG_INFO(Frontend, "Screenshot saved to \"{}\"", std_screenshot_path);
|
LOG_INFO(Frontend, "Screenshot saved to \"{}\"", std_screenshot_path);
|
||||||
} else {
|
} else {
|
||||||
LOG_ERROR(Frontend, "Failed to save screenshot to \"{}\"", std_screenshot_path);
|
LOG_ERROR(Frontend, "Failed to save screenshot to \"{}\"", std_screenshot_path);
|
||||||
|
|
Loading…
Reference in a new issue