early-access version 4029

This commit is contained in:
pineappleEA 2023-12-20 21:03:12 +01:00
parent 7bd2421ff9
commit 1ff2c37cf5
17 changed files with 89 additions and 20 deletions

View File

@ -305,7 +305,7 @@ find_package(ZLIB 1.2 REQUIRED)
find_package(zstd 1.5 REQUIRED)
if (NOT YUZU_USE_EXTERNAL_VULKAN_HEADERS)
find_package(Vulkan 1.3.256 REQUIRED)
find_package(Vulkan 1.3.274 REQUIRED)
endif()
if (ENABLE_LIBUSB)

View File

@ -1,7 +1,7 @@
yuzu emulator early access
=============
This is the source code for early-access 4028.
This is the source code for early-access 4029.
## Legal Notice

View File

@ -39,7 +39,7 @@ fpsimd_context* GetFloatingPointState(mcontext_t& host_ctx) {
}
using namespace Common::Literals;
constexpr u32 StackSize = 32_KiB;
constexpr u32 StackSize = 128_KiB;
} // namespace

View File

@ -5,8 +5,6 @@
#include "common/bit_cast.h"
#include "core/arm/nce/interpreter_visitor.h"
#include <dynarmic/frontend/A64/decoder/a64.h>
namespace Core {
template <u32 BitSize>
@ -249,6 +247,7 @@ bool InterpreterVisitor::LDR_lit_fpsimd(Imm<2> opc, Imm<19> imm19, Vec Vt) {
return false;
}
// Size in bytes
const u64 size = 4 << opc.ZeroExtend();
const u64 offset = imm19.SignExtend<u64>() << 2;
const u64 address = this->GetPc() + offset;
@ -530,7 +529,7 @@ bool InterpreterVisitor::SIMDImmediate(bool wback, bool postindex, size_t scale,
}
case MemOp::Load: {
u128 data{};
m_memory.ReadBlock(address, &data, datasize);
m_memory.ReadBlock(address, &data, datasize / 8);
this->SetVec(Vt, data);
break;
}

View File

@ -4,9 +4,15 @@
#pragma once
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wshadow"
#include <dynarmic/frontend/A64/a64_types.h>
#include <dynarmic/frontend/A64/decoder/a64.h>
#include <dynarmic/frontend/imm.h>
#pragma GCC diagnostic pop
namespace Core {
class VisitorBase {

View File

@ -74,9 +74,17 @@ spv::ImageFormat GetImageFormat(ImageFormat format) {
throw InvalidArgument("Invalid image format {}", format);
}
/// Picks the sampled type used when declaring an image: ctx.F32[1] when the descriptor is
/// flagged as float, ctx.U32[1] otherwise.
Id GetImageSampledType(EmitContext& ctx, const ImageDescriptor& desc) {
    return desc.is_float ? ctx.F32[1] : ctx.U32[1];
}
Id ImageType(EmitContext& ctx, const ImageDescriptor& desc) {
const spv::ImageFormat format{GetImageFormat(desc.format)};
const Id type{ctx.U32[1]};
const Id type{GetImageSampledType(ctx, desc)};
switch (desc.type) {
case TextureType::Color1D:
return ctx.TypeImage(type, spv::Dim::Dim1D, false, false, false, 2, format);

View File

@ -512,7 +512,7 @@ OPCODE(ImageQueryDimensions, U32x4, Opaq
OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, )
OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, )
OPCODE(ImageRead, U32x4, Opaque, Opaque, )
OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, )
OPCODE(ImageWrite, Void, Opaque, Opaque, Opaque, )
OPCODE(IsTextureScaled, U1, U32, )
OPCODE(IsImageScaled, U1, U32, )

View File

@ -19,8 +19,10 @@ struct HostTranslateInfo {
u32 min_ssbo_alignment{}; ///< Minimum alignment supported by the device for SSBOs
bool support_geometry_shader_passthrough{}; ///< True when the device supports geometry
///< passthrough shaders
bool support_conditional_barrier{}; ///< True when the device supports barriers in conditional
///< control flow
bool support_conditional_barrier{}; ///< True when the device supports barriers in conditional
///< control flow
bool support_ufloat_write_as_uint{}; ///< True when the device supports writing float images
///< as bitcasts to uint
};
} // namespace Shader

View File

@ -372,6 +372,10 @@ TexturePixelFormat ReadTexturePixelFormat(Environment& env, const ConstBufferAdd
return env.ReadTexturePixelFormat(GetTextureHandle(env, cbuf));
}
/// Returns true when the pixel format read for the texture at the given constant buffer address
/// is B10G11R11_FLOAT, the only format this helper treats as float.
bool TexturePixelFormatIsFloat(Environment& env, const ConstBufferAddr& cbuf) {
    const TexturePixelFormat pixel_format{ReadTexturePixelFormat(env, cbuf)};
    return pixel_format == TexturePixelFormat::B10G11R11_FLOAT;
}
class Descriptors {
public:
explicit Descriptors(TextureBufferDescriptors& texture_buffer_descriptors_,
@ -428,8 +432,9 @@ public:
return desc.type == existing.type && desc.format == existing.format &&
desc.cbuf_index == existing.cbuf_index &&
desc.cbuf_offset == existing.cbuf_offset && desc.count == existing.count &&
desc.size_shift == existing.size_shift;
desc.size_shift == existing.size_shift && desc.is_float == existing.is_float;
})};
// TODO: handle is_float?
image_descriptors[index].is_written |= desc.is_written;
image_descriptors[index].is_read |= desc.is_read;
return index;
@ -500,6 +505,19 @@ void PatchTexelFetch(IR::Block& block, IR::Inst& inst, TexturePixelFormat pixel_
ir.FPMul(ir.ConvertSToF(32, 32, ir.BitCast<IR::U32>(w)), max_value));
inst.ReplaceUsesWith(converted);
}
/// Replaces the value operand (argument 2) of an image write with a four-component composite
/// whose elements are F32 bitcasts of the original operand's U32 components.
void PatchSmallFloatImageWrite(IR::Block& block, IR::Inst& inst) {
    // Emitter positioned at the instruction being patched
    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
    const IR::Value texel{inst.Arg(2)};

    // Pulls one component out of the original operand and reinterprets its bits as F32
    const auto component_as_f32{[&ir, &texel](size_t index) -> IR::F32 {
        return ir.BitCast<IR::F32>(IR::U32(ir.CompositeExtract(texel, index)));
    }};

    // Components are converted in index order, one statement each
    const IR::F32 c0{component_as_f32(0)};
    const IR::F32 c1{component_as_f32(1)};
    const IR::F32 c2{component_as_f32(2)};
    const IR::F32 c3{component_as_f32(3)};

    const IR::Value float_texel = ir.CompositeConstruct(c0, c1, c2, c3);
    inst.SetArg(2, float_texel);
}
} // Anonymous namespace
void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo& host_info) {
@ -531,6 +549,9 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo
inst->ReplaceOpcode(IndexedInstruction(*inst));
const auto& cbuf{texture_inst.cbuf};
const bool is_float_write{!host_info.support_ufloat_write_as_uint &&
inst->GetOpcode() == IR::Opcode::ImageWrite &&
TexturePixelFormatIsFloat(env, cbuf)};
auto flags{inst->Flags<IR::TextureInstInfo>()};
bool is_multisample{false};
switch (inst->GetOpcode()) {
@ -603,6 +624,7 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo
.format = flags.image_format,
.is_written = is_written,
.is_read = is_read,
.is_float = is_float_write,
.cbuf_index = cbuf.index,
.cbuf_offset = cbuf.offset,
.count = cbuf.count,
@ -662,6 +684,10 @@ void TexturePass(Environment& env, IR::Program& program, const HostTranslateInfo
PatchTexelFetch(*texture_inst.block, *texture_inst.inst, pixel_format);
}
}
if (is_float_write) {
PatchSmallFloatImageWrite(*texture_inst.block, *inst);
}
}
}

View File

@ -42,6 +42,7 @@ enum class TexturePixelFormat : u32 {
R16G16B16A16_SNORM,
R16G16_SNORM,
R16_SNORM,
B10G11R11_FLOAT,
OTHER
};
@ -129,6 +130,7 @@ struct ImageDescriptor {
ImageFormat format;
bool is_written;
bool is_read;
bool is_float;
u32 cbuf_index;
u32 cbuf_offset;
u32 count;

View File

@ -417,7 +417,7 @@ void RasterizerOpenGL::DispatchCompute() {
void RasterizerOpenGL::ResetCounter(VideoCommon::QueryType type) {
const auto query_cache_type = MaxwellToVideoCoreQuery(type);
if (!query_cache_type.has_value()) {
UNIMPLEMENTED_MSG("Reset query type: {}", type);
UNIMPLEMENTED_IF_MSG(type != VideoCommon::QueryType::Payload, "Reset query type: {}", type);
return;
}
query_cache.ResetCounter(*query_cache_type);

View File

@ -245,6 +245,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
.min_ssbo_alignment = static_cast<u32>(device.GetShaderStorageBufferAlignment()),
.support_geometry_shader_passthrough = device.HasGeometryShaderPassthrough(),
.support_conditional_barrier = device.SupportsConditionalBarriers(),
.support_ufloat_write_as_uint = true,
} {
if (use_asynchronous_shaders) {
workers = CreateWorkers();

View File

@ -78,8 +78,15 @@ vk::Buffer CreateBuffer(const Device& device, const MemoryAllocator& memory_allo
}
} // Anonymous namespace
// Null-buffer constructor (pre-change form): the runtime argument is unused; only the base class
// is initialized from null_params and the usage tracker is constructed with 4096.
Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
: VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params), tracker{4096} {}
/// Null-buffer constructor. When the device reports null descriptor support nothing else is set
/// up (device stays unset); otherwise the buffer is backed by a buffer obtained from
/// runtime.CreateNullBuffer() and flagged with is_null.
Buffer::Buffer(BufferCacheRuntime& runtime, VideoCommon::NullBufferParams null_params)
    : VideoCommon::BufferBase<VideoCore::RasterizerInterface>(null_params), tracker{4096} {
    const bool needs_backing_buffer{!runtime.device.HasNullDescriptor()};
    if (needs_backing_buffer) {
        device = &runtime.device;
        buffer = runtime.CreateNullBuffer();
        is_null = true;
    }
}
Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
VAddr cpu_addr_, u64 size_bytes_)
@ -93,8 +100,12 @@ Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rast
VkBufferView Buffer::View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format) {
if (!device) {
// Null buffer, return a null descriptor
// Null descriptor supported, return a null descriptor
return VK_NULL_HANDLE;
} else if (is_null) {
// Null descriptor not supported, adjust offset and size
offset = 0;
size = 0;
}
const auto it{std::ranges::find_if(views, [offset, size, format](const BufferView& view) {
return offset == view.offset && size == view.size && format == view.format;
@ -627,9 +638,12 @@ void BufferCacheRuntime::BindTransformFeedbackBuffers(VideoCommon::HostBindings<
}
void BufferCacheRuntime::ReserveNullBuffer() {
if (null_buffer) {
return;
if (!null_buffer) {
null_buffer = CreateNullBuffer();
}
}
vk::Buffer BufferCacheRuntime::CreateNullBuffer() {
VkBufferCreateInfo create_info{
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
@ -644,15 +658,17 @@ void BufferCacheRuntime::ReserveNullBuffer() {
if (device.IsExtTransformFeedbackSupported()) {
create_info.usage |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
}
null_buffer = memory_allocator.CreateBuffer(create_info, MemoryUsage::DeviceLocal);
vk::Buffer ret = memory_allocator.CreateBuffer(create_info, MemoryUsage::DeviceLocal);
if (device.HasDebuggingToolAttached()) {
null_buffer.SetObjectNameEXT("Null buffer");
ret.SetObjectNameEXT("Null buffer");
}
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([buffer = *null_buffer](vk::CommandBuffer cmdbuf) {
scheduler.Record([buffer = *ret](vk::CommandBuffer cmdbuf) {
cmdbuf.FillBuffer(buffer, 0, VK_WHOLE_SIZE, 0);
});
return ret;
}
} // namespace Vulkan

View File

@ -63,6 +63,7 @@ private:
vk::Buffer buffer;
std::vector<BufferView> views;
VideoCommon::UsageTracker tracker;
bool is_null{};
};
class QuadArrayIndexBuffer;
@ -151,6 +152,7 @@ private:
}
void ReserveNullBuffer();
vk::Buffer CreateNullBuffer();
const Device& device;
MemoryAllocator& memory_allocator;

View File

@ -388,6 +388,9 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
.min_ssbo_alignment = static_cast<u32>(device.GetStorageBufferAlignment()),
.support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(),
.support_conditional_barrier = device.SupportsConditionalBarriers(),
.support_ufloat_write_as_uint = driver_id != VK_DRIVER_ID_QUALCOMM_PROPRIETARY &&
driver_id != VK_DRIVER_ID_MESA_TURNIP &&
driver_id != VK_DRIVER_ID_ARM_PROPRIETARY,
};
if (device.GetMaxVertexInputAttributes() < Maxwell::NumVertexAttributes) {

View File

@ -76,6 +76,8 @@ static Shader::TexturePixelFormat ConvertTexturePixelFormat(const Tegra::Texture
return Shader::TexturePixelFormat::R16G16_SNORM;
case VideoCore::Surface::PixelFormat::R16_SNORM:
return Shader::TexturePixelFormat::R16_SNORM;
case VideoCore::Surface::PixelFormat::B10G11R11_FLOAT:
return Shader::TexturePixelFormat::B10G11R11_FLOAT;
default:
return Shader::TexturePixelFormat::OTHER;
}

View File

@ -377,6 +377,8 @@ const char* ToString(VkResult result) noexcept {
return "VK_OPERATION_DEFERRED_KHR";
case VkResult::VK_OPERATION_NOT_DEFERRED_KHR:
return "VK_OPERATION_NOT_DEFERRED_KHR";
case VkResult::VK_ERROR_INVALID_VIDEO_STD_PARAMETERS_KHR:
return "VK_ERROR_INVALID_VIDEO_STD_PARAMETERS_KHR";
case VkResult::VK_PIPELINE_COMPILE_REQUIRED_EXT:
return "VK_PIPELINE_COMPILE_REQUIRED_EXT";
case VkResult::VK_RESULT_MAX_ENUM: