Merge pull request #4967 from ReinUsesLisp/new-texcache
video_core/texture_cache: Rewrite the texture cache
This commit is contained in:
commit
d5fe722a30
165 changed files with 11189 additions and 8240 deletions
|
@ -25,6 +25,7 @@ add_library(video_core STATIC
|
|||
command_classes/vic.h
|
||||
compatible_formats.cpp
|
||||
compatible_formats.h
|
||||
delayed_destruction_ring.h
|
||||
dirty_flags.cpp
|
||||
dirty_flags.h
|
||||
dma_pusher.cpp
|
||||
|
@ -84,14 +85,10 @@ add_library(video_core STATIC
|
|||
renderer_opengl/gl_device.h
|
||||
renderer_opengl/gl_fence_manager.cpp
|
||||
renderer_opengl/gl_fence_manager.h
|
||||
renderer_opengl/gl_framebuffer_cache.cpp
|
||||
renderer_opengl/gl_framebuffer_cache.h
|
||||
renderer_opengl/gl_rasterizer.cpp
|
||||
renderer_opengl/gl_rasterizer.h
|
||||
renderer_opengl/gl_resource_manager.cpp
|
||||
renderer_opengl/gl_resource_manager.h
|
||||
renderer_opengl/gl_sampler_cache.cpp
|
||||
renderer_opengl/gl_sampler_cache.h
|
||||
renderer_opengl/gl_shader_cache.cpp
|
||||
renderer_opengl/gl_shader_cache.h
|
||||
renderer_opengl/gl_shader_decompiler.cpp
|
||||
|
@ -113,8 +110,10 @@ add_library(video_core STATIC
|
|||
renderer_opengl/maxwell_to_gl.h
|
||||
renderer_opengl/renderer_opengl.cpp
|
||||
renderer_opengl/renderer_opengl.h
|
||||
renderer_opengl/utils.cpp
|
||||
renderer_opengl/utils.h
|
||||
renderer_opengl/util_shaders.cpp
|
||||
renderer_opengl/util_shaders.h
|
||||
renderer_vulkan/blit_image.cpp
|
||||
renderer_vulkan/blit_image.h
|
||||
renderer_vulkan/fixed_pipeline_state.cpp
|
||||
renderer_vulkan/fixed_pipeline_state.h
|
||||
renderer_vulkan/maxwell_to_vk.cpp
|
||||
|
@ -141,8 +140,6 @@ add_library(video_core STATIC
|
|||
renderer_vulkan/vk_fence_manager.h
|
||||
renderer_vulkan/vk_graphics_pipeline.cpp
|
||||
renderer_vulkan/vk_graphics_pipeline.h
|
||||
renderer_vulkan/vk_image.cpp
|
||||
renderer_vulkan/vk_image.h
|
||||
renderer_vulkan/vk_master_semaphore.cpp
|
||||
renderer_vulkan/vk_master_semaphore.h
|
||||
renderer_vulkan/vk_memory_manager.cpp
|
||||
|
@ -153,12 +150,8 @@ add_library(video_core STATIC
|
|||
renderer_vulkan/vk_query_cache.h
|
||||
renderer_vulkan/vk_rasterizer.cpp
|
||||
renderer_vulkan/vk_rasterizer.h
|
||||
renderer_vulkan/vk_renderpass_cache.cpp
|
||||
renderer_vulkan/vk_renderpass_cache.h
|
||||
renderer_vulkan/vk_resource_pool.cpp
|
||||
renderer_vulkan/vk_resource_pool.h
|
||||
renderer_vulkan/vk_sampler_cache.cpp
|
||||
renderer_vulkan/vk_sampler_cache.h
|
||||
renderer_vulkan/vk_scheduler.cpp
|
||||
renderer_vulkan/vk_scheduler.h
|
||||
renderer_vulkan/vk_shader_decompiler.cpp
|
||||
|
@ -179,8 +172,6 @@ add_library(video_core STATIC
|
|||
renderer_vulkan/vk_update_descriptor.h
|
||||
renderer_vulkan/wrapper.cpp
|
||||
renderer_vulkan/wrapper.h
|
||||
sampler_cache.cpp
|
||||
sampler_cache.h
|
||||
shader_cache.h
|
||||
shader_notify.cpp
|
||||
shader_notify.h
|
||||
|
@ -237,19 +228,32 @@ add_library(video_core STATIC
|
|||
shader/transform_feedback.h
|
||||
surface.cpp
|
||||
surface.h
|
||||
texture_cache/accelerated_swizzle.cpp
|
||||
texture_cache/accelerated_swizzle.h
|
||||
texture_cache/decode_bc4.cpp
|
||||
texture_cache/decode_bc4.h
|
||||
texture_cache/descriptor_table.h
|
||||
texture_cache/formatter.cpp
|
||||
texture_cache/formatter.h
|
||||
texture_cache/format_lookup_table.cpp
|
||||
texture_cache/format_lookup_table.h
|
||||
texture_cache/surface_base.cpp
|
||||
texture_cache/surface_base.h
|
||||
texture_cache/surface_params.cpp
|
||||
texture_cache/surface_params.h
|
||||
texture_cache/surface_view.cpp
|
||||
texture_cache/surface_view.h
|
||||
texture_cache/image_base.cpp
|
||||
texture_cache/image_base.h
|
||||
texture_cache/image_info.cpp
|
||||
texture_cache/image_info.h
|
||||
texture_cache/image_view_base.cpp
|
||||
texture_cache/image_view_base.h
|
||||
texture_cache/image_view_info.cpp
|
||||
texture_cache/image_view_info.h
|
||||
texture_cache/render_targets.h
|
||||
texture_cache/samples_helper.h
|
||||
texture_cache/slot_vector.h
|
||||
texture_cache/texture_cache.h
|
||||
texture_cache/types.h
|
||||
texture_cache/util.cpp
|
||||
texture_cache/util.h
|
||||
textures/astc.cpp
|
||||
textures/astc.h
|
||||
textures/convert.cpp
|
||||
textures/convert.h
|
||||
textures/decoders.cpp
|
||||
textures/decoders.h
|
||||
textures/texture.cpp
|
||||
|
|
|
@ -118,20 +118,17 @@ public:
|
|||
/// Prepares the buffer cache for data uploading
|
||||
/// @param max_size Maximum number of bytes that will be uploaded
|
||||
/// @return True when a stream buffer invalidation was required, false otherwise
|
||||
bool Map(std::size_t max_size) {
|
||||
void Map(std::size_t max_size) {
|
||||
std::lock_guard lock{mutex};
|
||||
|
||||
bool invalidated;
|
||||
std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4);
|
||||
std::tie(buffer_ptr, buffer_offset_base) = stream_buffer.Map(max_size, 4);
|
||||
buffer_offset = buffer_offset_base;
|
||||
|
||||
return invalidated;
|
||||
}
|
||||
|
||||
/// Finishes the upload stream
|
||||
void Unmap() {
|
||||
std::lock_guard lock{mutex};
|
||||
stream_buffer->Unmap(buffer_offset - buffer_offset_base);
|
||||
stream_buffer.Unmap(buffer_offset - buffer_offset_base);
|
||||
}
|
||||
|
||||
/// Function called at the end of each frame, inteded for deferred operations
|
||||
|
@ -261,9 +258,9 @@ public:
|
|||
protected:
|
||||
explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
|
||||
Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
|
||||
std::unique_ptr<StreamBuffer> stream_buffer_)
|
||||
StreamBuffer& stream_buffer_)
|
||||
: rasterizer{rasterizer_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_},
|
||||
stream_buffer{std::move(stream_buffer_)}, stream_buffer_handle{stream_buffer->Handle()} {}
|
||||
stream_buffer{stream_buffer_} {}
|
||||
|
||||
~BufferCache() = default;
|
||||
|
||||
|
@ -441,7 +438,7 @@ private:
|
|||
|
||||
buffer_ptr += size;
|
||||
buffer_offset += size;
|
||||
return BufferInfo{stream_buffer->Handle(), uploaded_offset, stream_buffer->Address()};
|
||||
return BufferInfo{stream_buffer.Handle(), uploaded_offset, stream_buffer.Address()};
|
||||
}
|
||||
|
||||
void AlignBuffer(std::size_t alignment) {
|
||||
|
@ -567,9 +564,7 @@ private:
|
|||
VideoCore::RasterizerInterface& rasterizer;
|
||||
Tegra::MemoryManager& gpu_memory;
|
||||
Core::Memory::Memory& cpu_memory;
|
||||
|
||||
std::unique_ptr<StreamBuffer> stream_buffer;
|
||||
BufferType stream_buffer_handle;
|
||||
StreamBuffer& stream_buffer;
|
||||
|
||||
u8* buffer_ptr = nullptr;
|
||||
u64 buffer_offset = 0;
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/gpu.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/texture_cache/surface_params.h"
|
||||
#include "video_core/textures/decoders.h"
|
||||
|
||||
extern "C" {
|
||||
#include <libswscale/swscale.h>
|
||||
|
@ -105,9 +105,9 @@ void Vic::Execute() {
|
|||
const auto size = Tegra::Texture::CalculateSize(true, 4, frame->width, frame->height, 1,
|
||||
block_height, 0);
|
||||
std::vector<u8> swizzled_data(size);
|
||||
Tegra::Texture::CopySwizzledData(frame->width, frame->height, 1, 4, 4,
|
||||
swizzled_data.data(), converted_frame_buffer.get(),
|
||||
false, block_height, 0, 1);
|
||||
Tegra::Texture::SwizzleSubrect(frame->width, frame->height, frame->width * 4,
|
||||
frame->width, 4, swizzled_data.data(),
|
||||
converted_frame_buffer.get(), block_height, 0, 0);
|
||||
|
||||
gpu.MemoryManager().WriteBlock(output_surface_luma_address, swizzled_data.data(), size);
|
||||
gpu.Maxwell3D().OnMemoryWrite();
|
||||
|
|
|
@ -3,9 +3,9 @@
|
|||
// Refer to the license.txt file included.
|
||||
|
||||
#include <array>
|
||||
#include <bitset>
|
||||
#include <cstddef>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/compatible_formats.h"
|
||||
#include "video_core/surface.h"
|
||||
|
||||
|
@ -13,23 +13,25 @@ namespace VideoCore::Surface {
|
|||
|
||||
namespace {
|
||||
|
||||
using Table = std::array<std::array<u64, 2>, MaxPixelFormat>;
|
||||
|
||||
// Compatibility table taken from Table 3.X.2 in:
|
||||
// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_view.txt
|
||||
|
||||
constexpr std::array VIEW_CLASS_128_BITS = {
|
||||
constexpr std::array VIEW_CLASS_128_BITS{
|
||||
PixelFormat::R32G32B32A32_FLOAT,
|
||||
PixelFormat::R32G32B32A32_UINT,
|
||||
PixelFormat::R32G32B32A32_SINT,
|
||||
};
|
||||
|
||||
constexpr std::array VIEW_CLASS_96_BITS = {
|
||||
constexpr std::array VIEW_CLASS_96_BITS{
|
||||
PixelFormat::R32G32B32_FLOAT,
|
||||
};
|
||||
// Missing formats:
|
||||
// PixelFormat::RGB32UI,
|
||||
// PixelFormat::RGB32I,
|
||||
|
||||
constexpr std::array VIEW_CLASS_64_BITS = {
|
||||
constexpr std::array VIEW_CLASS_64_BITS{
|
||||
PixelFormat::R32G32_FLOAT, PixelFormat::R32G32_UINT,
|
||||
PixelFormat::R32G32_SINT, PixelFormat::R16G16B16A16_FLOAT,
|
||||
PixelFormat::R16G16B16A16_UNORM, PixelFormat::R16G16B16A16_SNORM,
|
||||
|
@ -38,7 +40,7 @@ constexpr std::array VIEW_CLASS_64_BITS = {
|
|||
|
||||
// TODO: How should we handle 48 bits?
|
||||
|
||||
constexpr std::array VIEW_CLASS_32_BITS = {
|
||||
constexpr std::array VIEW_CLASS_32_BITS{
|
||||
PixelFormat::R16G16_FLOAT, PixelFormat::B10G11R11_FLOAT, PixelFormat::R32_FLOAT,
|
||||
PixelFormat::A2B10G10R10_UNORM, PixelFormat::R16G16_UINT, PixelFormat::R32_UINT,
|
||||
PixelFormat::R16G16_SINT, PixelFormat::R32_SINT, PixelFormat::A8B8G8R8_UNORM,
|
||||
|
@ -50,43 +52,105 @@ constexpr std::array VIEW_CLASS_32_BITS = {
|
|||
|
||||
// TODO: How should we handle 24 bits?
|
||||
|
||||
constexpr std::array VIEW_CLASS_16_BITS = {
|
||||
constexpr std::array VIEW_CLASS_16_BITS{
|
||||
PixelFormat::R16_FLOAT, PixelFormat::R8G8_UINT, PixelFormat::R16_UINT,
|
||||
PixelFormat::R16_SINT, PixelFormat::R8G8_UNORM, PixelFormat::R16_UNORM,
|
||||
PixelFormat::R8G8_SNORM, PixelFormat::R16_SNORM, PixelFormat::R8G8_SINT,
|
||||
};
|
||||
|
||||
constexpr std::array VIEW_CLASS_8_BITS = {
|
||||
constexpr std::array VIEW_CLASS_8_BITS{
|
||||
PixelFormat::R8_UINT,
|
||||
PixelFormat::R8_UNORM,
|
||||
PixelFormat::R8_SINT,
|
||||
PixelFormat::R8_SNORM,
|
||||
};
|
||||
|
||||
constexpr std::array VIEW_CLASS_RGTC1_RED = {
|
||||
constexpr std::array VIEW_CLASS_RGTC1_RED{
|
||||
PixelFormat::BC4_UNORM,
|
||||
PixelFormat::BC4_SNORM,
|
||||
};
|
||||
|
||||
constexpr std::array VIEW_CLASS_RGTC2_RG = {
|
||||
constexpr std::array VIEW_CLASS_RGTC2_RG{
|
||||
PixelFormat::BC5_UNORM,
|
||||
PixelFormat::BC5_SNORM,
|
||||
};
|
||||
|
||||
constexpr std::array VIEW_CLASS_BPTC_UNORM = {
|
||||
constexpr std::array VIEW_CLASS_BPTC_UNORM{
|
||||
PixelFormat::BC7_UNORM,
|
||||
PixelFormat::BC7_SRGB,
|
||||
};
|
||||
|
||||
constexpr std::array VIEW_CLASS_BPTC_FLOAT = {
|
||||
constexpr std::array VIEW_CLASS_BPTC_FLOAT{
|
||||
PixelFormat::BC6H_SFLOAT,
|
||||
PixelFormat::BC6H_UFLOAT,
|
||||
};
|
||||
|
||||
constexpr std::array VIEW_CLASS_ASTC_4x4_RGBA{
|
||||
PixelFormat::ASTC_2D_4X4_UNORM,
|
||||
PixelFormat::ASTC_2D_4X4_SRGB,
|
||||
};
|
||||
|
||||
constexpr std::array VIEW_CLASS_ASTC_5x4_RGBA{
|
||||
PixelFormat::ASTC_2D_5X4_UNORM,
|
||||
PixelFormat::ASTC_2D_5X4_SRGB,
|
||||
};
|
||||
|
||||
constexpr std::array VIEW_CLASS_ASTC_5x5_RGBA{
|
||||
PixelFormat::ASTC_2D_5X5_UNORM,
|
||||
PixelFormat::ASTC_2D_5X5_SRGB,
|
||||
};
|
||||
|
||||
constexpr std::array VIEW_CLASS_ASTC_6x5_RGBA{
|
||||
PixelFormat::ASTC_2D_6X5_UNORM,
|
||||
PixelFormat::ASTC_2D_6X5_SRGB,
|
||||
};
|
||||
|
||||
constexpr std::array VIEW_CLASS_ASTC_6x6_RGBA{
|
||||
PixelFormat::ASTC_2D_6X6_UNORM,
|
||||
PixelFormat::ASTC_2D_6X6_SRGB,
|
||||
};
|
||||
|
||||
constexpr std::array VIEW_CLASS_ASTC_8x5_RGBA{
|
||||
PixelFormat::ASTC_2D_8X5_UNORM,
|
||||
PixelFormat::ASTC_2D_8X5_SRGB,
|
||||
};
|
||||
|
||||
constexpr std::array VIEW_CLASS_ASTC_8x8_RGBA{
|
||||
PixelFormat::ASTC_2D_8X8_UNORM,
|
||||
PixelFormat::ASTC_2D_8X8_SRGB,
|
||||
};
|
||||
|
||||
// Missing formats:
|
||||
// PixelFormat::ASTC_2D_10X5_UNORM
|
||||
// PixelFormat::ASTC_2D_10X5_SRGB
|
||||
|
||||
// Missing formats:
|
||||
// PixelFormat::ASTC_2D_10X6_UNORM
|
||||
// PixelFormat::ASTC_2D_10X6_SRGB
|
||||
|
||||
constexpr std::array VIEW_CLASS_ASTC_10x8_RGBA{
|
||||
PixelFormat::ASTC_2D_10X8_UNORM,
|
||||
PixelFormat::ASTC_2D_10X8_SRGB,
|
||||
};
|
||||
|
||||
constexpr std::array VIEW_CLASS_ASTC_10x10_RGBA{
|
||||
PixelFormat::ASTC_2D_10X10_UNORM,
|
||||
PixelFormat::ASTC_2D_10X10_SRGB,
|
||||
};
|
||||
|
||||
// Missing formats
|
||||
// ASTC_2D_12X10_UNORM,
|
||||
// ASTC_2D_12X10_SRGB,
|
||||
|
||||
constexpr std::array VIEW_CLASS_ASTC_12x12_RGBA{
|
||||
PixelFormat::ASTC_2D_12X12_UNORM,
|
||||
PixelFormat::ASTC_2D_12X12_SRGB,
|
||||
};
|
||||
|
||||
// Compatibility table taken from Table 4.X.1 in:
|
||||
// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_copy_image.txt
|
||||
|
||||
constexpr std::array COPY_CLASS_128_BITS = {
|
||||
constexpr std::array COPY_CLASS_128_BITS{
|
||||
PixelFormat::R32G32B32A32_UINT, PixelFormat::R32G32B32A32_FLOAT, PixelFormat::R32G32B32A32_SINT,
|
||||
PixelFormat::BC2_UNORM, PixelFormat::BC2_SRGB, PixelFormat::BC3_UNORM,
|
||||
PixelFormat::BC3_SRGB, PixelFormat::BC5_UNORM, PixelFormat::BC5_SNORM,
|
||||
|
@ -97,7 +161,7 @@ constexpr std::array COPY_CLASS_128_BITS = {
|
|||
// PixelFormat::RGBA32I
|
||||
// COMPRESSED_RG_RGTC2
|
||||
|
||||
constexpr std::array COPY_CLASS_64_BITS = {
|
||||
constexpr std::array COPY_CLASS_64_BITS{
|
||||
PixelFormat::R16G16B16A16_FLOAT, PixelFormat::R16G16B16A16_UINT,
|
||||
PixelFormat::R16G16B16A16_UNORM, PixelFormat::R16G16B16A16_SNORM,
|
||||
PixelFormat::R16G16B16A16_SINT, PixelFormat::R32G32_UINT,
|
||||
|
@ -110,32 +174,36 @@ constexpr std::array COPY_CLASS_64_BITS = {
|
|||
// COMPRESSED_RGBA_S3TC_DXT1_EXT
|
||||
// COMPRESSED_SIGNED_RED_RGTC1
|
||||
|
||||
void Enable(FormatCompatibility::Table& compatiblity, size_t format_a, size_t format_b) {
|
||||
compatiblity[format_a][format_b] = true;
|
||||
compatiblity[format_b][format_a] = true;
|
||||
constexpr void Enable(Table& table, size_t format_a, size_t format_b) {
|
||||
table[format_a][format_b / 64] |= u64(1) << (format_b % 64);
|
||||
table[format_b][format_a / 64] |= u64(1) << (format_a % 64);
|
||||
}
|
||||
|
||||
void Enable(FormatCompatibility::Table& compatibility, PixelFormat format_a, PixelFormat format_b) {
|
||||
Enable(compatibility, static_cast<size_t>(format_a), static_cast<size_t>(format_b));
|
||||
constexpr void Enable(Table& table, PixelFormat format_a, PixelFormat format_b) {
|
||||
Enable(table, static_cast<size_t>(format_a), static_cast<size_t>(format_b));
|
||||
}
|
||||
|
||||
template <typename Range>
|
||||
void EnableRange(FormatCompatibility::Table& compatibility, const Range& range) {
|
||||
constexpr void EnableRange(Table& table, const Range& range) {
|
||||
for (auto it_a = range.begin(); it_a != range.end(); ++it_a) {
|
||||
for (auto it_b = it_a; it_b != range.end(); ++it_b) {
|
||||
Enable(compatibility, *it_a, *it_b);
|
||||
Enable(table, *it_a, *it_b);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
constexpr bool IsSupported(const Table& table, PixelFormat format_a, PixelFormat format_b) {
|
||||
const size_t a = static_cast<size_t>(format_a);
|
||||
const size_t b = static_cast<size_t>(format_b);
|
||||
return ((table[a][b / 64] >> (b % 64)) & 1) != 0;
|
||||
}
|
||||
|
||||
FormatCompatibility::FormatCompatibility() {
|
||||
constexpr Table MakeViewTable() {
|
||||
Table view{};
|
||||
for (size_t i = 0; i < MaxPixelFormat; ++i) {
|
||||
// Identity is allowed
|
||||
Enable(view, i, i);
|
||||
}
|
||||
|
||||
EnableRange(view, VIEW_CLASS_128_BITS);
|
||||
EnableRange(view, VIEW_CLASS_96_BITS);
|
||||
EnableRange(view, VIEW_CLASS_64_BITS);
|
||||
|
@ -146,10 +214,36 @@ FormatCompatibility::FormatCompatibility() {
|
|||
EnableRange(view, VIEW_CLASS_RGTC2_RG);
|
||||
EnableRange(view, VIEW_CLASS_BPTC_UNORM);
|
||||
EnableRange(view, VIEW_CLASS_BPTC_FLOAT);
|
||||
EnableRange(view, VIEW_CLASS_ASTC_4x4_RGBA);
|
||||
EnableRange(view, VIEW_CLASS_ASTC_5x4_RGBA);
|
||||
EnableRange(view, VIEW_CLASS_ASTC_5x5_RGBA);
|
||||
EnableRange(view, VIEW_CLASS_ASTC_6x5_RGBA);
|
||||
EnableRange(view, VIEW_CLASS_ASTC_6x6_RGBA);
|
||||
EnableRange(view, VIEW_CLASS_ASTC_8x5_RGBA);
|
||||
EnableRange(view, VIEW_CLASS_ASTC_8x8_RGBA);
|
||||
EnableRange(view, VIEW_CLASS_ASTC_10x8_RGBA);
|
||||
EnableRange(view, VIEW_CLASS_ASTC_10x10_RGBA);
|
||||
EnableRange(view, VIEW_CLASS_ASTC_12x12_RGBA);
|
||||
return view;
|
||||
}
|
||||
|
||||
copy = view;
|
||||
constexpr Table MakeCopyTable() {
|
||||
Table copy = MakeViewTable();
|
||||
EnableRange(copy, COPY_CLASS_128_BITS);
|
||||
EnableRange(copy, COPY_CLASS_64_BITS);
|
||||
return copy;
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b) {
|
||||
static constexpr Table TABLE = MakeViewTable();
|
||||
return IsSupported(TABLE, format_a, format_b);
|
||||
}
|
||||
|
||||
bool IsCopyCompatible(PixelFormat format_a, PixelFormat format_b) {
|
||||
static constexpr Table TABLE = MakeCopyTable();
|
||||
return IsSupported(TABLE, format_a, format_b);
|
||||
}
|
||||
|
||||
} // namespace VideoCore::Surface
|
||||
|
|
|
@ -4,31 +4,12 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <bitset>
|
||||
#include <cstddef>
|
||||
|
||||
#include "video_core/surface.h"
|
||||
|
||||
namespace VideoCore::Surface {
|
||||
|
||||
class FormatCompatibility {
|
||||
public:
|
||||
using Table = std::array<std::bitset<MaxPixelFormat>, MaxPixelFormat>;
|
||||
bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b);
|
||||
|
||||
explicit FormatCompatibility();
|
||||
|
||||
bool TestView(PixelFormat format_a, PixelFormat format_b) const noexcept {
|
||||
return view[static_cast<size_t>(format_a)][static_cast<size_t>(format_b)];
|
||||
}
|
||||
|
||||
bool TestCopy(PixelFormat format_a, PixelFormat format_b) const noexcept {
|
||||
return copy[static_cast<size_t>(format_a)][static_cast<size_t>(format_b)];
|
||||
}
|
||||
|
||||
private:
|
||||
Table view;
|
||||
Table copy;
|
||||
};
|
||||
bool IsCopyCompatible(PixelFormat format_a, PixelFormat format_b);
|
||||
|
||||
} // namespace VideoCore::Surface
|
||||
|
|
32
src/video_core/delayed_destruction_ring.h
Normal file
32
src/video_core/delayed_destruction_ring.h
Normal file
|
@ -0,0 +1,32 @@
|
|||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
/// Container to push objects to be destroyed a few ticks in the future
|
||||
template <typename T, size_t TICKS_TO_DESTROY>
|
||||
class DelayedDestructionRing {
|
||||
public:
|
||||
void Tick() {
|
||||
index = (index + 1) % TICKS_TO_DESTROY;
|
||||
elements[index].clear();
|
||||
}
|
||||
|
||||
void Push(T&& object) {
|
||||
elements[index].push_back(std::move(object));
|
||||
}
|
||||
|
||||
private:
|
||||
size_t index = 0;
|
||||
std::array<std::vector<T>, TICKS_TO_DESTROY> elements;
|
||||
};
|
||||
|
||||
} // namespace VideoCommon
|
|
@ -16,6 +16,9 @@ namespace VideoCommon::Dirty {
|
|||
using Tegra::Engines::Maxwell3D;
|
||||
|
||||
void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tables) {
|
||||
FillBlock(tables[0], OFF(tic), NUM(tic), Descriptors);
|
||||
FillBlock(tables[0], OFF(tsc), NUM(tsc), Descriptors);
|
||||
|
||||
static constexpr std::size_t num_per_rt = NUM(rt[0]);
|
||||
static constexpr std::size_t begin = OFF(rt);
|
||||
static constexpr std::size_t num = num_per_rt * Maxwell3D::Regs::NumRenderTargets;
|
||||
|
@ -23,6 +26,10 @@ void SetupDirtyRenderTargets(Tegra::Engines::Maxwell3D::DirtyState::Tables& tabl
|
|||
FillBlock(tables[0], begin + rt * num_per_rt, num_per_rt, ColorBuffer0 + rt);
|
||||
}
|
||||
FillBlock(tables[1], begin, num, RenderTargets);
|
||||
FillBlock(tables[0], OFF(render_area), NUM(render_area), RenderTargets);
|
||||
|
||||
tables[0][OFF(rt_control)] = RenderTargets;
|
||||
tables[1][OFF(rt_control)] = RenderTargetControl;
|
||||
|
||||
static constexpr std::array zeta_flags{ZetaBuffer, RenderTargets};
|
||||
for (std::size_t i = 0; i < std::size(zeta_flags); ++i) {
|
||||
|
|
|
@ -16,7 +16,10 @@ namespace VideoCommon::Dirty {
|
|||
enum : u8 {
|
||||
NullEntry = 0,
|
||||
|
||||
Descriptors,
|
||||
|
||||
RenderTargets,
|
||||
RenderTargetControl,
|
||||
ColorBuffer0,
|
||||
ColorBuffer1,
|
||||
ColorBuffer2,
|
||||
|
|
|
@ -10,7 +10,11 @@
|
|||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
Fermi2D::Fermi2D() = default;
|
||||
Fermi2D::Fermi2D() {
|
||||
// Nvidia's OpenGL driver seems to assume these values
|
||||
regs.src.depth = 1;
|
||||
regs.dst.depth = 1;
|
||||
}
|
||||
|
||||
Fermi2D::~Fermi2D() = default;
|
||||
|
||||
|
@ -21,78 +25,43 @@ void Fermi2D::BindRasterizer(VideoCore::RasterizerInterface& rasterizer_) {
|
|||
void Fermi2D::CallMethod(u32 method, u32 method_argument, bool is_last_call) {
|
||||
ASSERT_MSG(method < Regs::NUM_REGS,
|
||||
"Invalid Fermi2D register, increase the size of the Regs structure");
|
||||
|
||||
regs.reg_array[method] = method_argument;
|
||||
|
||||
switch (method) {
|
||||
// Trigger the surface copy on the last register write. This is blit_src_y, but this is 64-bit,
|
||||
// so trigger on the second 32-bit write.
|
||||
case FERMI2D_REG_INDEX(blit_src_y) + 1: {
|
||||
HandleSurfaceCopy();
|
||||
break;
|
||||
}
|
||||
if (method == FERMI2D_REG_INDEX(pixels_from_memory.src_y0) + 1) {
|
||||
Blit();
|
||||
}
|
||||
}
|
||||
|
||||
void Fermi2D::CallMultiMethod(u32 method, const u32* base_start, u32 amount, u32 methods_pending) {
|
||||
for (std::size_t i = 0; i < amount; i++) {
|
||||
CallMethod(method, base_start[i], methods_pending - static_cast<u32>(i) <= 1);
|
||||
for (u32 i = 0; i < amount; ++i) {
|
||||
CallMethod(method, base_start[i], methods_pending - i <= 1);
|
||||
}
|
||||
}
|
||||
|
||||
static std::pair<u32, u32> DelimitLine(u32 src_1, u32 src_2, u32 dst_1, u32 dst_2, u32 src_line) {
|
||||
const u32 line_a = src_2 - src_1;
|
||||
const u32 line_b = dst_2 - dst_1;
|
||||
const u32 excess = std::max<s32>(0, line_a - src_line + src_1);
|
||||
return {line_b - (excess * line_b) / line_a, excess};
|
||||
}
|
||||
void Fermi2D::Blit() {
|
||||
LOG_DEBUG(HW_GPU, "called. source address=0x{:x}, destination address=0x{:x}",
|
||||
regs.src.Address(), regs.dst.Address());
|
||||
|
||||
void Fermi2D::HandleSurfaceCopy() {
|
||||
LOG_DEBUG(HW_GPU, "Requested a surface copy with operation {}", regs.operation);
|
||||
UNIMPLEMENTED_IF_MSG(regs.operation != Operation::SrcCopy, "Operation is not copy");
|
||||
UNIMPLEMENTED_IF_MSG(regs.src.layer != 0, "Source layer is not zero");
|
||||
UNIMPLEMENTED_IF_MSG(regs.dst.layer != 0, "Destination layer is not zero");
|
||||
UNIMPLEMENTED_IF_MSG(regs.src.depth != 1, "Source depth is not one");
|
||||
UNIMPLEMENTED_IF_MSG(regs.clip_enable != 0, "Clipped blit enabled");
|
||||
|
||||
// TODO(Subv): Only raw copies are implemented.
|
||||
ASSERT(regs.operation == Operation::SrcCopy);
|
||||
|
||||
const u32 src_blit_x1{static_cast<u32>(regs.blit_src_x >> 32)};
|
||||
const u32 src_blit_y1{static_cast<u32>(regs.blit_src_y >> 32)};
|
||||
u32 src_blit_x2, src_blit_y2;
|
||||
if (regs.blit_control.origin == Origin::Corner) {
|
||||
src_blit_x2 =
|
||||
static_cast<u32>((regs.blit_src_x + (regs.blit_du_dx * regs.blit_dst_width)) >> 32);
|
||||
src_blit_y2 =
|
||||
static_cast<u32>((regs.blit_src_y + (regs.blit_dv_dy * regs.blit_dst_height)) >> 32);
|
||||
} else {
|
||||
src_blit_x2 = static_cast<u32>((regs.blit_src_x >> 32) + regs.blit_dst_width);
|
||||
src_blit_y2 = static_cast<u32>((regs.blit_src_y >> 32) + regs.blit_dst_height);
|
||||
}
|
||||
u32 dst_blit_x2 = regs.blit_dst_x + regs.blit_dst_width;
|
||||
u32 dst_blit_y2 = regs.blit_dst_y + regs.blit_dst_height;
|
||||
const auto [new_dst_w, src_excess_x] =
|
||||
DelimitLine(src_blit_x1, src_blit_x2, regs.blit_dst_x, dst_blit_x2, regs.src.width);
|
||||
const auto [new_dst_h, src_excess_y] =
|
||||
DelimitLine(src_blit_y1, src_blit_y2, regs.blit_dst_y, dst_blit_y2, regs.src.height);
|
||||
dst_blit_x2 = new_dst_w + regs.blit_dst_x;
|
||||
src_blit_x2 = src_blit_x2 - src_excess_x;
|
||||
dst_blit_y2 = new_dst_h + regs.blit_dst_y;
|
||||
src_blit_y2 = src_blit_y2 - src_excess_y;
|
||||
const auto [new_src_w, dst_excess_x] =
|
||||
DelimitLine(regs.blit_dst_x, dst_blit_x2, src_blit_x1, src_blit_x2, regs.dst.width);
|
||||
const auto [new_src_h, dst_excess_y] =
|
||||
DelimitLine(regs.blit_dst_y, dst_blit_y2, src_blit_y1, src_blit_y2, regs.dst.height);
|
||||
src_blit_x2 = new_src_w + src_blit_x1;
|
||||
dst_blit_x2 = dst_blit_x2 - dst_excess_x;
|
||||
src_blit_y2 = new_src_h + src_blit_y1;
|
||||
dst_blit_y2 = dst_blit_y2 - dst_excess_y;
|
||||
const Common::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2};
|
||||
const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y, dst_blit_x2,
|
||||
dst_blit_y2};
|
||||
const Config copy_config{
|
||||
const auto& args = regs.pixels_from_memory;
|
||||
const Config config{
|
||||
.operation = regs.operation,
|
||||
.filter = regs.blit_control.filter,
|
||||
.src_rect = src_rect,
|
||||
.dst_rect = dst_rect,
|
||||
.filter = args.sample_mode.filter,
|
||||
.dst_x0 = args.dst_x0,
|
||||
.dst_y0 = args.dst_y0,
|
||||
.dst_x1 = args.dst_x0 + args.dst_width,
|
||||
.dst_y1 = args.dst_y0 + args.dst_height,
|
||||
.src_x0 = static_cast<s32>(args.src_x0 >> 32),
|
||||
.src_y0 = static_cast<s32>(args.src_y0 >> 32),
|
||||
.src_x1 = static_cast<s32>((args.du_dx * args.dst_width + args.src_x0) >> 32),
|
||||
.src_y1 = static_cast<s32>((args.dv_dy * args.dst_height + args.src_y0) >> 32),
|
||||
};
|
||||
if (!rasterizer->AccelerateSurfaceCopy(regs.src, regs.dst, copy_config)) {
|
||||
if (!rasterizer->AccelerateSurfaceCopy(regs.src, regs.dst, config)) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -53,8 +53,8 @@ public:
|
|||
};
|
||||
|
||||
enum class Filter : u32 {
|
||||
PointSample = 0, // Nearest
|
||||
Linear = 1,
|
||||
Point = 0,
|
||||
Bilinear = 1,
|
||||
};
|
||||
|
||||
enum class Operation : u32 {
|
||||
|
@ -67,88 +67,235 @@ public:
|
|||
BlendPremult = 6,
|
||||
};
|
||||
|
||||
struct Regs {
|
||||
static constexpr std::size_t NUM_REGS = 0x258;
|
||||
enum class MemoryLayout : u32 {
|
||||
BlockLinear = 0,
|
||||
Pitch = 1,
|
||||
};
|
||||
|
||||
struct Surface {
|
||||
RenderTargetFormat format;
|
||||
BitField<0, 1, u32> linear;
|
||||
union {
|
||||
BitField<0, 4, u32> block_width;
|
||||
BitField<4, 4, u32> block_height;
|
||||
BitField<8, 4, u32> block_depth;
|
||||
};
|
||||
u32 depth;
|
||||
u32 layer;
|
||||
u32 pitch;
|
||||
u32 width;
|
||||
u32 height;
|
||||
u32 address_high;
|
||||
u32 address_low;
|
||||
|
||||
GPUVAddr Address() const {
|
||||
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
|
||||
address_low);
|
||||
}
|
||||
|
||||
u32 BlockWidth() const {
|
||||
return block_width.Value();
|
||||
}
|
||||
|
||||
u32 BlockHeight() const {
|
||||
return block_height.Value();
|
||||
}
|
||||
|
||||
u32 BlockDepth() const {
|
||||
return block_depth.Value();
|
||||
}
|
||||
};
|
||||
static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size");
|
||||
enum class CpuIndexWrap : u32 {
|
||||
Wrap = 0,
|
||||
NoWrap = 1,
|
||||
};
|
||||
|
||||
struct Surface {
|
||||
RenderTargetFormat format;
|
||||
MemoryLayout linear;
|
||||
union {
|
||||
BitField<0, 4, u32> block_width;
|
||||
BitField<4, 4, u32> block_height;
|
||||
BitField<8, 4, u32> block_depth;
|
||||
};
|
||||
u32 depth;
|
||||
u32 layer;
|
||||
u32 pitch;
|
||||
u32 width;
|
||||
u32 height;
|
||||
u32 addr_upper;
|
||||
u32 addr_lower;
|
||||
|
||||
[[nodiscard]] constexpr GPUVAddr Address() const noexcept {
|
||||
return (static_cast<GPUVAddr>(addr_upper) << 32) | static_cast<GPUVAddr>(addr_lower);
|
||||
}
|
||||
};
|
||||
static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size");
|
||||
|
||||
enum class SectorPromotion : u32 {
|
||||
NoPromotion = 0,
|
||||
PromoteTo2V = 1,
|
||||
PromoteTo2H = 2,
|
||||
PromoteTo4 = 3,
|
||||
};
|
||||
|
||||
enum class NumTpcs : u32 {
|
||||
All = 0,
|
||||
One = 1,
|
||||
};
|
||||
|
||||
enum class RenderEnableMode : u32 {
|
||||
False = 0,
|
||||
True = 1,
|
||||
Conditional = 2,
|
||||
RenderIfEqual = 3,
|
||||
RenderIfNotEqual = 4,
|
||||
};
|
||||
|
||||
enum class ColorKeyFormat : u32 {
|
||||
A16R56G6B5 = 0,
|
||||
A1R5G55B5 = 1,
|
||||
A8R8G8B8 = 2,
|
||||
A2R10G10B10 = 3,
|
||||
Y8 = 4,
|
||||
Y16 = 5,
|
||||
Y32 = 6,
|
||||
};
|
||||
|
||||
union Beta4 {
|
||||
BitField<0, 8, u32> b;
|
||||
BitField<8, 8, u32> g;
|
||||
BitField<16, 8, u32> r;
|
||||
BitField<24, 8, u32> a;
|
||||
};
|
||||
|
||||
struct Point {
|
||||
u32 x;
|
||||
u32 y;
|
||||
};
|
||||
|
||||
enum class PatternSelect : u32 {
|
||||
MonoChrome8x8 = 0,
|
||||
MonoChrome64x1 = 1,
|
||||
MonoChrome1x64 = 2,
|
||||
Color = 3,
|
||||
};
|
||||
|
||||
enum class NotifyType : u32 {
|
||||
WriteOnly = 0,
|
||||
WriteThenAwaken = 1,
|
||||
};
|
||||
|
||||
enum class MonochromePatternColorFormat : u32 {
|
||||
A8X8R8G6B5 = 0,
|
||||
A1R5G5B5 = 1,
|
||||
A8R8G8B8 = 2,
|
||||
A8Y8 = 3,
|
||||
A8X8Y16 = 4,
|
||||
Y32 = 5,
|
||||
};
|
||||
|
||||
enum class MonochromePatternFormat : u32 {
|
||||
CGA6_M1 = 0,
|
||||
LE_M1 = 1,
|
||||
};
|
||||
|
||||
union Regs {
|
||||
static constexpr std::size_t NUM_REGS = 0x258;
|
||||
struct {
|
||||
u32 object;
|
||||
INSERT_UNION_PADDING_WORDS(0x3F);
|
||||
u32 no_operation;
|
||||
NotifyType notify;
|
||||
INSERT_UNION_PADDING_WORDS(0x2);
|
||||
u32 wait_for_idle;
|
||||
INSERT_UNION_PADDING_WORDS(0xB);
|
||||
u32 pm_trigger;
|
||||
INSERT_UNION_PADDING_WORDS(0xF);
|
||||
u32 context_dma_notify;
|
||||
u32 dst_context_dma;
|
||||
u32 src_context_dma;
|
||||
u32 semaphore_context_dma;
|
||||
INSERT_UNION_PADDING_WORDS(0x1C);
|
||||
Surface dst;
|
||||
CpuIndexWrap pixels_from_cpu_index_wrap;
|
||||
u32 kind2d_check_enable;
|
||||
Surface src;
|
||||
SectorPromotion pixels_from_memory_sector_promotion;
|
||||
INSERT_UNION_PADDING_WORDS(0x1);
|
||||
NumTpcs num_tpcs;
|
||||
u32 render_enable_addr_upper;
|
||||
u32 render_enable_addr_lower;
|
||||
RenderEnableMode render_enable_mode;
|
||||
INSERT_UNION_PADDING_WORDS(0x4);
|
||||
u32 clip_x0;
|
||||
u32 clip_y0;
|
||||
u32 clip_width;
|
||||
u32 clip_height;
|
||||
BitField<0, 1, u32> clip_enable;
|
||||
BitField<0, 3, ColorKeyFormat> color_key_format;
|
||||
u32 color_key;
|
||||
BitField<0, 1, u32> color_key_enable;
|
||||
BitField<0, 8, u32> rop;
|
||||
u32 beta1;
|
||||
Beta4 beta4;
|
||||
Operation operation;
|
||||
union {
|
||||
BitField<0, 6, u32> x;
|
||||
BitField<8, 6, u32> y;
|
||||
} pattern_offset;
|
||||
BitField<0, 2, PatternSelect> pattern_select;
|
||||
INSERT_UNION_PADDING_WORDS(0xC);
|
||||
struct {
|
||||
INSERT_UNION_PADDING_WORDS(0x80);
|
||||
|
||||
Surface dst;
|
||||
|
||||
INSERT_UNION_PADDING_WORDS(2);
|
||||
|
||||
Surface src;
|
||||
|
||||
INSERT_UNION_PADDING_WORDS(0x15);
|
||||
|
||||
Operation operation;
|
||||
|
||||
INSERT_UNION_PADDING_WORDS(0x177);
|
||||
|
||||
BitField<0, 3, MonochromePatternColorFormat> color_format;
|
||||
BitField<0, 1, MonochromePatternFormat> format;
|
||||
u32 color0;
|
||||
u32 color1;
|
||||
u32 pattern0;
|
||||
u32 pattern1;
|
||||
} monochrome_pattern;
|
||||
struct {
|
||||
std::array<u32, 0x40> X8R8G8B8;
|
||||
std::array<u32, 0x20> R5G6B5;
|
||||
std::array<u32, 0x20> X1R5G5B5;
|
||||
std::array<u32, 0x10> Y8;
|
||||
} color_pattern;
|
||||
INSERT_UNION_PADDING_WORDS(0x10);
|
||||
struct {
|
||||
u32 prim_mode;
|
||||
u32 prim_color_format;
|
||||
u32 prim_color;
|
||||
u32 line_tie_break_bits;
|
||||
INSERT_UNION_PADDING_WORDS(0x14);
|
||||
u32 prim_point_xy;
|
||||
INSERT_UNION_PADDING_WORDS(0x7);
|
||||
std::array<Point, 0x40> prim_point;
|
||||
} render_solid;
|
||||
struct {
|
||||
u32 data_type;
|
||||
u32 color_format;
|
||||
u32 index_format;
|
||||
u32 mono_format;
|
||||
u32 wrap;
|
||||
u32 color0;
|
||||
u32 color1;
|
||||
u32 mono_opacity;
|
||||
INSERT_UNION_PADDING_WORDS(0x6);
|
||||
u32 src_width;
|
||||
u32 src_height;
|
||||
u32 dx_du_frac;
|
||||
u32 dx_du_int;
|
||||
u32 dx_dv_frac;
|
||||
u32 dy_dv_int;
|
||||
u32 dst_x0_frac;
|
||||
u32 dst_x0_int;
|
||||
u32 dst_y0_frac;
|
||||
u32 dst_y0_int;
|
||||
u32 data;
|
||||
} pixels_from_cpu;
|
||||
INSERT_UNION_PADDING_WORDS(0x3);
|
||||
u32 big_endian_control;
|
||||
INSERT_UNION_PADDING_WORDS(0x3);
|
||||
struct {
|
||||
BitField<0, 3, u32> block_shape;
|
||||
BitField<0, 5, u32> corral_size;
|
||||
BitField<0, 1, u32> safe_overlap;
|
||||
union {
|
||||
u32 raw;
|
||||
BitField<0, 1, Origin> origin;
|
||||
BitField<4, 1, Filter> filter;
|
||||
} blit_control;
|
||||
|
||||
} sample_mode;
|
||||
INSERT_UNION_PADDING_WORDS(0x8);
|
||||
|
||||
u32 blit_dst_x;
|
||||
u32 blit_dst_y;
|
||||
u32 blit_dst_width;
|
||||
u32 blit_dst_height;
|
||||
u64 blit_du_dx;
|
||||
u64 blit_dv_dy;
|
||||
u64 blit_src_x;
|
||||
u64 blit_src_y;
|
||||
|
||||
INSERT_UNION_PADDING_WORDS(0x21);
|
||||
};
|
||||
std::array<u32, NUM_REGS> reg_array;
|
||||
s32 dst_x0;
|
||||
s32 dst_y0;
|
||||
s32 dst_width;
|
||||
s32 dst_height;
|
||||
s64 du_dx;
|
||||
s64 dv_dy;
|
||||
s64 src_x0;
|
||||
s64 src_y0;
|
||||
} pixels_from_memory;
|
||||
};
|
||||
std::array<u32, NUM_REGS> reg_array;
|
||||
} regs{};
|
||||
|
||||
struct Config {
|
||||
Operation operation{};
|
||||
Filter filter{};
|
||||
Common::Rectangle<u32> src_rect;
|
||||
Common::Rectangle<u32> dst_rect;
|
||||
Operation operation;
|
||||
Filter filter;
|
||||
s32 dst_x0;
|
||||
s32 dst_y0;
|
||||
s32 dst_x1;
|
||||
s32 dst_y1;
|
||||
s32 src_x0;
|
||||
s32 src_y0;
|
||||
s32 src_x1;
|
||||
s32 src_y1;
|
||||
};
|
||||
|
||||
private:
|
||||
|
@ -156,25 +303,49 @@ private:
|
|||
|
||||
/// Performs the copy from the source surface to the destination surface as configured in the
|
||||
/// registers.
|
||||
void HandleSurfaceCopy();
|
||||
void Blit();
|
||||
};
|
||||
|
||||
#define ASSERT_REG_POSITION(field_name, position) \
|
||||
static_assert(offsetof(Fermi2D::Regs, field_name) == position * 4, \
|
||||
static_assert(offsetof(Fermi2D::Regs, field_name) == position, \
|
||||
"Field " #field_name " has invalid position")
|
||||
|
||||
ASSERT_REG_POSITION(dst, 0x80);
|
||||
ASSERT_REG_POSITION(src, 0x8C);
|
||||
ASSERT_REG_POSITION(operation, 0xAB);
|
||||
ASSERT_REG_POSITION(blit_control, 0x223);
|
||||
ASSERT_REG_POSITION(blit_dst_x, 0x22c);
|
||||
ASSERT_REG_POSITION(blit_dst_y, 0x22d);
|
||||
ASSERT_REG_POSITION(blit_dst_width, 0x22e);
|
||||
ASSERT_REG_POSITION(blit_dst_height, 0x22f);
|
||||
ASSERT_REG_POSITION(blit_du_dx, 0x230);
|
||||
ASSERT_REG_POSITION(blit_dv_dy, 0x232);
|
||||
ASSERT_REG_POSITION(blit_src_x, 0x234);
|
||||
ASSERT_REG_POSITION(blit_src_y, 0x236);
|
||||
ASSERT_REG_POSITION(object, 0x0);
|
||||
ASSERT_REG_POSITION(no_operation, 0x100);
|
||||
ASSERT_REG_POSITION(notify, 0x104);
|
||||
ASSERT_REG_POSITION(wait_for_idle, 0x110);
|
||||
ASSERT_REG_POSITION(pm_trigger, 0x140);
|
||||
ASSERT_REG_POSITION(context_dma_notify, 0x180);
|
||||
ASSERT_REG_POSITION(dst_context_dma, 0x184);
|
||||
ASSERT_REG_POSITION(src_context_dma, 0x188);
|
||||
ASSERT_REG_POSITION(semaphore_context_dma, 0x18C);
|
||||
ASSERT_REG_POSITION(dst, 0x200);
|
||||
ASSERT_REG_POSITION(pixels_from_cpu_index_wrap, 0x228);
|
||||
ASSERT_REG_POSITION(kind2d_check_enable, 0x22C);
|
||||
ASSERT_REG_POSITION(src, 0x230);
|
||||
ASSERT_REG_POSITION(pixels_from_memory_sector_promotion, 0x258);
|
||||
ASSERT_REG_POSITION(num_tpcs, 0x260);
|
||||
ASSERT_REG_POSITION(render_enable_addr_upper, 0x264);
|
||||
ASSERT_REG_POSITION(render_enable_addr_lower, 0x268);
|
||||
ASSERT_REG_POSITION(clip_x0, 0x280);
|
||||
ASSERT_REG_POSITION(clip_y0, 0x284);
|
||||
ASSERT_REG_POSITION(clip_width, 0x288);
|
||||
ASSERT_REG_POSITION(clip_height, 0x28c);
|
||||
ASSERT_REG_POSITION(clip_enable, 0x290);
|
||||
ASSERT_REG_POSITION(color_key_format, 0x294);
|
||||
ASSERT_REG_POSITION(color_key, 0x298);
|
||||
ASSERT_REG_POSITION(rop, 0x2A0);
|
||||
ASSERT_REG_POSITION(beta1, 0x2A4);
|
||||
ASSERT_REG_POSITION(beta4, 0x2A8);
|
||||
ASSERT_REG_POSITION(operation, 0x2AC);
|
||||
ASSERT_REG_POSITION(pattern_offset, 0x2B0);
|
||||
ASSERT_REG_POSITION(pattern_select, 0x2B4);
|
||||
ASSERT_REG_POSITION(monochrome_pattern, 0x2E8);
|
||||
ASSERT_REG_POSITION(color_pattern, 0x300);
|
||||
ASSERT_REG_POSITION(render_solid, 0x580);
|
||||
ASSERT_REG_POSITION(pixels_from_cpu, 0x800);
|
||||
ASSERT_REG_POSITION(big_endian_control, 0x870);
|
||||
ASSERT_REG_POSITION(pixels_from_memory, 0x880);
|
||||
|
||||
#undef ASSERT_REG_POSITION
|
||||
|
||||
|
|
|
@ -58,24 +58,6 @@ void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amoun
|
|||
}
|
||||
}
|
||||
|
||||
Texture::FullTextureInfo KeplerCompute::GetTexture(std::size_t offset) const {
|
||||
const std::bitset<8> cbuf_mask = launch_description.const_buffer_enable_mask.Value();
|
||||
ASSERT(cbuf_mask[regs.tex_cb_index]);
|
||||
|
||||
const auto& texinfo = launch_description.const_buffer_config[regs.tex_cb_index];
|
||||
ASSERT(texinfo.Address() != 0);
|
||||
|
||||
const GPUVAddr address = texinfo.Address() + offset * sizeof(Texture::TextureHandle);
|
||||
ASSERT(address < texinfo.Address() + texinfo.size);
|
||||
|
||||
const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(address)};
|
||||
return GetTextureInfo(tex_handle);
|
||||
}
|
||||
|
||||
Texture::FullTextureInfo KeplerCompute::GetTextureInfo(Texture::TextureHandle tex_handle) const {
|
||||
return Texture::FullTextureInfo{GetTICEntry(tex_handle.tic_id), GetTSCEntry(tex_handle.tsc_id)};
|
||||
}
|
||||
|
||||
u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const {
|
||||
ASSERT(stage == ShaderType::Compute);
|
||||
const auto& buffer = launch_description.const_buffer_config[const_buffer];
|
||||
|
@ -98,9 +80,11 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con
|
|||
|
||||
SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const {
|
||||
const Texture::TextureHandle tex_handle{handle};
|
||||
const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
|
||||
SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
|
||||
result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
|
||||
const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id);
|
||||
const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id);
|
||||
|
||||
SamplerDescriptor result = SamplerDescriptor::FromTIC(tic);
|
||||
result.is_shadow.Assign(tsc.depth_compare_enabled.Value());
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
|
@ -209,11 +209,6 @@ public:
|
|||
void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
|
||||
u32 methods_pending) override;
|
||||
|
||||
Texture::FullTextureInfo GetTexture(std::size_t offset) const;
|
||||
|
||||
/// Given a texture handle, returns the TSC and TIC entries.
|
||||
Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const;
|
||||
|
||||
u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;
|
||||
|
||||
SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override;
|
||||
|
|
|
@ -2,7 +2,6 @@
|
|||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <cinttypes>
|
||||
#include <cstring>
|
||||
#include <optional>
|
||||
#include "common/assert.h"
|
||||
|
@ -227,6 +226,10 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
|
|||
OnMemoryWrite();
|
||||
}
|
||||
return;
|
||||
case MAXWELL3D_REG_INDEX(fragment_barrier):
|
||||
return rasterizer->FragmentBarrier();
|
||||
case MAXWELL3D_REG_INDEX(tiled_cache_barrier):
|
||||
return rasterizer->TiledCacheBarrier();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -639,7 +642,7 @@ void Maxwell3D::FinishCBData() {
|
|||
}
|
||||
|
||||
Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
|
||||
const GPUVAddr tic_address_gpu{regs.tic.TICAddress() + tic_index * sizeof(Texture::TICEntry)};
|
||||
const GPUVAddr tic_address_gpu{regs.tic.Address() + tic_index * sizeof(Texture::TICEntry)};
|
||||
|
||||
Texture::TICEntry tic_entry;
|
||||
memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
|
||||
|
@ -648,43 +651,19 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
|
|||
}
|
||||
|
||||
Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
|
||||
const GPUVAddr tsc_address_gpu{regs.tsc.TSCAddress() + tsc_index * sizeof(Texture::TSCEntry)};
|
||||
const GPUVAddr tsc_address_gpu{regs.tsc.Address() + tsc_index * sizeof(Texture::TSCEntry)};
|
||||
|
||||
Texture::TSCEntry tsc_entry;
|
||||
memory_manager.ReadBlockUnsafe(tsc_address_gpu, &tsc_entry, sizeof(Texture::TSCEntry));
|
||||
return tsc_entry;
|
||||
}
|
||||
|
||||
Texture::FullTextureInfo Maxwell3D::GetTextureInfo(Texture::TextureHandle tex_handle) const {
|
||||
return Texture::FullTextureInfo{GetTICEntry(tex_handle.tic_id), GetTSCEntry(tex_handle.tsc_id)};
|
||||
}
|
||||
|
||||
Texture::FullTextureInfo Maxwell3D::GetStageTexture(ShaderType stage, std::size_t offset) const {
|
||||
const auto stage_index = static_cast<std::size_t>(stage);
|
||||
const auto& shader = state.shader_stages[stage_index];
|
||||
const auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index];
|
||||
ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0);
|
||||
|
||||
const GPUVAddr tex_info_address =
|
||||
tex_info_buffer.address + offset * sizeof(Texture::TextureHandle);
|
||||
|
||||
ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size);
|
||||
|
||||
const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
|
||||
|
||||
return GetTextureInfo(tex_handle);
|
||||
}
|
||||
|
||||
u32 Maxwell3D::GetRegisterValue(u32 method) const {
|
||||
ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register");
|
||||
return regs.reg_array[method];
|
||||
}
|
||||
|
||||
void Maxwell3D::ProcessClearBuffers() {
|
||||
ASSERT(regs.clear_buffers.R == regs.clear_buffers.G &&
|
||||
regs.clear_buffers.R == regs.clear_buffers.B &&
|
||||
regs.clear_buffers.R == regs.clear_buffers.A);
|
||||
|
||||
rasterizer->Clear();
|
||||
}
|
||||
|
||||
|
@ -692,9 +671,7 @@ u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offse
|
|||
ASSERT(stage != ShaderType::Compute);
|
||||
const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)];
|
||||
const auto& buffer = shader_stage.const_buffers[const_buffer];
|
||||
u32 result;
|
||||
std::memcpy(&result, memory_manager.GetPointer(buffer.address + offset), sizeof(u32));
|
||||
return result;
|
||||
return memory_manager.Read<u32>(buffer.address + offset);
|
||||
}
|
||||
|
||||
SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const {
|
||||
|
@ -712,9 +689,11 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b
|
|||
|
||||
SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const {
|
||||
const Texture::TextureHandle tex_handle{handle};
|
||||
const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
|
||||
SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
|
||||
result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
|
||||
const Texture::TICEntry tic = GetTICEntry(tex_handle.tic_id);
|
||||
const Texture::TSCEntry tsc = GetTSCEntry(tex_handle.tsc_id);
|
||||
|
||||
SamplerDescriptor result = SamplerDescriptor::FromTIC(tic);
|
||||
result.is_shadow.Assign(tsc.depth_compare_enabled.Value());
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
|
@ -438,16 +438,6 @@ public:
|
|||
DecrWrapOGL = 0x8508,
|
||||
};
|
||||
|
||||
enum class MemoryLayout : u32 {
|
||||
Linear = 0,
|
||||
BlockLinear = 1,
|
||||
};
|
||||
|
||||
enum class InvMemoryLayout : u32 {
|
||||
BlockLinear = 0,
|
||||
Linear = 1,
|
||||
};
|
||||
|
||||
enum class CounterReset : u32 {
|
||||
SampleCnt = 0x01,
|
||||
Unk02 = 0x02,
|
||||
|
@ -589,21 +579,31 @@ public:
|
|||
NegativeW = 7,
|
||||
};
|
||||
|
||||
enum class SamplerIndex : u32 {
|
||||
Independently = 0,
|
||||
ViaHeaderIndex = 1,
|
||||
};
|
||||
|
||||
struct TileMode {
|
||||
union {
|
||||
BitField<0, 4, u32> block_width;
|
||||
BitField<4, 4, u32> block_height;
|
||||
BitField<8, 4, u32> block_depth;
|
||||
BitField<12, 1, u32> is_pitch_linear;
|
||||
BitField<16, 1, u32> is_3d;
|
||||
};
|
||||
};
|
||||
static_assert(sizeof(TileMode) == 4);
|
||||
|
||||
struct RenderTargetConfig {
|
||||
u32 address_high;
|
||||
u32 address_low;
|
||||
u32 width;
|
||||
u32 height;
|
||||
Tegra::RenderTargetFormat format;
|
||||
TileMode tile_mode;
|
||||
union {
|
||||
BitField<0, 3, u32> block_width;
|
||||
BitField<4, 3, u32> block_height;
|
||||
BitField<8, 3, u32> block_depth;
|
||||
BitField<12, 1, InvMemoryLayout> type;
|
||||
BitField<16, 1, u32> is_3d;
|
||||
} memory_layout;
|
||||
union {
|
||||
BitField<0, 16, u32> layers;
|
||||
BitField<0, 16, u32> depth;
|
||||
BitField<16, 1, u32> volume;
|
||||
};
|
||||
u32 layer_stride;
|
||||
|
@ -832,7 +832,11 @@ public:
|
|||
|
||||
u32 patch_vertices;
|
||||
|
||||
INSERT_UNION_PADDING_WORDS(0xC);
|
||||
INSERT_UNION_PADDING_WORDS(0x4);
|
||||
|
||||
u32 fragment_barrier;
|
||||
|
||||
INSERT_UNION_PADDING_WORDS(0x7);
|
||||
|
||||
std::array<ScissorTest, NumViewports> scissor_test;
|
||||
|
||||
|
@ -842,7 +846,15 @@ public:
|
|||
u32 stencil_back_mask;
|
||||
u32 stencil_back_func_mask;
|
||||
|
||||
INSERT_UNION_PADDING_WORDS(0xC);
|
||||
INSERT_UNION_PADDING_WORDS(0x5);
|
||||
|
||||
u32 invalidate_texture_data_cache;
|
||||
|
||||
INSERT_UNION_PADDING_WORDS(0x1);
|
||||
|
||||
u32 tiled_cache_barrier;
|
||||
|
||||
INSERT_UNION_PADDING_WORDS(0x4);
|
||||
|
||||
u32 color_mask_common;
|
||||
|
||||
|
@ -866,12 +878,7 @@ public:
|
|||
u32 address_high;
|
||||
u32 address_low;
|
||||
Tegra::DepthFormat format;
|
||||
union {
|
||||
BitField<0, 4, u32> block_width;
|
||||
BitField<4, 4, u32> block_height;
|
||||
BitField<8, 4, u32> block_depth;
|
||||
BitField<20, 1, InvMemoryLayout> type;
|
||||
} memory_layout;
|
||||
TileMode tile_mode;
|
||||
u32 layer_stride;
|
||||
|
||||
GPUVAddr Address() const {
|
||||
|
@ -880,7 +887,18 @@ public:
|
|||
}
|
||||
} zeta;
|
||||
|
||||
INSERT_UNION_PADDING_WORDS(0x41);
|
||||
struct {
|
||||
union {
|
||||
BitField<0, 16, u32> x;
|
||||
BitField<16, 16, u32> width;
|
||||
};
|
||||
union {
|
||||
BitField<0, 16, u32> y;
|
||||
BitField<16, 16, u32> height;
|
||||
};
|
||||
} render_area;
|
||||
|
||||
INSERT_UNION_PADDING_WORDS(0x3F);
|
||||
|
||||
union {
|
||||
BitField<0, 4, u32> stencil;
|
||||
|
@ -921,7 +939,7 @@ public:
|
|||
BitField<25, 3, u32> map_7;
|
||||
};
|
||||
|
||||
u32 GetMap(std::size_t index) const {
|
||||
u32 Map(std::size_t index) const {
|
||||
const std::array<u32, NumRenderTargets> maps{map_0, map_1, map_2, map_3,
|
||||
map_4, map_5, map_6, map_7};
|
||||
ASSERT(index < maps.size());
|
||||
|
@ -934,11 +952,13 @@ public:
|
|||
u32 zeta_width;
|
||||
u32 zeta_height;
|
||||
union {
|
||||
BitField<0, 16, u32> zeta_layers;
|
||||
BitField<0, 16, u32> zeta_depth;
|
||||
BitField<16, 1, u32> zeta_volume;
|
||||
};
|
||||
|
||||
INSERT_UNION_PADDING_WORDS(0x26);
|
||||
SamplerIndex sampler_index;
|
||||
|
||||
INSERT_UNION_PADDING_WORDS(0x25);
|
||||
|
||||
u32 depth_test_enable;
|
||||
|
||||
|
@ -964,6 +984,7 @@ public:
|
|||
float b;
|
||||
float a;
|
||||
} blend_color;
|
||||
|
||||
INSERT_UNION_PADDING_WORDS(0x4);
|
||||
|
||||
struct {
|
||||
|
@ -1001,7 +1022,12 @@ public:
|
|||
float line_width_smooth;
|
||||
float line_width_aliased;
|
||||
|
||||
INSERT_UNION_PADDING_WORDS(0x1F);
|
||||
INSERT_UNION_PADDING_WORDS(0x1B);
|
||||
|
||||
u32 invalidate_sampler_cache_no_wfi;
|
||||
u32 invalidate_texture_header_cache_no_wfi;
|
||||
|
||||
INSERT_UNION_PADDING_WORDS(0x2);
|
||||
|
||||
u32 vb_element_base;
|
||||
u32 vb_base_instance;
|
||||
|
@ -1045,13 +1071,13 @@ public:
|
|||
} condition;
|
||||
|
||||
struct {
|
||||
u32 tsc_address_high;
|
||||
u32 tsc_address_low;
|
||||
u32 tsc_limit;
|
||||
u32 address_high;
|
||||
u32 address_low;
|
||||
u32 limit;
|
||||
|
||||
GPUVAddr TSCAddress() const {
|
||||
return static_cast<GPUVAddr>(
|
||||
(static_cast<GPUVAddr>(tsc_address_high) << 32) | tsc_address_low);
|
||||
GPUVAddr Address() const {
|
||||
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
|
||||
address_low);
|
||||
}
|
||||
} tsc;
|
||||
|
||||
|
@ -1062,13 +1088,13 @@ public:
|
|||
u32 line_smooth_enable;
|
||||
|
||||
struct {
|
||||
u32 tic_address_high;
|
||||
u32 tic_address_low;
|
||||
u32 tic_limit;
|
||||
u32 address_high;
|
||||
u32 address_low;
|
||||
u32 limit;
|
||||
|
||||
GPUVAddr TICAddress() const {
|
||||
return static_cast<GPUVAddr>(
|
||||
(static_cast<GPUVAddr>(tic_address_high) << 32) | tic_address_low);
|
||||
GPUVAddr Address() const {
|
||||
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
|
||||
address_low);
|
||||
}
|
||||
} tic;
|
||||
|
||||
|
@ -1397,12 +1423,6 @@ public:
|
|||
|
||||
void FlushMMEInlineDraw();
|
||||
|
||||
/// Given a texture handle, returns the TSC and TIC entries.
|
||||
Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const;
|
||||
|
||||
/// Returns the texture information for a specific texture in a specific shader stage.
|
||||
Texture::FullTextureInfo GetStageTexture(ShaderType stage, std::size_t offset) const;
|
||||
|
||||
u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;
|
||||
|
||||
SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override;
|
||||
|
@ -1598,10 +1618,13 @@ ASSERT_REG_POSITION(polygon_offset_point_enable, 0x370);
|
|||
ASSERT_REG_POSITION(polygon_offset_line_enable, 0x371);
|
||||
ASSERT_REG_POSITION(polygon_offset_fill_enable, 0x372);
|
||||
ASSERT_REG_POSITION(patch_vertices, 0x373);
|
||||
ASSERT_REG_POSITION(fragment_barrier, 0x378);
|
||||
ASSERT_REG_POSITION(scissor_test, 0x380);
|
||||
ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5);
|
||||
ASSERT_REG_POSITION(stencil_back_mask, 0x3D6);
|
||||
ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7);
|
||||
ASSERT_REG_POSITION(invalidate_texture_data_cache, 0x3DD);
|
||||
ASSERT_REG_POSITION(tiled_cache_barrier, 0x3DF);
|
||||
ASSERT_REG_POSITION(color_mask_common, 0x3E4);
|
||||
ASSERT_REG_POSITION(depth_bounds, 0x3E7);
|
||||
ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB);
|
||||
|
@ -1609,6 +1632,7 @@ ASSERT_REG_POSITION(multisample_raster_enable, 0x3ED);
|
|||
ASSERT_REG_POSITION(multisample_raster_samples, 0x3EE);
|
||||
ASSERT_REG_POSITION(multisample_sample_mask, 0x3EF);
|
||||
ASSERT_REG_POSITION(zeta, 0x3F8);
|
||||
ASSERT_REG_POSITION(render_area, 0x3FD);
|
||||
ASSERT_REG_POSITION(clear_flags, 0x43E);
|
||||
ASSERT_REG_POSITION(fill_rectangle, 0x44F);
|
||||
ASSERT_REG_POSITION(vertex_attrib_format, 0x458);
|
||||
|
@ -1617,7 +1641,8 @@ ASSERT_REG_POSITION(multisample_coverage_to_color, 0x47E);
|
|||
ASSERT_REG_POSITION(rt_control, 0x487);
|
||||
ASSERT_REG_POSITION(zeta_width, 0x48a);
|
||||
ASSERT_REG_POSITION(zeta_height, 0x48b);
|
||||
ASSERT_REG_POSITION(zeta_layers, 0x48c);
|
||||
ASSERT_REG_POSITION(zeta_depth, 0x48c);
|
||||
ASSERT_REG_POSITION(sampler_index, 0x48D);
|
||||
ASSERT_REG_POSITION(depth_test_enable, 0x4B3);
|
||||
ASSERT_REG_POSITION(independent_blend_enable, 0x4B9);
|
||||
ASSERT_REG_POSITION(depth_write_enabled, 0x4BA);
|
||||
|
@ -1641,6 +1666,8 @@ ASSERT_REG_POSITION(frag_color_clamp, 0x4EA);
|
|||
ASSERT_REG_POSITION(screen_y_control, 0x4EB);
|
||||
ASSERT_REG_POSITION(line_width_smooth, 0x4EC);
|
||||
ASSERT_REG_POSITION(line_width_aliased, 0x4ED);
|
||||
ASSERT_REG_POSITION(invalidate_sampler_cache_no_wfi, 0x509);
|
||||
ASSERT_REG_POSITION(invalidate_texture_header_cache_no_wfi, 0x50A);
|
||||
ASSERT_REG_POSITION(vb_element_base, 0x50D);
|
||||
ASSERT_REG_POSITION(vb_base_instance, 0x50E);
|
||||
ASSERT_REG_POSITION(clip_distance_enabled, 0x544);
|
||||
|
|
|
@ -96,6 +96,7 @@ void MaxwellDMA::CopyPitchToPitch() {
|
|||
}
|
||||
|
||||
void MaxwellDMA::CopyBlockLinearToPitch() {
|
||||
UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0);
|
||||
UNIMPLEMENTED_IF(regs.src_params.block_size.depth != 0);
|
||||
UNIMPLEMENTED_IF(regs.src_params.layer != 0);
|
||||
|
||||
|
@ -135,6 +136,8 @@ void MaxwellDMA::CopyBlockLinearToPitch() {
|
|||
}
|
||||
|
||||
void MaxwellDMA::CopyPitchToBlockLinear() {
|
||||
UNIMPLEMENTED_IF_MSG(regs.dst_params.block_size.width != 0, "Block width is not one");
|
||||
|
||||
const auto& dst_params = regs.dst_params;
|
||||
const u32 bytes_per_pixel = regs.pitch_in / regs.line_length_in;
|
||||
const u32 width = dst_params.width;
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
|
||||
#include "common/common_types.h"
|
||||
#include "core/core.h"
|
||||
#include "video_core/delayed_destruction_ring.h"
|
||||
#include "video_core/gpu.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
|
@ -47,6 +48,11 @@ protected:
|
|||
template <typename TFence, typename TTextureCache, typename TTBufferCache, typename TQueryCache>
|
||||
class FenceManager {
|
||||
public:
|
||||
/// Notify the fence manager about a new frame
|
||||
void TickFrame() {
|
||||
delayed_destruction_ring.Tick();
|
||||
}
|
||||
|
||||
void SignalSemaphore(GPUVAddr addr, u32 value) {
|
||||
TryReleasePendingFences();
|
||||
const bool should_flush = ShouldFlush();
|
||||
|
@ -86,7 +92,7 @@ public:
|
|||
} else {
|
||||
gpu.IncrementSyncPoint(current_fence->GetPayload());
|
||||
}
|
||||
fences.pop();
|
||||
PopFence();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -132,7 +138,7 @@ private:
|
|||
} else {
|
||||
gpu.IncrementSyncPoint(current_fence->GetPayload());
|
||||
}
|
||||
fences.pop();
|
||||
PopFence();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -158,7 +164,14 @@ private:
|
|||
query_cache.CommitAsyncFlushes();
|
||||
}
|
||||
|
||||
void PopFence() {
|
||||
delayed_destruction_ring.Push(std::move(fences.front()));
|
||||
fences.pop();
|
||||
}
|
||||
|
||||
std::queue<TFence> fences;
|
||||
|
||||
DelayedDestructionRing<TFence, 6> delayed_destruction_ring;
|
||||
};
|
||||
|
||||
} // namespace VideoCommon
|
||||
|
|
|
@ -1,8 +1,26 @@
|
|||
set(SHADER_SOURCES
|
||||
set(SHADER_FILES
|
||||
block_linear_unswizzle_2d.comp
|
||||
block_linear_unswizzle_3d.comp
|
||||
convert_depth_to_float.frag
|
||||
convert_float_to_depth.frag
|
||||
full_screen_triangle.vert
|
||||
opengl_copy_bc4.comp
|
||||
opengl_present.frag
|
||||
opengl_present.vert
|
||||
pitch_unswizzle.comp
|
||||
vulkan_blit_color_float.frag
|
||||
vulkan_blit_depth_stencil.frag
|
||||
vulkan_present.frag
|
||||
vulkan_present.vert
|
||||
vulkan_quad_array.comp
|
||||
vulkan_quad_indexed.comp
|
||||
vulkan_uint8.comp
|
||||
)
|
||||
|
||||
find_program(GLSLANGVALIDATOR "glslangValidator" REQUIRED)
|
||||
|
||||
set(GLSL_FLAGS "")
|
||||
|
||||
set(SHADER_INCLUDE ${CMAKE_CURRENT_BINARY_DIR}/include)
|
||||
set(SHADER_DIR ${SHADER_INCLUDE}/video_core/host_shaders)
|
||||
set(HOST_SHADERS_INCLUDE ${SHADER_INCLUDE} PARENT_SCOPE)
|
||||
|
@ -10,27 +28,44 @@ set(HOST_SHADERS_INCLUDE ${SHADER_INCLUDE} PARENT_SCOPE)
|
|||
set(INPUT_FILE ${CMAKE_CURRENT_SOURCE_DIR}/source_shader.h.in)
|
||||
set(HEADER_GENERATOR ${CMAKE_CURRENT_SOURCE_DIR}/StringShaderHeader.cmake)
|
||||
|
||||
foreach(FILENAME IN ITEMS ${SHADER_SOURCES})
|
||||
foreach(FILENAME IN ITEMS ${SHADER_FILES})
|
||||
string(REPLACE "." "_" SHADER_NAME ${FILENAME})
|
||||
set(SOURCE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/${FILENAME})
|
||||
set(HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}.h)
|
||||
add_custom_command(
|
||||
OUTPUT
|
||||
${HEADER_FILE}
|
||||
COMMAND
|
||||
${CMAKE_COMMAND} -P ${HEADER_GENERATOR} ${SOURCE_FILE} ${HEADER_FILE} ${INPUT_FILE}
|
||||
MAIN_DEPENDENCY
|
||||
${SOURCE_FILE}
|
||||
DEPENDS
|
||||
${INPUT_FILE}
|
||||
# HEADER_GENERATOR should be included here but msbuild seems to assume it's always modified
|
||||
)
|
||||
set(SHADER_HEADERS ${SHADER_HEADERS} ${HEADER_FILE})
|
||||
# Skip generating source headers on Vulkan exclusive files
|
||||
if (NOT ${FILENAME} MATCHES "vulkan.*")
|
||||
set(SOURCE_HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}.h)
|
||||
add_custom_command(
|
||||
OUTPUT
|
||||
${SOURCE_HEADER_FILE}
|
||||
COMMAND
|
||||
${CMAKE_COMMAND} -P ${HEADER_GENERATOR} ${SOURCE_FILE} ${SOURCE_HEADER_FILE} ${INPUT_FILE}
|
||||
MAIN_DEPENDENCY
|
||||
${SOURCE_FILE}
|
||||
DEPENDS
|
||||
${INPUT_FILE}
|
||||
# HEADER_GENERATOR should be included here but msbuild seems to assume it's always modified
|
||||
)
|
||||
set(SHADER_HEADERS ${SHADER_HEADERS} ${SOURCE_HEADER_FILE})
|
||||
endif()
|
||||
# Skip compiling to SPIR-V OpenGL exclusive files
|
||||
if (NOT ${FILENAME} MATCHES "opengl.*")
|
||||
string(TOUPPER ${SHADER_NAME}_SPV SPIRV_VARIABLE_NAME)
|
||||
set(SPIRV_HEADER_FILE ${SHADER_DIR}/${SHADER_NAME}_spv.h)
|
||||
add_custom_command(
|
||||
OUTPUT
|
||||
${SPIRV_HEADER_FILE}
|
||||
COMMAND
|
||||
${GLSLANGVALIDATOR} -V ${GLSL_FLAGS} --variable-name ${SPIRV_VARIABLE_NAME} -o ${SPIRV_HEADER_FILE} ${SOURCE_FILE}
|
||||
MAIN_DEPENDENCY
|
||||
${SOURCE_FILE}
|
||||
)
|
||||
set(SHADER_HEADERS ${SHADER_HEADERS} ${SPIRV_HEADER_FILE})
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
add_custom_target(host_shaders
|
||||
DEPENDS
|
||||
${SHADER_HEADERS}
|
||||
SOURCES
|
||||
${SHADER_SOURCES}
|
||||
${SHADER_FILES}
|
||||
)
|
||||
|
|
122
src/video_core/host_shaders/block_linear_unswizzle_2d.comp
Normal file
122
src/video_core/host_shaders/block_linear_unswizzle_2d.comp
Normal file
|
@ -0,0 +1,122 @@
|
|||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#version 430
|
||||
|
||||
#ifdef VULKAN
|
||||
|
||||
#extension GL_EXT_shader_16bit_storage : require
|
||||
#extension GL_EXT_shader_8bit_storage : require
|
||||
#define HAS_EXTENDED_TYPES 1
|
||||
#define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants {
|
||||
#define END_PUSH_CONSTANTS };
|
||||
#define UNIFORM(n)
|
||||
#define BINDING_SWIZZLE_BUFFER 0
|
||||
#define BINDING_INPUT_BUFFER 1
|
||||
#define BINDING_OUTPUT_IMAGE 2
|
||||
|
||||
#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
|
||||
|
||||
#extension GL_NV_gpu_shader5 : enable
|
||||
#ifdef GL_NV_gpu_shader5
|
||||
#define HAS_EXTENDED_TYPES 1
|
||||
#else
|
||||
#define HAS_EXTENDED_TYPES 0
|
||||
#endif
|
||||
#define BEGIN_PUSH_CONSTANTS
|
||||
#define END_PUSH_CONSTANTS
|
||||
#define UNIFORM(n) layout (location = n) uniform
|
||||
#define BINDING_SWIZZLE_BUFFER 0
|
||||
#define BINDING_INPUT_BUFFER 1
|
||||
#define BINDING_OUTPUT_IMAGE 0
|
||||
|
||||
#endif
|
||||
|
||||
BEGIN_PUSH_CONSTANTS
|
||||
UNIFORM(0) uvec3 origin;
|
||||
UNIFORM(1) ivec3 destination;
|
||||
UNIFORM(2) uint bytes_per_block_log2;
|
||||
UNIFORM(3) uint layer_stride;
|
||||
UNIFORM(4) uint block_size;
|
||||
UNIFORM(5) uint x_shift;
|
||||
UNIFORM(6) uint block_height;
|
||||
UNIFORM(7) uint block_height_mask;
|
||||
END_PUSH_CONSTANTS
|
||||
|
||||
layout(binding = BINDING_SWIZZLE_BUFFER, std430) readonly buffer SwizzleTable {
|
||||
uint swizzle_table[];
|
||||
};
|
||||
|
||||
#if HAS_EXTENDED_TYPES
|
||||
layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU8 { uint8_t u8data[]; };
|
||||
layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU16 { uint16_t u16data[]; };
|
||||
#endif
|
||||
layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU32 { uint u32data[]; };
|
||||
layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU64 { uvec2 u64data[]; };
|
||||
layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU128 { uvec4 u128data[]; };
|
||||
|
||||
layout(binding = BINDING_OUTPUT_IMAGE) uniform writeonly uimage2DArray output_image;
|
||||
|
||||
layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in;
|
||||
|
||||
const uint GOB_SIZE_X = 64;
|
||||
const uint GOB_SIZE_Y = 8;
|
||||
const uint GOB_SIZE_Z = 1;
|
||||
const uint GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z;
|
||||
|
||||
const uint GOB_SIZE_X_SHIFT = 6;
|
||||
const uint GOB_SIZE_Y_SHIFT = 3;
|
||||
const uint GOB_SIZE_Z_SHIFT = 0;
|
||||
const uint GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT;
|
||||
|
||||
const uvec2 SWIZZLE_MASK = uvec2(GOB_SIZE_X - 1, GOB_SIZE_Y - 1);
|
||||
|
||||
uint SwizzleOffset(uvec2 pos) {
|
||||
pos = pos & SWIZZLE_MASK;
|
||||
return swizzle_table[pos.y * 64 + pos.x];
|
||||
}
|
||||
|
||||
uvec4 ReadTexel(uint offset) {
|
||||
switch (bytes_per_block_log2) {
|
||||
#if HAS_EXTENDED_TYPES
|
||||
case 0:
|
||||
return uvec4(u8data[offset], 0, 0, 0);
|
||||
case 1:
|
||||
return uvec4(u16data[offset / 2], 0, 0, 0);
|
||||
#else
|
||||
case 0:
|
||||
return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 24), 8), 0, 0, 0);
|
||||
case 1:
|
||||
return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 16), 16), 0, 0, 0);
|
||||
#endif
|
||||
case 2:
|
||||
return uvec4(u32data[offset / 4], 0, 0, 0);
|
||||
case 3:
|
||||
return uvec4(u64data[offset / 8], 0, 0);
|
||||
case 4:
|
||||
return u128data[offset / 16];
|
||||
}
|
||||
return uvec4(0);
|
||||
}
|
||||
|
||||
void main() {
|
||||
uvec3 pos = gl_GlobalInvocationID + origin;
|
||||
pos.x <<= bytes_per_block_log2;
|
||||
|
||||
// Read as soon as possible due to its latency
|
||||
const uint swizzle = SwizzleOffset(pos.xy);
|
||||
|
||||
const uint block_y = pos.y >> GOB_SIZE_Y_SHIFT;
|
||||
|
||||
uint offset = 0;
|
||||
offset += pos.z * layer_stride;
|
||||
offset += (block_y >> block_height) * block_size;
|
||||
offset += (block_y & block_height_mask) << GOB_SIZE_SHIFT;
|
||||
offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift;
|
||||
offset += swizzle;
|
||||
|
||||
const uvec4 texel = ReadTexel(offset);
|
||||
const ivec3 coord = ivec3(gl_GlobalInvocationID) + destination;
|
||||
imageStore(output_image, coord, texel);
|
||||
}
|
125
src/video_core/host_shaders/block_linear_unswizzle_3d.comp
Normal file
125
src/video_core/host_shaders/block_linear_unswizzle_3d.comp
Normal file
|
@ -0,0 +1,125 @@
|
|||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#version 430
|
||||
|
||||
#ifdef VULKAN
|
||||
|
||||
#extension GL_EXT_shader_16bit_storage : require
|
||||
#extension GL_EXT_shader_8bit_storage : require
|
||||
#define HAS_EXTENDED_TYPES 1
|
||||
#define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants {
|
||||
#define END_PUSH_CONSTANTS };
|
||||
#define UNIFORM(n)
|
||||
#define BINDING_SWIZZLE_BUFFER 0
|
||||
#define BINDING_INPUT_BUFFER 1
|
||||
#define BINDING_OUTPUT_IMAGE 2
|
||||
|
||||
#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
|
||||
|
||||
#extension GL_NV_gpu_shader5 : enable
|
||||
#ifdef GL_NV_gpu_shader5
|
||||
#define HAS_EXTENDED_TYPES 1
|
||||
#else
|
||||
#define HAS_EXTENDED_TYPES 0
|
||||
#endif
|
||||
#define BEGIN_PUSH_CONSTANTS
|
||||
#define END_PUSH_CONSTANTS
|
||||
#define UNIFORM(n) layout (location = n) uniform
|
||||
#define BINDING_SWIZZLE_BUFFER 0
|
||||
#define BINDING_INPUT_BUFFER 1
|
||||
#define BINDING_OUTPUT_IMAGE 0
|
||||
|
||||
#endif
|
||||
|
||||
BEGIN_PUSH_CONSTANTS
|
||||
UNIFORM(0) uvec3 origin;
|
||||
UNIFORM(1) ivec3 destination;
|
||||
UNIFORM(2) uint bytes_per_block_log2;
|
||||
UNIFORM(3) uint slice_size;
|
||||
UNIFORM(4) uint block_size;
|
||||
UNIFORM(5) uint x_shift;
|
||||
UNIFORM(6) uint block_height;
|
||||
UNIFORM(7) uint block_height_mask;
|
||||
UNIFORM(8) uint block_depth;
|
||||
UNIFORM(9) uint block_depth_mask;
|
||||
END_PUSH_CONSTANTS
|
||||
|
||||
layout(binding = BINDING_SWIZZLE_BUFFER, std430) readonly buffer SwizzleTable {
|
||||
uint swizzle_table[];
|
||||
};
|
||||
|
||||
#if HAS_EXTENDED_TYPES
|
||||
layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU8 { uint8_t u8data[]; };
|
||||
layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU16 { uint16_t u16data[]; };
|
||||
#endif
|
||||
layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU32 { uint u32data[]; };
|
||||
layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU64 { uvec2 u64data[]; };
|
||||
layout(binding = BINDING_INPUT_BUFFER, std430) buffer InputBufferU128 { uvec4 u128data[]; };
|
||||
|
||||
layout(binding = BINDING_OUTPUT_IMAGE) uniform writeonly uimage3D output_image;
|
||||
|
||||
layout(local_size_x = 16, local_size_y = 8, local_size_z = 8) in;
|
||||
|
||||
const uint GOB_SIZE_X = 64;
|
||||
const uint GOB_SIZE_Y = 8;
|
||||
const uint GOB_SIZE_Z = 1;
|
||||
const uint GOB_SIZE = GOB_SIZE_X * GOB_SIZE_Y * GOB_SIZE_Z;
|
||||
|
||||
const uint GOB_SIZE_X_SHIFT = 6;
|
||||
const uint GOB_SIZE_Y_SHIFT = 3;
|
||||
const uint GOB_SIZE_Z_SHIFT = 0;
|
||||
const uint GOB_SIZE_SHIFT = GOB_SIZE_X_SHIFT + GOB_SIZE_Y_SHIFT + GOB_SIZE_Z_SHIFT;
|
||||
|
||||
const uvec2 SWIZZLE_MASK = uvec2(GOB_SIZE_X - 1, GOB_SIZE_Y - 1);
|
||||
|
||||
uint SwizzleOffset(uvec2 pos) {
|
||||
pos = pos & SWIZZLE_MASK;
|
||||
return swizzle_table[pos.y * 64 + pos.x];
|
||||
}
|
||||
|
||||
uvec4 ReadTexel(uint offset) {
|
||||
switch (bytes_per_block_log2) {
|
||||
#if HAS_EXTENDED_TYPES
|
||||
case 0:
|
||||
return uvec4(u8data[offset], 0, 0, 0);
|
||||
case 1:
|
||||
return uvec4(u16data[offset / 2], 0, 0, 0);
|
||||
#else
|
||||
case 0:
|
||||
return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 24), 8), 0, 0, 0);
|
||||
case 1:
|
||||
return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 16), 16), 0, 0, 0);
|
||||
#endif
|
||||
case 2:
|
||||
return uvec4(u32data[offset / 4], 0, 0, 0);
|
||||
case 3:
|
||||
return uvec4(u64data[offset / 8], 0, 0);
|
||||
case 4:
|
||||
return u128data[offset / 16];
|
||||
}
|
||||
return uvec4(0);
|
||||
}
|
||||
|
||||
void main() {
|
||||
uvec3 pos = gl_GlobalInvocationID + origin;
|
||||
pos.x <<= bytes_per_block_log2;
|
||||
|
||||
// Read as soon as possible due to its latency
|
||||
const uint swizzle = SwizzleOffset(pos.xy);
|
||||
|
||||
const uint block_y = pos.y >> GOB_SIZE_Y_SHIFT;
|
||||
|
||||
uint offset = 0;
|
||||
offset += (pos.z >> block_depth) * slice_size;
|
||||
offset += (pos.z & block_depth_mask) << (GOB_SIZE_SHIFT + block_height);
|
||||
offset += (block_y >> block_height) * block_size;
|
||||
offset += (block_y & block_height_mask) << GOB_SIZE_SHIFT;
|
||||
offset += (pos.x >> GOB_SIZE_X_SHIFT) << x_shift;
|
||||
offset += swizzle;
|
||||
|
||||
const uvec4 texel = ReadTexel(offset);
|
||||
const ivec3 coord = ivec3(gl_GlobalInvocationID) + destination;
|
||||
imageStore(output_image, coord, texel);
|
||||
}
|
13
src/video_core/host_shaders/convert_depth_to_float.frag
Normal file
13
src/video_core/host_shaders/convert_depth_to_float.frag
Normal file
|
@ -0,0 +1,13 @@
|
|||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#version 450
|
||||
|
||||
layout(binding = 0) uniform sampler2D depth_texture;
|
||||
layout(location = 0) out float output_color;
|
||||
|
||||
void main() {
|
||||
ivec2 coord = ivec2(gl_FragCoord.xy);
|
||||
output_color = texelFetch(depth_texture, coord, 0).r;
|
||||
}
|
13
src/video_core/host_shaders/convert_float_to_depth.frag
Normal file
13
src/video_core/host_shaders/convert_float_to_depth.frag
Normal file
|
@ -0,0 +1,13 @@
|
|||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#version 450
|
||||
|
||||
layout(binding = 0) uniform sampler2D color_texture;
|
||||
|
||||
void main() {
|
||||
ivec2 coord = ivec2(gl_FragCoord.xy);
|
||||
float color = texelFetch(color_texture, coord, 0).r;
|
||||
gl_FragDepth = color;
|
||||
}
|
29
src/video_core/host_shaders/full_screen_triangle.vert
Normal file
29
src/video_core/host_shaders/full_screen_triangle.vert
Normal file
|
@ -0,0 +1,29 @@
|
|||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#version 450
|
||||
|
||||
#ifdef VULKAN
|
||||
#define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants {
|
||||
#define END_PUSH_CONSTANTS };
|
||||
#define UNIFORM(n)
|
||||
#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
|
||||
#define BEGIN_PUSH_CONSTANTS
|
||||
#define END_PUSH_CONSTANTS
|
||||
#define UNIFORM(n) layout (location = n) uniform
|
||||
#endif
|
||||
|
||||
BEGIN_PUSH_CONSTANTS
|
||||
UNIFORM(0) vec2 tex_scale;
|
||||
UNIFORM(1) vec2 tex_offset;
|
||||
END_PUSH_CONSTANTS
|
||||
|
||||
layout(location = 0) out vec2 texcoord;
|
||||
|
||||
void main() {
|
||||
float x = float((gl_VertexIndex & 1) << 2);
|
||||
float y = float((gl_VertexIndex & 2) << 1);
|
||||
gl_Position = vec4(x - 1.0, y - 1.0, 0.0, 1.0);
|
||||
texcoord = fma(vec2(x, y) / 2.0, tex_scale, tex_offset);
|
||||
}
|
70
src/video_core/host_shaders/opengl_copy_bc4.comp
Normal file
70
src/video_core/host_shaders/opengl_copy_bc4.comp
Normal file
|
@ -0,0 +1,70 @@
|
|||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#version 430 core
|
||||
#extension GL_ARB_gpu_shader_int64 : require
|
||||
|
||||
layout (local_size_x = 4, local_size_y = 4) in;
|
||||
|
||||
layout(binding = 0, rg32ui) readonly uniform uimage3D bc4_input;
|
||||
layout(binding = 1, rgba8ui) writeonly uniform uimage3D bc4_output;
|
||||
|
||||
layout(location = 0) uniform uvec3 src_offset;
|
||||
layout(location = 1) uniform uvec3 dst_offset;
|
||||
|
||||
// https://www.khronos.org/registry/OpenGL/extensions/ARB/ARB_texture_compression_rgtc.txt
|
||||
uint DecompressBlock(uint64_t bits, uvec2 coord) {
|
||||
const uint code_offset = 16 + 3 * (4 * coord.y + coord.x);
|
||||
const uint code = uint(bits >> code_offset) & 7;
|
||||
const uint red0 = uint(bits >> 0) & 0xff;
|
||||
const uint red1 = uint(bits >> 8) & 0xff;
|
||||
if (red0 > red1) {
|
||||
switch (code) {
|
||||
case 0:
|
||||
return red0;
|
||||
case 1:
|
||||
return red1;
|
||||
case 2:
|
||||
return (6 * red0 + 1 * red1) / 7;
|
||||
case 3:
|
||||
return (5 * red0 + 2 * red1) / 7;
|
||||
case 4:
|
||||
return (4 * red0 + 3 * red1) / 7;
|
||||
case 5:
|
||||
return (3 * red0 + 4 * red1) / 7;
|
||||
case 6:
|
||||
return (2 * red0 + 5 * red1) / 7;
|
||||
case 7:
|
||||
return (1 * red0 + 6 * red1) / 7;
|
||||
}
|
||||
} else {
|
||||
switch (code) {
|
||||
case 0:
|
||||
return red0;
|
||||
case 1:
|
||||
return red1;
|
||||
case 2:
|
||||
return (4 * red0 + 1 * red1) / 5;
|
||||
case 3:
|
||||
return (3 * red0 + 2 * red1) / 5;
|
||||
case 4:
|
||||
return (2 * red0 + 3 * red1) / 5;
|
||||
case 5:
|
||||
return (1 * red0 + 4 * red1) / 5;
|
||||
case 6:
|
||||
return 0;
|
||||
case 7:
|
||||
return 0xff;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void main() {
|
||||
uvec2 packed_bits = imageLoad(bc4_input, ivec3(gl_WorkGroupID + src_offset)).rg;
|
||||
uint64_t bits = packUint2x32(packed_bits);
|
||||
uint red = DecompressBlock(bits, gl_LocalInvocationID.xy);
|
||||
uvec4 color = uvec4(red & 0xff, 0, 0, 0xff);
|
||||
imageStore(bc4_output, ivec3(gl_GlobalInvocationID + dst_offset), color);
|
||||
}
|
|
@ -1,3 +1,7 @@
|
|||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#version 430 core
|
||||
|
||||
layout (location = 0) in vec2 frag_tex_coord;
|
||||
|
|
|
@ -1,3 +1,7 @@
|
|||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#version 430 core
|
||||
|
||||
out gl_PerVertex {
|
||||
|
|
86
src/video_core/host_shaders/pitch_unswizzle.comp
Normal file
86
src/video_core/host_shaders/pitch_unswizzle.comp
Normal file
|
@ -0,0 +1,86 @@
|
|||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#version 430
|
||||
|
||||
#ifdef VULKAN
|
||||
|
||||
#extension GL_EXT_shader_16bit_storage : require
|
||||
#extension GL_EXT_shader_8bit_storage : require
|
||||
#define HAS_EXTENDED_TYPES 1
|
||||
#define BEGIN_PUSH_CONSTANTS layout(push_constant) uniform PushConstants {
|
||||
#define END_PUSH_CONSTANTS };
|
||||
#define UNIFORM(n)
|
||||
#define BINDING_INPUT_BUFFER 0
|
||||
#define BINDING_OUTPUT_IMAGE 1
|
||||
|
||||
#else // ^^^ Vulkan ^^^ // vvv OpenGL vvv
|
||||
|
||||
#extension GL_NV_gpu_shader5 : enable
|
||||
#ifdef GL_NV_gpu_shader5
|
||||
#define HAS_EXTENDED_TYPES 1
|
||||
#else
|
||||
#define HAS_EXTENDED_TYPES 0
|
||||
#endif
|
||||
#define BEGIN_PUSH_CONSTANTS
|
||||
#define END_PUSH_CONSTANTS
|
||||
#define UNIFORM(n) layout (location = n) uniform
|
||||
#define BINDING_INPUT_BUFFER 0
|
||||
#define BINDING_OUTPUT_IMAGE 0
|
||||
|
||||
#endif
|
||||
|
||||
BEGIN_PUSH_CONSTANTS
|
||||
UNIFORM(0) uvec2 origin;
|
||||
UNIFORM(1) ivec2 destination;
|
||||
UNIFORM(2) uint bytes_per_block;
|
||||
UNIFORM(3) uint pitch;
|
||||
END_PUSH_CONSTANTS
|
||||
|
||||
#if HAS_EXTENDED_TYPES
|
||||
layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU8 { uint8_t u8data[]; };
|
||||
layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU16 { uint16_t u16data[]; };
|
||||
#endif
|
||||
layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU32 { uint u32data[]; };
|
||||
layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU64 { uvec2 u64data[]; };
|
||||
layout(binding = BINDING_INPUT_BUFFER, std430) readonly buffer InputBufferU128 { uvec4 u128data[]; };
|
||||
|
||||
layout(binding = BINDING_OUTPUT_IMAGE) writeonly uniform uimage2D output_image;
|
||||
|
||||
layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in;
|
||||
|
||||
uvec4 ReadTexel(uint offset) {
|
||||
switch (bytes_per_block) {
|
||||
#if HAS_EXTENDED_TYPES
|
||||
case 1:
|
||||
return uvec4(u8data[offset], 0, 0, 0);
|
||||
case 2:
|
||||
return uvec4(u16data[offset / 2], 0, 0, 0);
|
||||
#else
|
||||
case 1:
|
||||
return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 24), 8), 0, 0, 0);
|
||||
case 2:
|
||||
return uvec4(bitfieldExtract(u32data[offset / 4], int((offset * 8) & 16), 16), 0, 0, 0);
|
||||
#endif
|
||||
case 4:
|
||||
return uvec4(u32data[offset / 4], 0, 0, 0);
|
||||
case 8:
|
||||
return uvec4(u64data[offset / 8], 0, 0);
|
||||
case 16:
|
||||
return u128data[offset / 16];
|
||||
}
|
||||
return uvec4(0);
|
||||
}
|
||||
|
||||
void main() {
|
||||
uvec2 pos = gl_GlobalInvocationID.xy + origin;
|
||||
|
||||
uint offset = 0;
|
||||
offset += pos.x * bytes_per_block;
|
||||
offset += pos.y * pitch;
|
||||
|
||||
const uvec4 texel = ReadTexel(offset);
|
||||
const ivec2 coord = ivec2(gl_GlobalInvocationID.xy) + destination;
|
||||
imageStore(output_image, coord, texel);
|
||||
}
|
14
src/video_core/host_shaders/vulkan_blit_color_float.frag
Normal file
14
src/video_core/host_shaders/vulkan_blit_color_float.frag
Normal file
|
@ -0,0 +1,14 @@
|
|||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#version 450
|
||||
|
||||
layout(binding = 0) uniform sampler2D tex;
|
||||
|
||||
layout(location = 0) in vec2 texcoord;
|
||||
layout(location = 0) out vec4 color;
|
||||
|
||||
void main() {
|
||||
color = textureLod(tex, texcoord, 0);
|
||||
}
|
16
src/video_core/host_shaders/vulkan_blit_depth_stencil.frag
Normal file
16
src/video_core/host_shaders/vulkan_blit_depth_stencil.frag
Normal file
|
@ -0,0 +1,16 @@
|
|||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#version 450
|
||||
#extension GL_ARB_shader_stencil_export : require
|
||||
|
||||
layout(binding = 0) uniform sampler2D depth_tex;
|
||||
layout(binding = 1) uniform isampler2D stencil_tex;
|
||||
|
||||
layout(location = 0) in vec2 texcoord;
|
||||
|
||||
void main() {
|
||||
gl_FragDepth = textureLod(depth_tex, texcoord, 0).r;
|
||||
gl_FragStencilRefARB = textureLod(stencil_tex, texcoord, 0).r;
|
||||
}
|
|
@ -2,15 +2,6 @@
|
|||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
/*
|
||||
* Build instructions:
|
||||
* $ glslangValidator -V $THIS_FILE -o output.spv
|
||||
* $ spirv-opt -O --strip-debug output.spv -o optimized.spv
|
||||
* $ xxd -i optimized.spv
|
||||
*
|
||||
* Then copy that bytecode to the C++ file
|
||||
*/
|
||||
|
||||
#version 460 core
|
||||
|
||||
layout (location = 0) in vec2 frag_tex_coord;
|
|
@ -2,15 +2,6 @@
|
|||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
/*
|
||||
* Build instructions:
|
||||
* $ glslangValidator -V $THIS_FILE -o output.spv
|
||||
* $ spirv-opt -O --strip-debug output.spv -o optimized.spv
|
||||
* $ xxd -i optimized.spv
|
||||
*
|
||||
* Then copy that bytecode to the C++ file
|
||||
*/
|
||||
|
||||
#version 460 core
|
||||
|
||||
layout (location = 0) in vec2 vert_position;
|
|
@ -2,15 +2,6 @@
|
|||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
/*
|
||||
* Build instructions:
|
||||
* $ glslangValidator -V $THIS_FILE -o output.spv
|
||||
* $ spirv-opt -O --strip-debug output.spv -o optimized.spv
|
||||
* $ xxd -i optimized.spv
|
||||
*
|
||||
* Then copy that bytecode to the C++ file
|
||||
*/
|
||||
|
||||
#version 460 core
|
||||
|
||||
layout (local_size_x = 1024) in;
|
|
@ -2,15 +2,6 @@
|
|||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
/*
|
||||
* Build instructions:
|
||||
* $ glslangValidator -V quad_indexed.comp -o output.spv
|
||||
* $ spirv-opt -O --strip-debug output.spv -o optimized.spv
|
||||
* $ xxd -i optimized.spv
|
||||
*
|
||||
* Then copy that bytecode to the C++ file
|
||||
*/
|
||||
|
||||
#version 460 core
|
||||
|
||||
layout (local_size_x = 1024) in;
|
|
@ -2,15 +2,6 @@
|
|||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
/*
|
||||
* Build instructions:
|
||||
* $ glslangValidator -V $THIS_FILE -o output.spv
|
||||
* $ spirv-opt -O --strip-debug output.spv -o optimized.spv
|
||||
* $ xxd -i optimized.spv
|
||||
*
|
||||
* Then copy that bytecode to the C++ file
|
||||
*/
|
||||
|
||||
#version 460 core
|
||||
#extension GL_EXT_shader_16bit_storage : require
|
||||
#extension GL_EXT_shader_8bit_storage : require
|
|
@ -57,7 +57,10 @@ void MemoryManager::Unmap(GPUVAddr gpu_addr, std::size_t size) {
|
|||
}
|
||||
|
||||
// Flush and invalidate through the GPU interface, to be asynchronous if possible.
|
||||
system.GPU().FlushAndInvalidateRegion(*GpuToCpuAddress(gpu_addr), size);
|
||||
const std::optional<VAddr> cpu_addr = GpuToCpuAddress(gpu_addr);
|
||||
ASSERT(cpu_addr);
|
||||
|
||||
rasterizer->UnmapMemory(*cpu_addr, size);
|
||||
|
||||
UpdateRange(gpu_addr, PageEntry::State::Unmapped, size);
|
||||
}
|
||||
|
|
|
@ -1,250 +0,0 @@
|
|||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <array>
|
||||
#include <cstring>
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/morton.h"
|
||||
#include "video_core/surface.h"
|
||||
#include "video_core/textures/decoders.h"
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
using Surface::GetBytesPerPixel;
|
||||
using Surface::PixelFormat;
|
||||
|
||||
using MortonCopyFn = void (*)(u32, u32, u32, u32, u32, u32, u8*, u8*);
|
||||
using ConversionArray = std::array<MortonCopyFn, Surface::MaxPixelFormat>;
|
||||
|
||||
template <bool morton_to_linear, PixelFormat format>
|
||||
static void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 depth,
|
||||
u32 tile_width_spacing, u8* buffer, u8* addr) {
|
||||
constexpr u32 bytes_per_pixel = GetBytesPerPixel(format);
|
||||
|
||||
// With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
|
||||
// pixel values.
|
||||
constexpr u32 tile_size_x{GetDefaultBlockWidth(format)};
|
||||
constexpr u32 tile_size_y{GetDefaultBlockHeight(format)};
|
||||
|
||||
if constexpr (morton_to_linear) {
|
||||
Tegra::Texture::UnswizzleTexture(buffer, addr, tile_size_x, tile_size_y, bytes_per_pixel,
|
||||
stride, height, depth, block_height, block_depth,
|
||||
tile_width_spacing);
|
||||
} else {
|
||||
Tegra::Texture::CopySwizzledData((stride + tile_size_x - 1) / tile_size_x,
|
||||
(height + tile_size_y - 1) / tile_size_y, depth,
|
||||
bytes_per_pixel, bytes_per_pixel, addr, buffer, false,
|
||||
block_height, block_depth, tile_width_spacing);
|
||||
}
|
||||
}
|
||||
|
||||
static constexpr ConversionArray morton_to_linear_fns = {
|
||||
MortonCopy<true, PixelFormat::A8B8G8R8_UNORM>,
|
||||
MortonCopy<true, PixelFormat::A8B8G8R8_SNORM>,
|
||||
MortonCopy<true, PixelFormat::A8B8G8R8_SINT>,
|
||||
MortonCopy<true, PixelFormat::A8B8G8R8_UINT>,
|
||||
MortonCopy<true, PixelFormat::R5G6B5_UNORM>,
|
||||
MortonCopy<true, PixelFormat::B5G6R5_UNORM>,
|
||||
MortonCopy<true, PixelFormat::A1R5G5B5_UNORM>,
|
||||
MortonCopy<true, PixelFormat::A2B10G10R10_UNORM>,
|
||||
MortonCopy<true, PixelFormat::A2B10G10R10_UINT>,
|
||||
MortonCopy<true, PixelFormat::A1B5G5R5_UNORM>,
|
||||
MortonCopy<true, PixelFormat::R8_UNORM>,
|
||||
MortonCopy<true, PixelFormat::R8_SNORM>,
|
||||
MortonCopy<true, PixelFormat::R8_SINT>,
|
||||
MortonCopy<true, PixelFormat::R8_UINT>,
|
||||
MortonCopy<true, PixelFormat::R16G16B16A16_FLOAT>,
|
||||
MortonCopy<true, PixelFormat::R16G16B16A16_UNORM>,
|
||||
MortonCopy<true, PixelFormat::R16G16B16A16_SNORM>,
|
||||
MortonCopy<true, PixelFormat::R16G16B16A16_SINT>,
|
||||
MortonCopy<true, PixelFormat::R16G16B16A16_UINT>,
|
||||
MortonCopy<true, PixelFormat::B10G11R11_FLOAT>,
|
||||
MortonCopy<true, PixelFormat::R32G32B32A32_UINT>,
|
||||
MortonCopy<true, PixelFormat::BC1_RGBA_UNORM>,
|
||||
MortonCopy<true, PixelFormat::BC2_UNORM>,
|
||||
MortonCopy<true, PixelFormat::BC3_UNORM>,
|
||||
MortonCopy<true, PixelFormat::BC4_UNORM>,
|
||||
MortonCopy<true, PixelFormat::BC4_SNORM>,
|
||||
MortonCopy<true, PixelFormat::BC5_UNORM>,
|
||||
MortonCopy<true, PixelFormat::BC5_SNORM>,
|
||||
MortonCopy<true, PixelFormat::BC7_UNORM>,
|
||||
MortonCopy<true, PixelFormat::BC6H_UFLOAT>,
|
||||
MortonCopy<true, PixelFormat::BC6H_SFLOAT>,
|
||||
MortonCopy<true, PixelFormat::ASTC_2D_4X4_UNORM>,
|
||||
MortonCopy<true, PixelFormat::B8G8R8A8_UNORM>,
|
||||
MortonCopy<true, PixelFormat::R32G32B32A32_FLOAT>,
|
||||
MortonCopy<true, PixelFormat::R32G32B32A32_SINT>,
|
||||
MortonCopy<true, PixelFormat::R32G32_FLOAT>,
|
||||
MortonCopy<true, PixelFormat::R32G32_SINT>,
|
||||
MortonCopy<true, PixelFormat::R32_FLOAT>,
|
||||
MortonCopy<true, PixelFormat::R16_FLOAT>,
|
||||
MortonCopy<true, PixelFormat::R16_UNORM>,
|
||||
MortonCopy<true, PixelFormat::R16_SNORM>,
|
||||
MortonCopy<true, PixelFormat::R16_UINT>,
|
||||
MortonCopy<true, PixelFormat::R16_SINT>,
|
||||
MortonCopy<true, PixelFormat::R16G16_UNORM>,
|
||||
MortonCopy<true, PixelFormat::R16G16_FLOAT>,
|
||||
MortonCopy<true, PixelFormat::R16G16_UINT>,
|
||||
MortonCopy<true, PixelFormat::R16G16_SINT>,
|
||||
MortonCopy<true, PixelFormat::R16G16_SNORM>,
|
||||
MortonCopy<true, PixelFormat::R32G32B32_FLOAT>,
|
||||
MortonCopy<true, PixelFormat::A8B8G8R8_SRGB>,
|
||||
MortonCopy<true, PixelFormat::R8G8_UNORM>,
|
||||
MortonCopy<true, PixelFormat::R8G8_SNORM>,
|
||||
MortonCopy<true, PixelFormat::R8G8_SINT>,
|
||||
MortonCopy<true, PixelFormat::R8G8_UINT>,
|
||||
MortonCopy<true, PixelFormat::R32G32_UINT>,
|
||||
MortonCopy<true, PixelFormat::R16G16B16X16_FLOAT>,
|
||||
MortonCopy<true, PixelFormat::R32_UINT>,
|
||||
MortonCopy<true, PixelFormat::R32_SINT>,
|
||||
MortonCopy<true, PixelFormat::ASTC_2D_8X8_UNORM>,
|
||||
MortonCopy<true, PixelFormat::ASTC_2D_8X5_UNORM>,
|
||||
MortonCopy<true, PixelFormat::ASTC_2D_5X4_UNORM>,
|
||||
MortonCopy<true, PixelFormat::B8G8R8A8_SRGB>,
|
||||
MortonCopy<true, PixelFormat::BC1_RGBA_SRGB>,
|
||||
MortonCopy<true, PixelFormat::BC2_SRGB>,
|
||||
MortonCopy<true, PixelFormat::BC3_SRGB>,
|
||||
MortonCopy<true, PixelFormat::BC7_SRGB>,
|
||||
MortonCopy<true, PixelFormat::A4B4G4R4_UNORM>,
|
||||
MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>,
|
||||
MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>,
|
||||
MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>,
|
||||
MortonCopy<true, PixelFormat::ASTC_2D_5X4_SRGB>,
|
||||
MortonCopy<true, PixelFormat::ASTC_2D_5X5_UNORM>,
|
||||
MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>,
|
||||
MortonCopy<true, PixelFormat::ASTC_2D_10X8_UNORM>,
|
||||
MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>,
|
||||
MortonCopy<true, PixelFormat::ASTC_2D_6X6_UNORM>,
|
||||
MortonCopy<true, PixelFormat::ASTC_2D_6X6_SRGB>,
|
||||
MortonCopy<true, PixelFormat::ASTC_2D_10X10_UNORM>,
|
||||
MortonCopy<true, PixelFormat::ASTC_2D_10X10_SRGB>,
|
||||
MortonCopy<true, PixelFormat::ASTC_2D_12X12_UNORM>,
|
||||
MortonCopy<true, PixelFormat::ASTC_2D_12X12_SRGB>,
|
||||
MortonCopy<true, PixelFormat::ASTC_2D_8X6_UNORM>,
|
||||
MortonCopy<true, PixelFormat::ASTC_2D_8X6_SRGB>,
|
||||
MortonCopy<true, PixelFormat::ASTC_2D_6X5_UNORM>,
|
||||
MortonCopy<true, PixelFormat::ASTC_2D_6X5_SRGB>,
|
||||
MortonCopy<true, PixelFormat::E5B9G9R9_FLOAT>,
|
||||
MortonCopy<true, PixelFormat::D32_FLOAT>,
|
||||
MortonCopy<true, PixelFormat::D16_UNORM>,
|
||||
MortonCopy<true, PixelFormat::D24_UNORM_S8_UINT>,
|
||||
MortonCopy<true, PixelFormat::S8_UINT_D24_UNORM>,
|
||||
MortonCopy<true, PixelFormat::D32_FLOAT_S8_UINT>,
|
||||
};
|
||||
|
||||
static constexpr ConversionArray linear_to_morton_fns = {
|
||||
MortonCopy<false, PixelFormat::A8B8G8R8_UNORM>,
|
||||
MortonCopy<false, PixelFormat::A8B8G8R8_SNORM>,
|
||||
MortonCopy<false, PixelFormat::A8B8G8R8_SINT>,
|
||||
MortonCopy<false, PixelFormat::A8B8G8R8_UINT>,
|
||||
MortonCopy<false, PixelFormat::R5G6B5_UNORM>,
|
||||
MortonCopy<false, PixelFormat::B5G6R5_UNORM>,
|
||||
MortonCopy<false, PixelFormat::A1R5G5B5_UNORM>,
|
||||
MortonCopy<false, PixelFormat::A2B10G10R10_UNORM>,
|
||||
MortonCopy<false, PixelFormat::A2B10G10R10_UINT>,
|
||||
MortonCopy<false, PixelFormat::A1B5G5R5_UNORM>,
|
||||
MortonCopy<false, PixelFormat::R8_UNORM>,
|
||||
MortonCopy<false, PixelFormat::R8_SNORM>,
|
||||
MortonCopy<false, PixelFormat::R8_SINT>,
|
||||
MortonCopy<false, PixelFormat::R8_UINT>,
|
||||
MortonCopy<false, PixelFormat::R16G16B16A16_FLOAT>,
|
||||
MortonCopy<false, PixelFormat::R16G16B16A16_SNORM>,
|
||||
MortonCopy<false, PixelFormat::R16G16B16A16_SINT>,
|
||||
MortonCopy<false, PixelFormat::R16G16B16A16_UNORM>,
|
||||
MortonCopy<false, PixelFormat::R16G16B16A16_UINT>,
|
||||
MortonCopy<false, PixelFormat::B10G11R11_FLOAT>,
|
||||
MortonCopy<false, PixelFormat::R32G32B32A32_UINT>,
|
||||
MortonCopy<false, PixelFormat::BC1_RGBA_UNORM>,
|
||||
MortonCopy<false, PixelFormat::BC2_UNORM>,
|
||||
MortonCopy<false, PixelFormat::BC3_UNORM>,
|
||||
MortonCopy<false, PixelFormat::BC4_UNORM>,
|
||||
MortonCopy<false, PixelFormat::BC4_SNORM>,
|
||||
MortonCopy<false, PixelFormat::BC5_UNORM>,
|
||||
MortonCopy<false, PixelFormat::BC5_SNORM>,
|
||||
MortonCopy<false, PixelFormat::BC7_UNORM>,
|
||||
MortonCopy<false, PixelFormat::BC6H_UFLOAT>,
|
||||
MortonCopy<false, PixelFormat::BC6H_SFLOAT>,
|
||||
// TODO(Subv): Swizzling ASTC formats are not supported
|
||||
nullptr,
|
||||
MortonCopy<false, PixelFormat::B8G8R8A8_UNORM>,
|
||||
MortonCopy<false, PixelFormat::R32G32B32A32_FLOAT>,
|
||||
MortonCopy<false, PixelFormat::R32G32B32A32_SINT>,
|
||||
MortonCopy<false, PixelFormat::R32G32_FLOAT>,
|
||||
MortonCopy<false, PixelFormat::R32G32_SINT>,
|
||||
MortonCopy<false, PixelFormat::R32_FLOAT>,
|
||||
MortonCopy<false, PixelFormat::R16_FLOAT>,
|
||||
MortonCopy<false, PixelFormat::R16_UNORM>,
|
||||
MortonCopy<false, PixelFormat::R16_SNORM>,
|
||||
MortonCopy<false, PixelFormat::R16_UINT>,
|
||||
MortonCopy<false, PixelFormat::R16_SINT>,
|
||||
MortonCopy<false, PixelFormat::R16G16_UNORM>,
|
||||
MortonCopy<false, PixelFormat::R16G16_FLOAT>,
|
||||
MortonCopy<false, PixelFormat::R16G16_UINT>,
|
||||
MortonCopy<false, PixelFormat::R16G16_SINT>,
|
||||
MortonCopy<false, PixelFormat::R16G16_SNORM>,
|
||||
MortonCopy<false, PixelFormat::R32G32B32_FLOAT>,
|
||||
MortonCopy<false, PixelFormat::A8B8G8R8_SRGB>,
|
||||
MortonCopy<false, PixelFormat::R8G8_UNORM>,
|
||||
MortonCopy<false, PixelFormat::R8G8_SNORM>,
|
||||
MortonCopy<false, PixelFormat::R8G8_SINT>,
|
||||
MortonCopy<false, PixelFormat::R8G8_UINT>,
|
||||
MortonCopy<false, PixelFormat::R32G32_UINT>,
|
||||
MortonCopy<false, PixelFormat::R16G16B16X16_FLOAT>,
|
||||
MortonCopy<false, PixelFormat::R32_UINT>,
|
||||
MortonCopy<false, PixelFormat::R32_SINT>,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
MortonCopy<false, PixelFormat::B8G8R8A8_SRGB>,
|
||||
MortonCopy<false, PixelFormat::BC1_RGBA_SRGB>,
|
||||
MortonCopy<false, PixelFormat::BC2_SRGB>,
|
||||
MortonCopy<false, PixelFormat::BC3_SRGB>,
|
||||
MortonCopy<false, PixelFormat::BC7_SRGB>,
|
||||
MortonCopy<false, PixelFormat::A4B4G4R4_UNORM>,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
nullptr,
|
||||
MortonCopy<false, PixelFormat::E5B9G9R9_FLOAT>,
|
||||
MortonCopy<false, PixelFormat::D32_FLOAT>,
|
||||
MortonCopy<false, PixelFormat::D16_UNORM>,
|
||||
MortonCopy<false, PixelFormat::D24_UNORM_S8_UINT>,
|
||||
MortonCopy<false, PixelFormat::S8_UINT_D24_UNORM>,
|
||||
MortonCopy<false, PixelFormat::D32_FLOAT_S8_UINT>,
|
||||
};
|
||||
|
||||
static MortonCopyFn GetSwizzleFunction(MortonSwizzleMode mode, Surface::PixelFormat format) {
|
||||
switch (mode) {
|
||||
case MortonSwizzleMode::MortonToLinear:
|
||||
return morton_to_linear_fns[static_cast<std::size_t>(format)];
|
||||
case MortonSwizzleMode::LinearToMorton:
|
||||
return linear_to_morton_fns[static_cast<std::size_t>(format)];
|
||||
}
|
||||
UNREACHABLE();
|
||||
return morton_to_linear_fns[static_cast<std::size_t>(format)];
|
||||
}
|
||||
|
||||
void MortonSwizzle(MortonSwizzleMode mode, Surface::PixelFormat format, u32 stride,
|
||||
u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing,
|
||||
u8* buffer, u8* addr) {
|
||||
GetSwizzleFunction(mode, format)(stride, block_height, height, block_depth, depth,
|
||||
tile_width_spacing, buffer, addr);
|
||||
}
|
||||
|
||||
} // namespace VideoCore
|
|
@ -1,18 +0,0 @@
|
|||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/surface.h"
|
||||
|
||||
namespace VideoCore {
|
||||
|
||||
enum class MortonSwizzleMode { MortonToLinear, LinearToMorton };
|
||||
|
||||
void MortonSwizzle(MortonSwizzleMode mode, VideoCore::Surface::PixelFormat format, u32 stride,
|
||||
u32 block_height, u32 height, u32 block_depth, u32 depth, u32 tile_width_spacing,
|
||||
u8* buffer, u8* addr);
|
||||
|
||||
} // namespace VideoCore
|
|
@ -76,6 +76,9 @@ public:
|
|||
/// Sync memory between guest and host.
|
||||
virtual void SyncGuestHost() = 0;
|
||||
|
||||
/// Unmap memory range
|
||||
virtual void UnmapMemory(VAddr addr, u64 size) = 0;
|
||||
|
||||
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
|
||||
/// and invalidated
|
||||
virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
|
||||
|
@ -83,6 +86,12 @@ public:
|
|||
/// Notify the host renderer to wait for previous primitive and compute operations.
|
||||
virtual void WaitForIdle() = 0;
|
||||
|
||||
/// Notify the host renderer to wait for reads and writes to render targets and flush caches.
|
||||
virtual void FragmentBarrier() = 0;
|
||||
|
||||
/// Notify the host renderer to make available previous render target writes.
|
||||
virtual void TiledCacheBarrier() = 0;
|
||||
|
||||
/// Notify the rasterizer to send all written commands to the host GPU.
|
||||
virtual void FlushCommands() = 0;
|
||||
|
||||
|
@ -91,8 +100,7 @@ public:
|
|||
|
||||
/// Attempt to use a faster method to perform a surface copy
|
||||
[[nodiscard]] virtual bool AccelerateSurfaceCopy(
|
||||
const Tegra::Engines::Fermi2D::Regs::Surface& src,
|
||||
const Tegra::Engines::Fermi2D::Regs::Surface& dst,
|
||||
const Tegra::Engines::Fermi2D::Surface& src, const Tegra::Engines::Fermi2D::Surface& dst,
|
||||
const Tegra::Engines::Fermi2D::Config& copy_config) {
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -61,10 +61,9 @@ void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst
|
|||
|
||||
OGLBufferCache::OGLBufferCache(VideoCore::RasterizerInterface& rasterizer_,
|
||||
Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
|
||||
const Device& device_, std::size_t stream_size_)
|
||||
: GenericBufferCache{rasterizer_, gpu_memory_, cpu_memory_,
|
||||
std::make_unique<OGLStreamBuffer>(device_, stream_size_, true)},
|
||||
device{device_} {
|
||||
const Device& device_, OGLStreamBuffer& stream_buffer_,
|
||||
StateTracker& state_tracker)
|
||||
: GenericBufferCache{rasterizer_, gpu_memory_, cpu_memory_, stream_buffer_}, device{device_} {
|
||||
if (!device.HasFastBufferSubData()) {
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -22,6 +22,7 @@ namespace OpenGL {
|
|||
class Device;
|
||||
class OGLStreamBuffer;
|
||||
class RasterizerOpenGL;
|
||||
class StateTracker;
|
||||
|
||||
class Buffer : public VideoCommon::BufferBlock {
|
||||
public:
|
||||
|
@ -52,9 +53,10 @@ private:
|
|||
using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
|
||||
class OGLBufferCache final : public GenericBufferCache {
|
||||
public:
|
||||
explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer_,
|
||||
Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
|
||||
const Device& device_, std::size_t stream_size_);
|
||||
explicit OGLBufferCache(VideoCore::RasterizerInterface& rasterizer,
|
||||
Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
|
||||
const Device& device, OGLStreamBuffer& stream_buffer,
|
||||
StateTracker& state_tracker);
|
||||
~OGLBufferCache();
|
||||
|
||||
BufferInfo GetEmptyBuffer(std::size_t) override;
|
||||
|
|
|
@ -5,9 +5,11 @@
|
|||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <limits>
|
||||
#include <optional>
|
||||
#include <span>
|
||||
#include <vector>
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
@ -27,27 +29,29 @@ constexpr u32 ReservedUniformBlocks = 1;
|
|||
|
||||
constexpr u32 NumStages = 5;
|
||||
|
||||
constexpr std::array LimitUBOs = {
|
||||
constexpr std::array LIMIT_UBOS = {
|
||||
GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS,
|
||||
GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS,
|
||||
GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS};
|
||||
|
||||
constexpr std::array LimitSSBOs = {
|
||||
GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS,
|
||||
};
|
||||
constexpr std::array LIMIT_SSBOS = {
|
||||
GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS,
|
||||
GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS,
|
||||
GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS};
|
||||
|
||||
constexpr std::array LimitSamplers = {GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS};
|
||||
|
||||
constexpr std::array LimitImages = {
|
||||
GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS,
|
||||
};
|
||||
constexpr std::array LIMIT_SAMPLERS = {
|
||||
GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_TEXTURE_IMAGE_UNITS,
|
||||
GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS,
|
||||
};
|
||||
constexpr std::array LIMIT_IMAGES = {
|
||||
GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS,
|
||||
GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS,
|
||||
GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS};
|
||||
GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS,
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
T GetInteger(GLenum pname) {
|
||||
|
@ -76,8 +80,8 @@ std::vector<std::string_view> GetExtensions() {
|
|||
return extensions;
|
||||
}
|
||||
|
||||
bool HasExtension(const std::vector<std::string_view>& images, std::string_view extension) {
|
||||
return std::find(images.begin(), images.end(), extension) != images.end();
|
||||
bool HasExtension(std::span<const std::string_view> extensions, std::string_view extension) {
|
||||
return std::ranges::find(extensions, extension) != extensions.end();
|
||||
}
|
||||
|
||||
u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) {
|
||||
|
@ -91,8 +95,8 @@ u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) {
|
|||
|
||||
std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcept {
|
||||
std::array<u32, Tegra::Engines::MaxShaderTypes> max;
|
||||
std::transform(LimitUBOs.begin(), LimitUBOs.end(), max.begin(),
|
||||
[](GLenum pname) { return GetInteger<u32>(pname); });
|
||||
std::ranges::transform(LIMIT_UBOS, max.begin(),
|
||||
[](GLenum pname) { return GetInteger<u32>(pname); });
|
||||
return max;
|
||||
}
|
||||
|
||||
|
@ -115,9 +119,10 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin
|
|||
for (std::size_t i = 0; i < NumStages; ++i) {
|
||||
const std::size_t stage = stage_swizzle[i];
|
||||
bindings[stage] = {
|
||||
Extract(base_ubo, num_ubos, total_ubos / NumStages, LimitUBOs[stage]),
|
||||
Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LimitSSBOs[stage]),
|
||||
Extract(base_samplers, num_samplers, total_samplers / NumStages, LimitSamplers[stage])};
|
||||
Extract(base_ubo, num_ubos, total_ubos / NumStages, LIMIT_UBOS[stage]),
|
||||
Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LIMIT_SSBOS[stage]),
|
||||
Extract(base_samplers, num_samplers, total_samplers / NumStages,
|
||||
LIMIT_SAMPLERS[stage])};
|
||||
}
|
||||
|
||||
u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS);
|
||||
|
@ -130,7 +135,7 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin
|
|||
|
||||
// Reserve at least 4 image bindings on the fragment stage.
|
||||
bindings[4].image =
|
||||
Extract(base_images, num_images, std::max(4U, num_images / NumStages), LimitImages[4]);
|
||||
Extract(base_images, num_images, std::max(4U, num_images / NumStages), LIMIT_IMAGES[4]);
|
||||
|
||||
// This is guaranteed to be at least 1.
|
||||
const u32 total_extracted_images = num_images / (NumStages - 1);
|
||||
|
@ -142,7 +147,7 @@ std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindin
|
|||
continue;
|
||||
}
|
||||
bindings[stage].image =
|
||||
Extract(base_images, num_images, total_extracted_images, LimitImages[stage]);
|
||||
Extract(base_images, num_images, total_extracted_images, LIMIT_IMAGES[stage]);
|
||||
}
|
||||
|
||||
// Compute doesn't care about any of this.
|
||||
|
@ -188,6 +193,11 @@ bool IsASTCSupported() {
|
|||
return true;
|
||||
}
|
||||
|
||||
[[nodiscard]] bool IsDebugToolAttached(std::span<const std::string_view> extensions) {
|
||||
const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
|
||||
return nsight || HasExtension(extensions, "GL_EXT_debug_tool");
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
Device::Device()
|
||||
|
@ -206,9 +216,8 @@ Device::Device()
|
|||
"Beta driver 443.24 is known to have issues. There might be performance issues.");
|
||||
disable_fast_buffer_sub_data = true;
|
||||
}
|
||||
|
||||
uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
|
||||
shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
|
||||
uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
|
||||
shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
|
||||
max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
|
||||
max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
|
||||
max_compute_shared_memory_size = GetInteger<u32>(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE);
|
||||
|
@ -224,6 +233,7 @@ Device::Device()
|
|||
has_precise_bug = TestPreciseBug();
|
||||
has_nv_viewport_array2 = GLAD_GL_NV_viewport_array2;
|
||||
has_vertex_buffer_unified_memory = GLAD_GL_NV_vertex_buffer_unified_memory;
|
||||
has_debugging_tool_attached = IsDebugToolAttached(extensions);
|
||||
|
||||
// At the moment of writing this, only Nvidia's driver optimizes BufferSubData on exclusive
|
||||
// uniform buffers as "push constants"
|
||||
|
|
|
@ -36,11 +36,11 @@ public:
|
|||
return GetBaseBindings(static_cast<std::size_t>(shader_type));
|
||||
}
|
||||
|
||||
std::size_t GetUniformBufferAlignment() const {
|
||||
size_t GetUniformBufferAlignment() const {
|
||||
return uniform_buffer_alignment;
|
||||
}
|
||||
|
||||
std::size_t GetShaderStorageBufferAlignment() const {
|
||||
size_t GetShaderStorageBufferAlignment() const {
|
||||
return shader_storage_alignment;
|
||||
}
|
||||
|
||||
|
@ -104,6 +104,10 @@ public:
|
|||
return has_nv_viewport_array2;
|
||||
}
|
||||
|
||||
bool HasDebuggingToolAttached() const {
|
||||
return has_debugging_tool_attached;
|
||||
}
|
||||
|
||||
bool UseAssemblyShaders() const {
|
||||
return use_assembly_shaders;
|
||||
}
|
||||
|
@ -118,8 +122,8 @@ private:
|
|||
|
||||
std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{};
|
||||
std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{};
|
||||
std::size_t uniform_buffer_alignment{};
|
||||
std::size_t shader_storage_alignment{};
|
||||
size_t uniform_buffer_alignment{};
|
||||
size_t shader_storage_alignment{};
|
||||
u32 max_vertex_attributes{};
|
||||
u32 max_varyings{};
|
||||
u32 max_compute_shared_memory_size{};
|
||||
|
@ -135,6 +139,7 @@ private:
|
|||
bool has_precise_bug{};
|
||||
bool has_fast_buffer_sub_data{};
|
||||
bool has_nv_viewport_array2{};
|
||||
bool has_debugging_tool_attached{};
|
||||
bool use_assembly_shaders{};
|
||||
bool use_asynchronous_shaders{};
|
||||
};
|
||||
|
|
|
@ -46,7 +46,7 @@ void GLInnerFence::Wait() {
|
|||
}
|
||||
|
||||
FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_,
|
||||
Tegra::GPU& gpu_, TextureCacheOpenGL& texture_cache_,
|
||||
Tegra::GPU& gpu_, TextureCache& texture_cache_,
|
||||
OGLBufferCache& buffer_cache_, QueryCache& query_cache_)
|
||||
: GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {}
|
||||
|
||||
|
|
|
@ -33,12 +33,12 @@ private:
|
|||
|
||||
using Fence = std::shared_ptr<GLInnerFence>;
|
||||
using GenericFenceManager =
|
||||
VideoCommon::FenceManager<Fence, TextureCacheOpenGL, OGLBufferCache, QueryCache>;
|
||||
VideoCommon::FenceManager<Fence, TextureCache, OGLBufferCache, QueryCache>;
|
||||
|
||||
class FenceManagerOpenGL final : public GenericFenceManager {
|
||||
public:
|
||||
explicit FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
|
||||
TextureCacheOpenGL& texture_cache_, OGLBufferCache& buffer_cache_,
|
||||
TextureCache& texture_cache_, OGLBufferCache& buffer_cache_,
|
||||
QueryCache& query_cache_);
|
||||
|
||||
protected:
|
||||
|
|
|
@ -1,85 +0,0 @@
|
|||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <tuple>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/renderer_opengl/gl_framebuffer_cache.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
using VideoCore::Surface::SurfaceType;
|
||||
|
||||
FramebufferCacheOpenGL::FramebufferCacheOpenGL() = default;
|
||||
|
||||
FramebufferCacheOpenGL::~FramebufferCacheOpenGL() = default;
|
||||
|
||||
GLuint FramebufferCacheOpenGL::GetFramebuffer(const FramebufferCacheKey& key) {
|
||||
const auto [entry, is_cache_miss] = cache.try_emplace(key);
|
||||
auto& framebuffer{entry->second};
|
||||
if (is_cache_miss) {
|
||||
framebuffer = CreateFramebuffer(key);
|
||||
}
|
||||
return framebuffer.handle;
|
||||
}
|
||||
|
||||
OGLFramebuffer FramebufferCacheOpenGL::CreateFramebuffer(const FramebufferCacheKey& key) {
|
||||
OGLFramebuffer framebuffer;
|
||||
framebuffer.Create();
|
||||
|
||||
// TODO(Rodrigo): Use DSA here after Nvidia fixes their framebuffer DSA bugs.
|
||||
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer.handle);
|
||||
|
||||
if (key.zeta) {
|
||||
const bool stencil = key.zeta->GetSurfaceParams().type == SurfaceType::DepthStencil;
|
||||
const GLenum attach_target = stencil ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT;
|
||||
key.zeta->Attach(attach_target, GL_DRAW_FRAMEBUFFER);
|
||||
}
|
||||
|
||||
std::size_t num_buffers = 0;
|
||||
std::array<GLenum, Maxwell::NumRenderTargets> targets;
|
||||
|
||||
for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
|
||||
if (!key.colors[index]) {
|
||||
targets[index] = GL_NONE;
|
||||
continue;
|
||||
}
|
||||
const GLenum attach_target = GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index);
|
||||
key.colors[index]->Attach(attach_target, GL_DRAW_FRAMEBUFFER);
|
||||
|
||||
const u32 attachment = (key.color_attachments >> (BitsPerAttachment * index)) & 0b1111;
|
||||
targets[index] = GL_COLOR_ATTACHMENT0 + attachment;
|
||||
num_buffers = index + 1;
|
||||
}
|
||||
|
||||
if (num_buffers > 0) {
|
||||
glDrawBuffers(static_cast<GLsizei>(num_buffers), std::data(targets));
|
||||
} else {
|
||||
glDrawBuffer(GL_NONE);
|
||||
}
|
||||
|
||||
return framebuffer;
|
||||
}
|
||||
|
||||
std::size_t FramebufferCacheKey::Hash() const noexcept {
|
||||
std::size_t hash = std::hash<View>{}(zeta);
|
||||
for (const auto& color : colors) {
|
||||
hash ^= std::hash<View>{}(color);
|
||||
}
|
||||
hash ^= static_cast<std::size_t>(color_attachments) << 16;
|
||||
return hash;
|
||||
}
|
||||
|
||||
bool FramebufferCacheKey::operator==(const FramebufferCacheKey& rhs) const noexcept {
|
||||
return std::tie(colors, zeta, color_attachments) ==
|
||||
std::tie(rhs.colors, rhs.zeta, rhs.color_attachments);
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
|
@ -1,68 +0,0 @@
|
|||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <unordered_map>
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_texture_cache.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
constexpr std::size_t BitsPerAttachment = 4;
|
||||
|
||||
struct FramebufferCacheKey {
|
||||
View zeta;
|
||||
std::array<View, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> colors;
|
||||
u32 color_attachments = 0;
|
||||
|
||||
std::size_t Hash() const noexcept;
|
||||
|
||||
bool operator==(const FramebufferCacheKey& rhs) const noexcept;
|
||||
|
||||
bool operator!=(const FramebufferCacheKey& rhs) const noexcept {
|
||||
return !operator==(rhs);
|
||||
}
|
||||
|
||||
void SetAttachment(std::size_t index, u32 attachment) {
|
||||
color_attachments |= attachment << (BitsPerAttachment * index);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
||||
|
||||
namespace std {
|
||||
|
||||
template <>
|
||||
struct hash<OpenGL::FramebufferCacheKey> {
|
||||
std::size_t operator()(const OpenGL::FramebufferCacheKey& k) const noexcept {
|
||||
return k.Hash();
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace std
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
class FramebufferCacheOpenGL {
|
||||
public:
|
||||
FramebufferCacheOpenGL();
|
||||
~FramebufferCacheOpenGL();
|
||||
|
||||
GLuint GetFramebuffer(const FramebufferCacheKey& key);
|
||||
|
||||
private:
|
||||
OGLFramebuffer CreateFramebuffer(const FramebufferCacheKey& key);
|
||||
|
||||
std::unordered_map<FramebufferCacheKey, OGLFramebuffer> cache;
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
|
@ -25,12 +25,15 @@
|
|||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/engines/shader_type.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_device.h"
|
||||
#include "video_core/renderer_opengl/gl_query_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_rasterizer.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_texture_cache.h"
|
||||
#include "video_core/renderer_opengl/maxwell_to_gl.h"
|
||||
#include "video_core/renderer_opengl/renderer_opengl.h"
|
||||
#include "video_core/shader_cache.h"
|
||||
#include "video_core/texture_cache/texture_cache.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
|
@ -55,18 +58,32 @@ MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255
|
|||
|
||||
namespace {
|
||||
|
||||
constexpr std::size_t NUM_CONST_BUFFERS_PER_STAGE = 18;
|
||||
constexpr std::size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE =
|
||||
constexpr size_t NUM_CONST_BUFFERS_PER_STAGE = 18;
|
||||
constexpr size_t NUM_CONST_BUFFERS_BYTES_PER_STAGE =
|
||||
NUM_CONST_BUFFERS_PER_STAGE * Maxwell::MaxConstBufferSize;
|
||||
constexpr std::size_t TOTAL_CONST_BUFFER_BYTES =
|
||||
constexpr size_t TOTAL_CONST_BUFFER_BYTES =
|
||||
NUM_CONST_BUFFERS_BYTES_PER_STAGE * Maxwell::MaxShaderStage;
|
||||
|
||||
constexpr std::size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
|
||||
constexpr std::size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16;
|
||||
constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
|
||||
constexpr size_t NUM_SUPPORTED_VERTEX_BINDINGS = 16;
|
||||
|
||||
constexpr size_t MAX_TEXTURES = 192;
|
||||
constexpr size_t MAX_IMAGES = 48;
|
||||
|
||||
struct TextureHandle {
|
||||
constexpr TextureHandle(u32 data, bool via_header_index) {
|
||||
const Tegra::Texture::TextureHandle handle{data};
|
||||
image = handle.tic_id;
|
||||
sampler = via_header_index ? image : handle.tsc_id.Value();
|
||||
}
|
||||
|
||||
u32 image;
|
||||
u32 sampler;
|
||||
};
|
||||
|
||||
template <typename Engine, typename Entry>
|
||||
Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
|
||||
ShaderType shader_type, std::size_t index = 0) {
|
||||
TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry,
|
||||
ShaderType shader_type, size_t index = 0) {
|
||||
if constexpr (std::is_same_v<Entry, SamplerEntry>) {
|
||||
if (entry.is_separated) {
|
||||
const u32 buffer_1 = entry.buffer;
|
||||
|
@ -75,21 +92,16 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry
|
|||
const u32 offset_2 = entry.secondary_offset;
|
||||
const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1);
|
||||
const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2);
|
||||
return engine.GetTextureInfo(handle_1 | handle_2);
|
||||
return TextureHandle(handle_1 | handle_2, via_header_index);
|
||||
}
|
||||
}
|
||||
if (entry.is_bindless) {
|
||||
const u32 handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
|
||||
return engine.GetTextureInfo(handle);
|
||||
}
|
||||
|
||||
const auto& gpu_profile = engine.AccessGuestDriverProfile();
|
||||
const u32 offset = entry.offset + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize());
|
||||
if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) {
|
||||
return engine.GetStageTexture(shader_type, offset);
|
||||
} else {
|
||||
return engine.GetTexture(offset);
|
||||
const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
|
||||
return TextureHandle(raw, via_header_index);
|
||||
}
|
||||
const u32 buffer = engine.GetBoundBuffer();
|
||||
const u64 offset = (entry.offset + index) * sizeof(u32);
|
||||
return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index);
|
||||
}
|
||||
|
||||
std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
|
||||
|
@ -97,7 +109,6 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
|
|||
if (!entry.IsIndirect()) {
|
||||
return entry.GetSize();
|
||||
}
|
||||
|
||||
if (buffer.size > Maxwell::MaxConstBufferSize) {
|
||||
LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", buffer.size,
|
||||
Maxwell::MaxConstBufferSize);
|
||||
|
@ -147,23 +158,60 @@ void UpdateBindlessSSBOs(GLenum target, const BindlessSSBO* ssbos, size_t num_ss
|
|||
reinterpret_cast<const GLuint*>(ssbos));
|
||||
}
|
||||
|
||||
ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) {
|
||||
if (entry.is_buffer) {
|
||||
return ImageViewType::Buffer;
|
||||
}
|
||||
switch (entry.type) {
|
||||
case Tegra::Shader::TextureType::Texture1D:
|
||||
return entry.is_array ? ImageViewType::e1DArray : ImageViewType::e1D;
|
||||
case Tegra::Shader::TextureType::Texture2D:
|
||||
return entry.is_array ? ImageViewType::e2DArray : ImageViewType::e2D;
|
||||
case Tegra::Shader::TextureType::Texture3D:
|
||||
return ImageViewType::e3D;
|
||||
case Tegra::Shader::TextureType::TextureCube:
|
||||
return entry.is_array ? ImageViewType::CubeArray : ImageViewType::Cube;
|
||||
}
|
||||
UNREACHABLE();
|
||||
return ImageViewType::e2D;
|
||||
}
|
||||
|
||||
ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) {
|
||||
switch (entry.type) {
|
||||
case Tegra::Shader::ImageType::Texture1D:
|
||||
return ImageViewType::e1D;
|
||||
case Tegra::Shader::ImageType::Texture1DArray:
|
||||
return ImageViewType::e1DArray;
|
||||
case Tegra::Shader::ImageType::Texture2D:
|
||||
return ImageViewType::e2D;
|
||||
case Tegra::Shader::ImageType::Texture2DArray:
|
||||
return ImageViewType::e2DArray;
|
||||
case Tegra::Shader::ImageType::Texture3D:
|
||||
return ImageViewType::e3D;
|
||||
case Tegra::Shader::ImageType::TextureBuffer:
|
||||
return ImageViewType::Buffer;
|
||||
}
|
||||
UNREACHABLE();
|
||||
return ImageViewType::e2D;
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
|
||||
Core::Memory::Memory& cpu_memory_, const Device& device_,
|
||||
ScreenInfo& screen_info_, ProgramManager& program_manager_,
|
||||
StateTracker& state_tracker_)
|
||||
: RasterizerAccelerated{cpu_memory_}, gpu(gpu_), maxwell3d(gpu.Maxwell3D()),
|
||||
: RasterizerAccelerated(cpu_memory_), gpu(gpu_), maxwell3d(gpu.Maxwell3D()),
|
||||
kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_),
|
||||
screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_),
|
||||
texture_cache(*this, maxwell3d, gpu_memory, device, state_tracker),
|
||||
stream_buffer(device, state_tracker),
|
||||
texture_cache_runtime(device, program_manager, state_tracker),
|
||||
texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
|
||||
shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device),
|
||||
query_cache(*this, maxwell3d, gpu_memory),
|
||||
buffer_cache(*this, gpu_memory, cpu_memory_, device, STREAM_BUFFER_SIZE),
|
||||
buffer_cache(*this, gpu_memory, cpu_memory_, device, stream_buffer, state_tracker),
|
||||
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
|
||||
async_shaders(emu_window_) {
|
||||
CheckExtensions();
|
||||
|
||||
unified_uniform_buffer.Create();
|
||||
glNamedBufferStorage(unified_uniform_buffer.handle, TOTAL_CONST_BUFFER_BYTES, nullptr, 0);
|
||||
|
||||
|
@ -174,7 +222,6 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
|
|||
nullptr, 0);
|
||||
}
|
||||
}
|
||||
|
||||
if (device.UseAsynchronousShaders()) {
|
||||
async_shaders.AllocateWorkers();
|
||||
}
|
||||
|
@ -186,14 +233,6 @@ RasterizerOpenGL::~RasterizerOpenGL() {
|
|||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::CheckExtensions() {
|
||||
if (!GLAD_GL_ARB_texture_filter_anisotropic && !GLAD_GL_EXT_texture_filter_anisotropic) {
|
||||
LOG_WARNING(
|
||||
Render_OpenGL,
|
||||
"Anisotropic filter is not supported! This can cause graphical issues in some games.");
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupVertexFormat() {
|
||||
auto& flags = maxwell3d.dirty.flags;
|
||||
if (!flags[Dirty::VertexFormats]) {
|
||||
|
@ -316,10 +355,16 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() {
|
|||
return info.offset;
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
|
||||
void RasterizerOpenGL::SetupShaders() {
|
||||
MICROPROFILE_SCOPE(OpenGL_Shader);
|
||||
u32 clip_distances = 0;
|
||||
|
||||
std::array<Shader*, Maxwell::MaxShaderStage> shaders{};
|
||||
image_view_indices.clear();
|
||||
sampler_handles.clear();
|
||||
|
||||
texture_cache.SynchronizeGraphicsDescriptors();
|
||||
|
||||
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
|
||||
const auto& shader_config = maxwell3d.regs.shader_config[index];
|
||||
const auto program{static_cast<Maxwell::ShaderProgram>(index)};
|
||||
|
@ -338,7 +383,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
|
|||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// Currently this stages are not supported in the OpenGL backend.
|
||||
// TODO(Blinkhawk): Port tesselation shaders from Vulkan to OpenGL
|
||||
if (program == Maxwell::ShaderProgram::TesselationControl ||
|
||||
|
@ -347,7 +391,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
|
|||
}
|
||||
|
||||
Shader* const shader = shader_cache.GetStageProgram(program, async_shaders);
|
||||
|
||||
const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0;
|
||||
switch (program) {
|
||||
case Maxwell::ShaderProgram::VertexA:
|
||||
|
@ -363,14 +406,17 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
|
|||
default:
|
||||
UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,
|
||||
shader_config.enable.Value(), shader_config.offset);
|
||||
break;
|
||||
}
|
||||
|
||||
// Stage indices are 0 - 5
|
||||
const std::size_t stage = index == 0 ? 0 : index - 1;
|
||||
const size_t stage = index == 0 ? 0 : index - 1;
|
||||
shaders[stage] = shader;
|
||||
|
||||
SetupDrawConstBuffers(stage, shader);
|
||||
SetupDrawGlobalMemory(stage, shader);
|
||||
SetupDrawTextures(stage, shader);
|
||||
SetupDrawImages(stage, shader);
|
||||
SetupDrawTextures(shader, stage);
|
||||
SetupDrawImages(shader, stage);
|
||||
|
||||
// Workaround for Intel drivers.
|
||||
// When a clip distance is enabled but not set in the shader it crops parts of the screen
|
||||
|
@ -384,9 +430,23 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
|
|||
++index;
|
||||
}
|
||||
}
|
||||
|
||||
SyncClipEnabled(clip_distances);
|
||||
maxwell3d.dirty.flags[Dirty::Shaders] = false;
|
||||
|
||||
const std::span indices_span(image_view_indices.data(), image_view_indices.size());
|
||||
texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
|
||||
|
||||
size_t image_view_index = 0;
|
||||
size_t texture_index = 0;
|
||||
size_t image_index = 0;
|
||||
for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
|
||||
const Shader* const shader = shaders[stage];
|
||||
if (shader) {
|
||||
const auto base = device.GetBaseBindings(stage);
|
||||
BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index,
|
||||
texture_index, image_index);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
|
||||
|
@ -417,98 +477,6 @@ void RasterizerOpenGL::LoadDiskResources(u64 title_id, const std::atomic_bool& s
|
|||
shader_cache.LoadDiskCache(title_id, stop_loading, callback);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::ConfigureFramebuffers() {
|
||||
MICROPROFILE_SCOPE(OpenGL_Framebuffer);
|
||||
if (!maxwell3d.dirty.flags[VideoCommon::Dirty::RenderTargets]) {
|
||||
return;
|
||||
}
|
||||
maxwell3d.dirty.flags[VideoCommon::Dirty::RenderTargets] = false;
|
||||
|
||||
texture_cache.GuardRenderTargets(true);
|
||||
|
||||
View depth_surface = texture_cache.GetDepthBufferSurface(true);
|
||||
|
||||
const auto& regs = maxwell3d.regs;
|
||||
UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0);
|
||||
|
||||
// Bind the framebuffer surfaces
|
||||
FramebufferCacheKey key;
|
||||
const auto colors_count = static_cast<std::size_t>(regs.rt_control.count);
|
||||
for (std::size_t index = 0; index < colors_count; ++index) {
|
||||
View color_surface{texture_cache.GetColorBufferSurface(index, true)};
|
||||
if (!color_surface) {
|
||||
continue;
|
||||
}
|
||||
// Assume that a surface will be written to if it is used as a framebuffer, even
|
||||
// if the shader doesn't actually write to it.
|
||||
texture_cache.MarkColorBufferInUse(index);
|
||||
|
||||
key.SetAttachment(index, regs.rt_control.GetMap(index));
|
||||
key.colors[index] = std::move(color_surface);
|
||||
}
|
||||
|
||||
if (depth_surface) {
|
||||
// Assume that a surface will be written to if it is used as a framebuffer, even if
|
||||
// the shader doesn't actually write to it.
|
||||
texture_cache.MarkDepthBufferInUse();
|
||||
key.zeta = std::move(depth_surface);
|
||||
}
|
||||
|
||||
texture_cache.GuardRenderTargets(false);
|
||||
|
||||
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key));
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil) {
|
||||
const auto& regs = maxwell3d.regs;
|
||||
|
||||
texture_cache.GuardRenderTargets(true);
|
||||
View color_surface;
|
||||
|
||||
if (using_color) {
|
||||
// Determine if we have to preserve the contents.
|
||||
// First we have to make sure all clear masks are enabled.
|
||||
bool preserve_contents = !regs.clear_buffers.R || !regs.clear_buffers.G ||
|
||||
!regs.clear_buffers.B || !regs.clear_buffers.A;
|
||||
const std::size_t index = regs.clear_buffers.RT;
|
||||
if (regs.clear_flags.scissor) {
|
||||
// Then we have to confirm scissor testing clears the whole image.
|
||||
const auto& scissor = regs.scissor_test[0];
|
||||
preserve_contents |= scissor.min_x > 0;
|
||||
preserve_contents |= scissor.min_y > 0;
|
||||
preserve_contents |= scissor.max_x < regs.rt[index].width;
|
||||
preserve_contents |= scissor.max_y < regs.rt[index].height;
|
||||
}
|
||||
|
||||
color_surface = texture_cache.GetColorBufferSurface(index, preserve_contents);
|
||||
texture_cache.MarkColorBufferInUse(index);
|
||||
}
|
||||
|
||||
View depth_surface;
|
||||
if (using_depth_stencil) {
|
||||
bool preserve_contents = false;
|
||||
if (regs.clear_flags.scissor) {
|
||||
// For depth stencil clears we only have to confirm scissor test covers the whole image.
|
||||
const auto& scissor = regs.scissor_test[0];
|
||||
preserve_contents |= scissor.min_x > 0;
|
||||
preserve_contents |= scissor.min_y > 0;
|
||||
preserve_contents |= scissor.max_x < regs.zeta_width;
|
||||
preserve_contents |= scissor.max_y < regs.zeta_height;
|
||||
}
|
||||
|
||||
depth_surface = texture_cache.GetDepthBufferSurface(preserve_contents);
|
||||
texture_cache.MarkDepthBufferInUse();
|
||||
}
|
||||
texture_cache.GuardRenderTargets(false);
|
||||
|
||||
FramebufferCacheKey key;
|
||||
key.colors[0] = std::move(color_surface);
|
||||
key.zeta = std::move(depth_surface);
|
||||
|
||||
state_tracker.NotifyFramebuffer();
|
||||
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key));
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::Clear() {
|
||||
if (!maxwell3d.ShouldExecute()) {
|
||||
return;
|
||||
|
@ -523,8 +491,9 @@ void RasterizerOpenGL::Clear() {
|
|||
regs.clear_buffers.A) {
|
||||
use_color = true;
|
||||
|
||||
state_tracker.NotifyColorMask0();
|
||||
glColorMaski(0, regs.clear_buffers.R != 0, regs.clear_buffers.G != 0,
|
||||
const GLuint index = regs.clear_buffers.RT;
|
||||
state_tracker.NotifyColorMask(index);
|
||||
glColorMaski(index, regs.clear_buffers.R != 0, regs.clear_buffers.G != 0,
|
||||
regs.clear_buffers.B != 0, regs.clear_buffers.A != 0);
|
||||
|
||||
// TODO(Rodrigo): Determine if clamping is used on clears
|
||||
|
@ -557,15 +526,17 @@ void RasterizerOpenGL::Clear() {
|
|||
state_tracker.NotifyScissor0();
|
||||
glDisablei(GL_SCISSOR_TEST, 0);
|
||||
}
|
||||
|
||||
UNIMPLEMENTED_IF(regs.clear_flags.viewport);
|
||||
|
||||
ConfigureClearFramebuffer(use_color, use_depth || use_stencil);
|
||||
|
||||
if (use_color) {
|
||||
glClearBufferfv(GL_COLOR, 0, regs.clear_color);
|
||||
{
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
texture_cache.UpdateRenderTargets(true);
|
||||
state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
|
||||
}
|
||||
|
||||
if (use_color) {
|
||||
glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color);
|
||||
}
|
||||
if (use_depth && use_stencil) {
|
||||
glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil);
|
||||
} else if (use_depth) {
|
||||
|
@ -622,16 +593,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
|
|||
(Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
|
||||
|
||||
// Prepare the vertex array.
|
||||
const bool invalidated = buffer_cache.Map(buffer_size);
|
||||
|
||||
if (invalidated) {
|
||||
// When the stream buffer has been invalidated, we have to consider vertex buffers as dirty
|
||||
auto& dirty = maxwell3d.dirty.flags;
|
||||
dirty[Dirty::VertexBuffers] = true;
|
||||
for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) {
|
||||
dirty[index] = true;
|
||||
}
|
||||
}
|
||||
buffer_cache.Map(buffer_size);
|
||||
|
||||
// Prepare vertex array format.
|
||||
SetupVertexFormat();
|
||||
|
@ -655,22 +617,16 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
|
|||
}
|
||||
|
||||
// Setup shaders and their used resources.
|
||||
texture_cache.GuardSamplers(true);
|
||||
const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology);
|
||||
SetupShaders(primitive_mode);
|
||||
texture_cache.GuardSamplers(false);
|
||||
|
||||
ConfigureFramebuffers();
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
SetupShaders();
|
||||
|
||||
// Signal the buffer cache that we are not going to upload more things.
|
||||
buffer_cache.Unmap();
|
||||
|
||||
texture_cache.UpdateRenderTargets(false);
|
||||
state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
|
||||
program_manager.BindGraphicsPipeline();
|
||||
|
||||
if (texture_cache.TextureBarrier()) {
|
||||
glTextureBarrier();
|
||||
}
|
||||
|
||||
const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology);
|
||||
BeginTransformFeedback(primitive_mode);
|
||||
|
||||
const GLuint base_instance = static_cast<GLuint>(maxwell3d.regs.vb_base_instance);
|
||||
|
@ -722,15 +678,13 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
|
|||
buffer_cache.Acquire();
|
||||
current_cbuf = 0;
|
||||
|
||||
auto kernel = shader_cache.GetComputeKernel(code_addr);
|
||||
program_manager.BindCompute(kernel->GetHandle());
|
||||
Shader* const kernel = shader_cache.GetComputeKernel(code_addr);
|
||||
|
||||
SetupComputeTextures(kernel);
|
||||
SetupComputeImages(kernel);
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
BindComputeTextures(kernel);
|
||||
|
||||
const std::size_t buffer_size =
|
||||
Tegra::Engines::KeplerCompute::NumConstBuffers *
|
||||
(Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
|
||||
const size_t buffer_size = Tegra::Engines::KeplerCompute::NumConstBuffers *
|
||||
(Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
|
||||
buffer_cache.Map(buffer_size);
|
||||
|
||||
SetupComputeConstBuffers(kernel);
|
||||
|
@ -739,7 +693,6 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
|
|||
buffer_cache.Unmap();
|
||||
|
||||
const auto& launch_desc = kepler_compute.launch_description;
|
||||
program_manager.BindCompute(kernel->GetHandle());
|
||||
glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
|
||||
++num_queued_commands;
|
||||
}
|
||||
|
@ -760,7 +713,10 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
|
|||
if (addr == 0 || size == 0) {
|
||||
return;
|
||||
}
|
||||
texture_cache.FlushRegion(addr, size);
|
||||
{
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
texture_cache.DownloadMemory(addr, size);
|
||||
}
|
||||
buffer_cache.FlushRegion(addr, size);
|
||||
query_cache.FlushRegion(addr, size);
|
||||
}
|
||||
|
@ -769,7 +725,8 @@ bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) {
|
|||
if (!Settings::IsGPULevelHigh()) {
|
||||
return buffer_cache.MustFlushRegion(addr, size);
|
||||
}
|
||||
return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size);
|
||||
return texture_cache.IsRegionGpuModified(addr, size) ||
|
||||
buffer_cache.MustFlushRegion(addr, size);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
|
||||
|
@ -777,7 +734,10 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
|
|||
if (addr == 0 || size == 0) {
|
||||
return;
|
||||
}
|
||||
texture_cache.InvalidateRegion(addr, size);
|
||||
{
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
texture_cache.WriteMemory(addr, size);
|
||||
}
|
||||
shader_cache.InvalidateRegion(addr, size);
|
||||
buffer_cache.InvalidateRegion(addr, size);
|
||||
query_cache.InvalidateRegion(addr, size);
|
||||
|
@ -788,18 +748,29 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
|
|||
if (addr == 0 || size == 0) {
|
||||
return;
|
||||
}
|
||||
texture_cache.OnCPUWrite(addr, size);
|
||||
{
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
texture_cache.WriteMemory(addr, size);
|
||||
}
|
||||
shader_cache.OnCPUWrite(addr, size);
|
||||
buffer_cache.OnCPUWrite(addr, size);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncGuestHost() {
|
||||
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
|
||||
texture_cache.SyncGuestHost();
|
||||
buffer_cache.SyncGuestHost();
|
||||
shader_cache.SyncGuestHost();
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {
|
||||
{
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
texture_cache.UnmapMemory(addr, size);
|
||||
}
|
||||
buffer_cache.OnCPUWrite(addr, size);
|
||||
shader_cache.OnCPUWrite(addr, size);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
|
||||
if (!gpu.IsAsync()) {
|
||||
gpu_memory.Write<u32>(addr, value);
|
||||
|
@ -841,6 +812,14 @@ void RasterizerOpenGL::WaitForIdle() {
|
|||
GL_SHADER_STORAGE_BARRIER_BIT | GL_QUERY_BUFFER_BARRIER_BIT);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::FragmentBarrier() {
|
||||
glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::TiledCacheBarrier() {
|
||||
glTextureBarrier();
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::FlushCommands() {
|
||||
// Only flush when we have commands queued to OpenGL.
|
||||
if (num_queued_commands == 0) {
|
||||
|
@ -854,47 +833,97 @@ void RasterizerOpenGL::TickFrame() {
|
|||
// Ticking a frame means that buffers will be swapped, calling glFlush implicitly.
|
||||
num_queued_commands = 0;
|
||||
|
||||
fence_manager.TickFrame();
|
||||
buffer_cache.TickFrame();
|
||||
{
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
texture_cache.TickFrame();
|
||||
}
|
||||
}
|
||||
|
||||
bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
|
||||
const Tegra::Engines::Fermi2D::Regs::Surface& dst,
|
||||
bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
|
||||
const Tegra::Engines::Fermi2D::Surface& dst,
|
||||
const Tegra::Engines::Fermi2D::Config& copy_config) {
|
||||
MICROPROFILE_SCOPE(OpenGL_Blits);
|
||||
texture_cache.DoFermiCopy(src, dst, copy_config);
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
texture_cache.BlitImage(dst, src, copy_config);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
|
||||
VAddr framebuffer_addr, u32 pixel_stride) {
|
||||
if (!framebuffer_addr) {
|
||||
return {};
|
||||
if (framebuffer_addr == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
|
||||
|
||||
const auto surface{texture_cache.TryFindFramebufferSurface(framebuffer_addr)};
|
||||
if (!surface) {
|
||||
return {};
|
||||
auto lock = texture_cache.AcquireLock();
|
||||
ImageView* const image_view{texture_cache.TryFindFramebufferImageView(framebuffer_addr)};
|
||||
if (!image_view) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Verify that the cached surface is the same size and format as the requested framebuffer
|
||||
const auto& params{surface->GetSurfaceParams()};
|
||||
const auto& pixel_format{
|
||||
VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)};
|
||||
ASSERT_MSG(params.width == config.width, "Framebuffer width is different");
|
||||
ASSERT_MSG(params.height == config.height, "Framebuffer height is different");
|
||||
|
||||
if (params.pixel_format != pixel_format) {
|
||||
LOG_DEBUG(Render_OpenGL, "Framebuffer pixel_format is different");
|
||||
}
|
||||
|
||||
screen_info.display_texture = surface->GetTexture();
|
||||
screen_info.display_srgb = surface->GetSurfaceParams().srgb_conversion;
|
||||
// ASSERT_MSG(image_view->size.width == config.width, "Framebuffer width is different");
|
||||
// ASSERT_MSG(image_view->size.height == config.height, "Framebuffer height is different");
|
||||
|
||||
screen_info.display_texture = image_view->Handle(ImageViewType::e2D);
|
||||
screen_info.display_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format);
|
||||
return true;
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::BindComputeTextures(Shader* kernel) {
|
||||
image_view_indices.clear();
|
||||
sampler_handles.clear();
|
||||
|
||||
texture_cache.SynchronizeComputeDescriptors();
|
||||
|
||||
SetupComputeTextures(kernel);
|
||||
SetupComputeImages(kernel);
|
||||
|
||||
const std::span indices_span(image_view_indices.data(), image_view_indices.size());
|
||||
texture_cache.FillComputeImageViews(indices_span, image_view_ids);
|
||||
|
||||
program_manager.BindCompute(kernel->GetHandle());
|
||||
size_t image_view_index = 0;
|
||||
size_t texture_index = 0;
|
||||
size_t image_index = 0;
|
||||
BindTextures(kernel->GetEntries(), 0, 0, image_view_index, texture_index, image_index);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_texture,
|
||||
GLuint base_image, size_t& image_view_index,
|
||||
size_t& texture_index, size_t& image_index) {
|
||||
const GLuint* const samplers = sampler_handles.data() + texture_index;
|
||||
const GLuint* const textures = texture_handles.data() + texture_index;
|
||||
const GLuint* const images = image_handles.data() + image_index;
|
||||
|
||||
const size_t num_samplers = entries.samplers.size();
|
||||
for (const auto& sampler : entries.samplers) {
|
||||
for (size_t i = 0; i < sampler.size; ++i) {
|
||||
const ImageViewId image_view_id = image_view_ids[image_view_index++];
|
||||
const ImageView& image_view = texture_cache.GetImageView(image_view_id);
|
||||
const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(sampler));
|
||||
texture_handles[texture_index++] = handle;
|
||||
}
|
||||
}
|
||||
const size_t num_images = entries.images.size();
|
||||
for (size_t unit = 0; unit < num_images; ++unit) {
|
||||
// TODO: Mark as modified
|
||||
const ImageViewId image_view_id = image_view_ids[image_view_index++];
|
||||
const ImageView& image_view = texture_cache.GetImageView(image_view_id);
|
||||
const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(entries.images[unit]));
|
||||
image_handles[image_index] = handle;
|
||||
++image_index;
|
||||
}
|
||||
if (num_samplers > 0) {
|
||||
glBindSamplers(base_texture, static_cast<GLsizei>(num_samplers), samplers);
|
||||
glBindTextures(base_texture, static_cast<GLsizei>(num_samplers), textures);
|
||||
}
|
||||
if (num_images > 0) {
|
||||
glBindImageTextures(base_image, static_cast<GLsizei>(num_images), images);
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) {
|
||||
static constexpr std::array PARAMETER_LUT{
|
||||
GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
|
||||
|
@ -999,7 +1028,6 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* sh
|
|||
GL_VERTEX_PROGRAM_NV, GL_TESS_CONTROL_PROGRAM_NV, GL_TESS_EVALUATION_PROGRAM_NV,
|
||||
GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
|
||||
};
|
||||
|
||||
const auto& cbufs{maxwell3d.state.shader_stages[stage_index]};
|
||||
const auto& entries{shader->GetEntries().global_memory_entries};
|
||||
|
||||
|
@ -1056,77 +1084,53 @@ void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& e
|
|||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) {
|
||||
MICROPROFILE_SCOPE(OpenGL_Texture);
|
||||
u32 binding = device.GetBaseBindings(stage_index).sampler;
|
||||
void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) {
|
||||
const bool via_header_index =
|
||||
maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
|
||||
for (const auto& entry : shader->GetEntries().samplers) {
|
||||
const auto shader_type = static_cast<ShaderType>(stage_index);
|
||||
for (std::size_t i = 0; i < entry.size; ++i) {
|
||||
const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i);
|
||||
SetupTexture(binding++, texture, entry);
|
||||
for (size_t index = 0; index < entry.size; ++index) {
|
||||
const auto handle =
|
||||
GetTextureInfo(maxwell3d, via_header_index, entry, shader_type, index);
|
||||
const Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler);
|
||||
sampler_handles.push_back(sampler->Handle());
|
||||
image_view_indices.push_back(handle.image);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupComputeTextures(Shader* kernel) {
|
||||
MICROPROFILE_SCOPE(OpenGL_Texture);
|
||||
u32 binding = 0;
|
||||
void RasterizerOpenGL::SetupComputeTextures(const Shader* kernel) {
|
||||
const bool via_header_index = kepler_compute.launch_description.linked_tsc;
|
||||
for (const auto& entry : kernel->GetEntries().samplers) {
|
||||
for (std::size_t i = 0; i < entry.size; ++i) {
|
||||
const auto texture = GetTextureInfo(kepler_compute, entry, ShaderType::Compute, i);
|
||||
SetupTexture(binding++, texture, entry);
|
||||
for (size_t i = 0; i < entry.size; ++i) {
|
||||
const auto handle =
|
||||
GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute, i);
|
||||
const Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler);
|
||||
sampler_handles.push_back(sampler->Handle());
|
||||
image_view_indices.push_back(handle.image);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
|
||||
const SamplerEntry& entry) {
|
||||
const auto view = texture_cache.GetTextureSurface(texture.tic, entry);
|
||||
if (!view) {
|
||||
// Can occur when texture addr is null or its memory is unmapped/invalid
|
||||
glBindSampler(binding, 0);
|
||||
glBindTextureUnit(binding, 0);
|
||||
return;
|
||||
}
|
||||
const GLuint handle = view->GetTexture(texture.tic.x_source, texture.tic.y_source,
|
||||
texture.tic.z_source, texture.tic.w_source);
|
||||
glBindTextureUnit(binding, handle);
|
||||
if (!view->GetSurfaceParams().IsBuffer()) {
|
||||
glBindSampler(binding, sampler_cache.GetSampler(texture.tsc));
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, Shader* shader) {
|
||||
u32 binding = device.GetBaseBindings(stage_index).image;
|
||||
void RasterizerOpenGL::SetupDrawImages(const Shader* shader, size_t stage_index) {
|
||||
const bool via_header_index =
|
||||
maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
|
||||
for (const auto& entry : shader->GetEntries().images) {
|
||||
const auto shader_type = static_cast<ShaderType>(stage_index);
|
||||
const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic;
|
||||
SetupImage(binding++, tic, entry);
|
||||
const auto handle = GetTextureInfo(maxwell3d, via_header_index, entry, shader_type);
|
||||
image_view_indices.push_back(handle.image);
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupComputeImages(Shader* shader) {
|
||||
u32 binding = 0;
|
||||
void RasterizerOpenGL::SetupComputeImages(const Shader* shader) {
|
||||
const bool via_header_index = kepler_compute.launch_description.linked_tsc;
|
||||
for (const auto& entry : shader->GetEntries().images) {
|
||||
const auto tic = GetTextureInfo(kepler_compute, entry, ShaderType::Compute).tic;
|
||||
SetupImage(binding++, tic, entry);
|
||||
const auto handle =
|
||||
GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute);
|
||||
image_view_indices.push_back(handle.image);
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic,
|
||||
const ImageEntry& entry) {
|
||||
const auto view = texture_cache.GetImageSurface(tic, entry);
|
||||
if (!view) {
|
||||
glBindImageTexture(binding, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8);
|
||||
return;
|
||||
}
|
||||
if (entry.is_written) {
|
||||
view->MarkAsModified(texture_cache.Tick());
|
||||
}
|
||||
const GLuint handle = view->GetTexture(tic.x_source, tic.y_source, tic.z_source, tic.w_source);
|
||||
glBindImageTexture(binding, handle, 0, GL_TRUE, 0, GL_READ_WRITE, view->GetFormat());
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncViewport() {
|
||||
auto& flags = maxwell3d.dirty.flags;
|
||||
const auto& regs = maxwell3d.regs;
|
||||
|
@ -1526,17 +1530,9 @@ void RasterizerOpenGL::SyncPointState() {
|
|||
flags[Dirty::PointSize] = false;
|
||||
|
||||
oglEnable(GL_POINT_SPRITE, maxwell3d.regs.point_sprite_enable);
|
||||
oglEnable(GL_PROGRAM_POINT_SIZE, maxwell3d.regs.vp_point_size.enable);
|
||||
|
||||
if (maxwell3d.regs.vp_point_size.enable) {
|
||||
// By definition of GL_POINT_SIZE, it only matters if GL_PROGRAM_POINT_SIZE is disabled.
|
||||
glEnable(GL_PROGRAM_POINT_SIZE);
|
||||
return;
|
||||
}
|
||||
|
||||
// Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid
|
||||
// in OpenGL).
|
||||
glPointSize(std::max(1.0f, maxwell3d.regs.point_size));
|
||||
glDisable(GL_PROGRAM_POINT_SIZE);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncLineState() {
|
||||
|
|
|
@ -7,12 +7,13 @@
|
|||
#include <array>
|
||||
#include <atomic>
|
||||
#include <cstddef>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <tuple>
|
||||
#include <utility>
|
||||
|
||||
#include <boost/container/static_vector.hpp>
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
@ -23,16 +24,14 @@
|
|||
#include "video_core/renderer_opengl/gl_buffer_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_device.h"
|
||||
#include "video_core/renderer_opengl/gl_fence_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_framebuffer_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_query_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_sampler_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_state_tracker.h"
|
||||
#include "video_core/renderer_opengl/gl_stream_buffer.h"
|
||||
#include "video_core/renderer_opengl/gl_texture_cache.h"
|
||||
#include "video_core/renderer_opengl/utils.h"
|
||||
#include "video_core/shader/async_shaders.h"
|
||||
#include "video_core/textures/texture.h"
|
||||
|
||||
|
@ -51,7 +50,7 @@ class MemoryManager;
|
|||
namespace OpenGL {
|
||||
|
||||
struct ScreenInfo;
|
||||
struct DrawParameters;
|
||||
struct ShaderEntries;
|
||||
|
||||
struct BindlessSSBO {
|
||||
GLuint64EXT address;
|
||||
|
@ -79,15 +78,18 @@ public:
|
|||
void InvalidateRegion(VAddr addr, u64 size) override;
|
||||
void OnCPUWrite(VAddr addr, u64 size) override;
|
||||
void SyncGuestHost() override;
|
||||
void UnmapMemory(VAddr addr, u64 size) override;
|
||||
void SignalSemaphore(GPUVAddr addr, u32 value) override;
|
||||
void SignalSyncPoint(u32 value) override;
|
||||
void ReleaseFences() override;
|
||||
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
|
||||
void WaitForIdle() override;
|
||||
void FragmentBarrier() override;
|
||||
void TiledCacheBarrier() override;
|
||||
void FlushCommands() override;
|
||||
void TickFrame() override;
|
||||
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
|
||||
const Tegra::Engines::Fermi2D::Regs::Surface& dst,
|
||||
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
|
||||
const Tegra::Engines::Fermi2D::Surface& dst,
|
||||
const Tegra::Engines::Fermi2D::Config& copy_config) override;
|
||||
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
|
||||
u32 pixel_stride) override;
|
||||
|
@ -108,11 +110,14 @@ public:
|
|||
}
|
||||
|
||||
private:
|
||||
/// Configures the color and depth framebuffer states.
|
||||
void ConfigureFramebuffers();
|
||||
static constexpr size_t MAX_TEXTURES = 192;
|
||||
static constexpr size_t MAX_IMAGES = 48;
|
||||
static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES;
|
||||
|
||||
/// Configures the color and depth framebuffer for clearing.
|
||||
void ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil);
|
||||
void BindComputeTextures(Shader* kernel);
|
||||
|
||||
void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image,
|
||||
size_t& image_view_index, size_t& texture_index, size_t& image_index);
|
||||
|
||||
/// Configures the current constbuffers to use for the draw command.
|
||||
void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader);
|
||||
|
@ -136,23 +141,16 @@ private:
|
|||
size_t size, BindlessSSBO* ssbo);
|
||||
|
||||
/// Configures the current textures to use for the draw command.
|
||||
void SetupDrawTextures(std::size_t stage_index, Shader* shader);
|
||||
void SetupDrawTextures(const Shader* shader, size_t stage_index);
|
||||
|
||||
/// Configures the textures used in a compute shader.
|
||||
void SetupComputeTextures(Shader* kernel);
|
||||
|
||||
/// Configures a texture.
|
||||
void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
|
||||
const SamplerEntry& entry);
|
||||
void SetupComputeTextures(const Shader* kernel);
|
||||
|
||||
/// Configures images in a graphics shader.
|
||||
void SetupDrawImages(std::size_t stage_index, Shader* shader);
|
||||
void SetupDrawImages(const Shader* shader, size_t stage_index);
|
||||
|
||||
/// Configures images in a compute shader.
|
||||
void SetupComputeImages(Shader* shader);
|
||||
|
||||
/// Configures an image.
|
||||
void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
|
||||
void SetupComputeImages(const Shader* shader);
|
||||
|
||||
/// Syncs the viewport and depth range to match the guest state
|
||||
void SyncViewport();
|
||||
|
@ -227,9 +225,6 @@ private:
|
|||
/// End a transform feedback
|
||||
void EndTransformFeedback();
|
||||
|
||||
/// Check for extension that are not strictly required but are needed for correct emulation
|
||||
void CheckExtensions();
|
||||
|
||||
std::size_t CalculateVertexArraysSize() const;
|
||||
|
||||
std::size_t CalculateIndexBufferSize() const;
|
||||
|
@ -242,7 +237,7 @@ private:
|
|||
|
||||
GLintptr SetupIndexBuffer();
|
||||
|
||||
void SetupShaders(GLenum primitive_mode);
|
||||
void SetupShaders();
|
||||
|
||||
Tegra::GPU& gpu;
|
||||
Tegra::Engines::Maxwell3D& maxwell3d;
|
||||
|
@ -254,19 +249,21 @@ private:
|
|||
ProgramManager& program_manager;
|
||||
StateTracker& state_tracker;
|
||||
|
||||
TextureCacheOpenGL texture_cache;
|
||||
OGLStreamBuffer stream_buffer;
|
||||
TextureCacheRuntime texture_cache_runtime;
|
||||
TextureCache texture_cache;
|
||||
ShaderCacheOpenGL shader_cache;
|
||||
SamplerCacheOpenGL sampler_cache;
|
||||
FramebufferCacheOpenGL framebuffer_cache;
|
||||
QueryCache query_cache;
|
||||
OGLBufferCache buffer_cache;
|
||||
FenceManagerOpenGL fence_manager;
|
||||
|
||||
VideoCommon::Shader::AsyncShaders async_shaders;
|
||||
|
||||
static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
|
||||
|
||||
GLint vertex_binding = 0;
|
||||
boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
|
||||
std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
|
||||
boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles;
|
||||
std::array<GLuint, MAX_TEXTURES> texture_handles;
|
||||
std::array<GLuint, MAX_IMAGES> image_handles;
|
||||
|
||||
std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
|
||||
transform_feedback_buffers;
|
||||
|
@ -280,7 +277,7 @@ private:
|
|||
std::size_t current_cbuf = 0;
|
||||
OGLBuffer unified_uniform_buffer;
|
||||
|
||||
/// Number of commands queued to the OpenGL driver. Reseted on flush.
|
||||
/// Number of commands queued to the OpenGL driver. Resetted on flush.
|
||||
std::size_t num_queued_commands = 0;
|
||||
|
||||
u32 last_clip_distance_mask = 0;
|
||||
|
|
|
@ -71,7 +71,7 @@ void OGLSampler::Create() {
|
|||
return;
|
||||
|
||||
MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
|
||||
glGenSamplers(1, &handle);
|
||||
glCreateSamplers(1, &handle);
|
||||
}
|
||||
|
||||
void OGLSampler::Release() {
|
||||
|
|
|
@ -1,52 +0,0 @@
|
|||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/logging/log.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_sampler_cache.h"
|
||||
#include "video_core/renderer_opengl/maxwell_to_gl.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
SamplerCacheOpenGL::SamplerCacheOpenGL() = default;
|
||||
|
||||
SamplerCacheOpenGL::~SamplerCacheOpenGL() = default;
|
||||
|
||||
OGLSampler SamplerCacheOpenGL::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const {
|
||||
OGLSampler sampler;
|
||||
sampler.Create();
|
||||
|
||||
const GLuint sampler_id{sampler.handle};
|
||||
glSamplerParameteri(
|
||||
sampler_id, GL_TEXTURE_MAG_FILTER,
|
||||
MaxwellToGL::TextureFilterMode(tsc.mag_filter, Tegra::Texture::TextureMipmapFilter::None));
|
||||
glSamplerParameteri(sampler_id, GL_TEXTURE_MIN_FILTER,
|
||||
MaxwellToGL::TextureFilterMode(tsc.min_filter, tsc.mipmap_filter));
|
||||
glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_S, MaxwellToGL::WrapMode(tsc.wrap_u));
|
||||
glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(tsc.wrap_v));
|
||||
glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(tsc.wrap_p));
|
||||
glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_MODE,
|
||||
tsc.depth_compare_enabled == 1 ? GL_COMPARE_REF_TO_TEXTURE : GL_NONE);
|
||||
glSamplerParameteri(sampler_id, GL_TEXTURE_COMPARE_FUNC,
|
||||
MaxwellToGL::DepthCompareFunc(tsc.depth_compare_func));
|
||||
glSamplerParameterfv(sampler_id, GL_TEXTURE_BORDER_COLOR, tsc.GetBorderColor().data());
|
||||
glSamplerParameterf(sampler_id, GL_TEXTURE_MIN_LOD, tsc.GetMinLod());
|
||||
glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_LOD, tsc.GetMaxLod());
|
||||
glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, tsc.GetLodBias());
|
||||
if (GLAD_GL_ARB_texture_filter_anisotropic) {
|
||||
glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY, tsc.GetMaxAnisotropy());
|
||||
} else if (GLAD_GL_EXT_texture_filter_anisotropic) {
|
||||
glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT, tsc.GetMaxAnisotropy());
|
||||
} else {
|
||||
LOG_WARNING(Render_OpenGL, "Anisotropy not supported by host GPU driver");
|
||||
}
|
||||
|
||||
return sampler;
|
||||
}
|
||||
|
||||
GLuint SamplerCacheOpenGL::ToSamplerType(const OGLSampler& sampler) const {
|
||||
return sampler.handle;
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
|
@ -1,25 +0,0 @@
|
|||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/sampler_cache.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
class SamplerCacheOpenGL final : public VideoCommon::SamplerCache<GLuint, OGLSampler> {
|
||||
public:
|
||||
explicit SamplerCacheOpenGL();
|
||||
~SamplerCacheOpenGL();
|
||||
|
||||
protected:
|
||||
OGLSampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const override;
|
||||
|
||||
GLuint ToSamplerType(const OGLSampler& sampler) const override;
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
|
@ -27,7 +27,6 @@
|
|||
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_state_tracker.h"
|
||||
#include "video_core/renderer_opengl/utils.h"
|
||||
#include "video_core/shader/memory_util.h"
|
||||
#include "video_core/shader/registry.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
|
|
|
@ -38,11 +38,9 @@ using Tegra::Shader::IpaSampleMode;
|
|||
using Tegra::Shader::PixelImap;
|
||||
using Tegra::Shader::Register;
|
||||
using Tegra::Shader::TextureType;
|
||||
using VideoCommon::Shader::BuildTransformFeedback;
|
||||
using VideoCommon::Shader::Registry;
|
||||
|
||||
using namespace std::string_literals;
|
||||
using namespace VideoCommon::Shader;
|
||||
using namespace std::string_literals;
|
||||
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
using Operation = const OperationNode&;
|
||||
|
@ -2753,11 +2751,11 @@ private:
|
|||
}
|
||||
}
|
||||
|
||||
std::string GetSampler(const Sampler& sampler) const {
|
||||
std::string GetSampler(const SamplerEntry& sampler) const {
|
||||
return AppendSuffix(sampler.index, "sampler");
|
||||
}
|
||||
|
||||
std::string GetImage(const Image& image) const {
|
||||
std::string GetImage(const ImageEntry& image) const {
|
||||
return AppendSuffix(image.index, "image");
|
||||
}
|
||||
|
||||
|
|
|
@ -20,8 +20,8 @@ namespace OpenGL {
|
|||
class Device;
|
||||
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
using SamplerEntry = VideoCommon::Shader::Sampler;
|
||||
using ImageEntry = VideoCommon::Shader::Image;
|
||||
using SamplerEntry = VideoCommon::Shader::SamplerEntry;
|
||||
using ImageEntry = VideoCommon::Shader::ImageEntry;
|
||||
|
||||
class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
|
||||
public:
|
||||
|
|
|
@ -83,6 +83,21 @@ void ProgramManager::RestoreGuestPipeline() {
|
|||
}
|
||||
}
|
||||
|
||||
void ProgramManager::BindHostCompute(GLuint program) {
|
||||
if (use_assembly_programs) {
|
||||
glDisable(GL_COMPUTE_PROGRAM_NV);
|
||||
}
|
||||
glUseProgram(program);
|
||||
is_graphics_bound = false;
|
||||
}
|
||||
|
||||
void ProgramManager::RestoreGuestCompute() {
|
||||
if (use_assembly_programs) {
|
||||
glEnable(GL_COMPUTE_PROGRAM_NV);
|
||||
glUseProgram(0);
|
||||
}
|
||||
}
|
||||
|
||||
void ProgramManager::UseVertexShader(GLuint program) {
|
||||
if (use_assembly_programs) {
|
||||
BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled);
|
||||
|
|
|
@ -45,6 +45,12 @@ public:
|
|||
/// Rewinds BindHostPipeline state changes.
|
||||
void RestoreGuestPipeline();
|
||||
|
||||
/// Binds an OpenGL GLSL program object unsynchronized with the guest state.
|
||||
void BindHostCompute(GLuint program);
|
||||
|
||||
/// Rewinds BindHostCompute state changes.
|
||||
void RestoreGuestCompute();
|
||||
|
||||
void UseVertexShader(GLuint program);
|
||||
void UseGeometryShader(GLuint program);
|
||||
void UseFragmentShader(GLuint program);
|
||||
|
|
|
@ -249,4 +249,11 @@ StateTracker::StateTracker(Tegra::GPU& gpu) : flags{gpu.Maxwell3D().dirty.flags}
|
|||
}
|
||||
}
|
||||
|
||||
void StateTracker::InvalidateStreamBuffer() {
|
||||
flags[Dirty::VertexBuffers] = true;
|
||||
for (int index = Dirty::VertexBuffer0; index <= Dirty::VertexBuffer31; ++index) {
|
||||
flags[index] = true;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
||||
|
|
|
@ -92,6 +92,8 @@ class StateTracker {
|
|||
public:
|
||||
explicit StateTracker(Tegra::GPU& gpu);
|
||||
|
||||
void InvalidateStreamBuffer();
|
||||
|
||||
void BindIndexBuffer(GLuint new_index_buffer) {
|
||||
if (index_buffer == new_index_buffer) {
|
||||
return;
|
||||
|
@ -100,6 +102,14 @@ public:
|
|||
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, new_index_buffer);
|
||||
}
|
||||
|
||||
void BindFramebuffer(GLuint new_framebuffer) {
|
||||
if (framebuffer == new_framebuffer) {
|
||||
return;
|
||||
}
|
||||
framebuffer = new_framebuffer;
|
||||
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer);
|
||||
}
|
||||
|
||||
void NotifyScreenDrawVertexArray() {
|
||||
flags[OpenGL::Dirty::VertexFormats] = true;
|
||||
flags[OpenGL::Dirty::VertexFormat0 + 0] = true;
|
||||
|
@ -129,9 +139,9 @@ public:
|
|||
flags[OpenGL::Dirty::Scissor0] = true;
|
||||
}
|
||||
|
||||
void NotifyColorMask0() {
|
||||
void NotifyColorMask(size_t index) {
|
||||
flags[OpenGL::Dirty::ColorMasks] = true;
|
||||
flags[OpenGL::Dirty::ColorMask0] = true;
|
||||
flags[OpenGL::Dirty::ColorMask0 + index] = true;
|
||||
}
|
||||
|
||||
void NotifyBlend0() {
|
||||
|
@ -190,6 +200,7 @@ public:
|
|||
private:
|
||||
Tegra::Engines::Maxwell3D::DirtyState::Flags& flags;
|
||||
|
||||
GLuint framebuffer = 0;
|
||||
GLuint index_buffer = 0;
|
||||
};
|
||||
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
#include "common/assert.h"
|
||||
#include "common/microprofile.h"
|
||||
#include "video_core/renderer_opengl/gl_device.h"
|
||||
#include "video_core/renderer_opengl/gl_state_tracker.h"
|
||||
#include "video_core/renderer_opengl/gl_stream_buffer.h"
|
||||
|
||||
MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
|
||||
|
@ -16,24 +17,14 @@ MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
|
|||
|
||||
namespace OpenGL {
|
||||
|
||||
OGLStreamBuffer::OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage)
|
||||
: buffer_size(size) {
|
||||
OGLStreamBuffer::OGLStreamBuffer(const Device& device, StateTracker& state_tracker_)
|
||||
: state_tracker{state_tracker_} {
|
||||
gl_buffer.Create();
|
||||
|
||||
GLsizeiptr allocate_size = size;
|
||||
if (vertex_data_usage) {
|
||||
// On AMD GPU there is a strange crash in indexed drawing. The crash happens when the buffer
|
||||
// read position is near the end and is an out-of-bound access to the vertex buffer. This is
|
||||
// probably a bug in the driver and is related to the usage of vec3<byte> attributes in the
|
||||
// vertex array. Doubling the allocation size for the vertex buffer seems to avoid the
|
||||
// crash.
|
||||
allocate_size *= 2;
|
||||
}
|
||||
|
||||
static constexpr GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT;
|
||||
glNamedBufferStorage(gl_buffer.handle, allocate_size, nullptr, flags);
|
||||
glNamedBufferStorage(gl_buffer.handle, BUFFER_SIZE, nullptr, flags);
|
||||
mapped_ptr = static_cast<u8*>(
|
||||
glMapNamedBufferRange(gl_buffer.handle, 0, buffer_size, flags | GL_MAP_FLUSH_EXPLICIT_BIT));
|
||||
glMapNamedBufferRange(gl_buffer.handle, 0, BUFFER_SIZE, flags | GL_MAP_FLUSH_EXPLICIT_BIT));
|
||||
|
||||
if (device.UseAssemblyShaders() || device.HasVertexBufferUnifiedMemory()) {
|
||||
glMakeNamedBufferResidentNV(gl_buffer.handle, GL_READ_ONLY);
|
||||
|
@ -46,25 +37,24 @@ OGLStreamBuffer::~OGLStreamBuffer() {
|
|||
gl_buffer.Release();
|
||||
}
|
||||
|
||||
std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) {
|
||||
ASSERT(size <= buffer_size);
|
||||
ASSERT(alignment <= buffer_size);
|
||||
std::pair<u8*, GLintptr> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) {
|
||||
ASSERT(size <= BUFFER_SIZE);
|
||||
ASSERT(alignment <= BUFFER_SIZE);
|
||||
mapped_size = size;
|
||||
|
||||
if (alignment > 0) {
|
||||
buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment);
|
||||
}
|
||||
|
||||
bool invalidate = false;
|
||||
if (buffer_pos + size > buffer_size) {
|
||||
if (buffer_pos + size > BUFFER_SIZE) {
|
||||
MICROPROFILE_SCOPE(OpenGL_StreamBuffer);
|
||||
glInvalidateBufferData(gl_buffer.handle);
|
||||
state_tracker.InvalidateStreamBuffer();
|
||||
|
||||
buffer_pos = 0;
|
||||
invalidate = true;
|
||||
}
|
||||
|
||||
return std::make_tuple(mapped_ptr + buffer_pos, buffer_pos, invalidate);
|
||||
return std::make_pair(mapped_ptr + buffer_pos, buffer_pos);
|
||||
}
|
||||
|
||||
void OGLStreamBuffer::Unmap(GLsizeiptr size) {
|
||||
|
|
|
@ -4,29 +4,31 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <tuple>
|
||||
#include <utility>
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
class Device;
|
||||
class StateTracker;
|
||||
|
||||
class OGLStreamBuffer : private NonCopyable {
|
||||
public:
|
||||
explicit OGLStreamBuffer(const Device& device, GLsizeiptr size, bool vertex_data_usage);
|
||||
explicit OGLStreamBuffer(const Device& device, StateTracker& state_tracker_);
|
||||
~OGLStreamBuffer();
|
||||
|
||||
/*
|
||||
* Allocates a linear chunk of memory in the GPU buffer with at least "size" bytes
|
||||
* and the optional alignment requirement.
|
||||
* If the buffer is full, the whole buffer is reallocated which invalidates old chunks.
|
||||
* The return values are the pointer to the new chunk, the offset within the buffer,
|
||||
* and the invalidation flag for previous chunks.
|
||||
* The return values are the pointer to the new chunk, and the offset within the buffer.
|
||||
* The actual used size must be specified on unmapping the chunk.
|
||||
*/
|
||||
std::tuple<u8*, GLintptr, bool> Map(GLsizeiptr size, GLintptr alignment = 0);
|
||||
std::pair<u8*, GLintptr> Map(GLsizeiptr size, GLintptr alignment = 0);
|
||||
|
||||
void Unmap(GLsizeiptr size);
|
||||
|
||||
|
@ -39,15 +41,18 @@ public:
|
|||
}
|
||||
|
||||
GLsizeiptr Size() const noexcept {
|
||||
return buffer_size;
|
||||
return BUFFER_SIZE;
|
||||
}
|
||||
|
||||
private:
|
||||
static constexpr GLsizeiptr BUFFER_SIZE = 256 * 1024 * 1024;
|
||||
|
||||
StateTracker& state_tracker;
|
||||
|
||||
OGLBuffer gl_buffer;
|
||||
|
||||
GLuint64EXT gpu_address = 0;
|
||||
GLintptr buffer_pos = 0;
|
||||
GLsizeiptr buffer_size = 0;
|
||||
GLsizeiptr mapped_size = 0;
|
||||
u8* mapped_ptr = nullptr;
|
||||
};
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -4,157 +4,247 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <span>
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/shader_bytecode.h"
|
||||
#include "video_core/renderer_opengl/gl_device.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/renderer_opengl/util_shaders.h"
|
||||
#include "video_core/texture_cache/texture_cache.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
using VideoCommon::SurfaceParams;
|
||||
using VideoCommon::ViewParams;
|
||||
|
||||
class CachedSurfaceView;
|
||||
class CachedSurface;
|
||||
class TextureCacheOpenGL;
|
||||
class Device;
|
||||
class ProgramManager;
|
||||
class StateTracker;
|
||||
|
||||
using Surface = std::shared_ptr<CachedSurface>;
|
||||
using View = std::shared_ptr<CachedSurfaceView>;
|
||||
using TextureCacheBase = VideoCommon::TextureCache<Surface, View>;
|
||||
class Framebuffer;
|
||||
class Image;
|
||||
class ImageView;
|
||||
class Sampler;
|
||||
|
||||
class CachedSurface final : public VideoCommon::SurfaceBase<View> {
|
||||
friend CachedSurfaceView;
|
||||
using VideoCommon::ImageId;
|
||||
using VideoCommon::ImageViewId;
|
||||
using VideoCommon::ImageViewType;
|
||||
using VideoCommon::NUM_RT;
|
||||
using VideoCommon::Offset2D;
|
||||
using VideoCommon::RenderTargets;
|
||||
|
||||
class ImageBufferMap {
|
||||
public:
|
||||
explicit CachedSurface(GPUVAddr gpu_addr_, const SurfaceParams& params_,
|
||||
bool is_astc_supported_);
|
||||
~CachedSurface();
|
||||
explicit ImageBufferMap(GLuint handle, u8* map, size_t size, OGLSync* sync);
|
||||
~ImageBufferMap();
|
||||
|
||||
void UploadTexture(const std::vector<u8>& staging_buffer) override;
|
||||
void DownloadTexture(std::vector<u8>& staging_buffer) override;
|
||||
|
||||
GLenum GetTarget() const {
|
||||
return target;
|
||||
GLuint Handle() const noexcept {
|
||||
return handle;
|
||||
}
|
||||
|
||||
GLuint GetTexture() const {
|
||||
std::span<u8> Span() const noexcept {
|
||||
return span;
|
||||
}
|
||||
|
||||
private:
|
||||
std::span<u8> span;
|
||||
OGLSync* sync;
|
||||
GLuint handle;
|
||||
};
|
||||
|
||||
struct FormatProperties {
|
||||
GLenum compatibility_class;
|
||||
bool compatibility_by_size;
|
||||
bool is_compressed;
|
||||
};
|
||||
|
||||
class TextureCacheRuntime {
|
||||
friend Framebuffer;
|
||||
friend Image;
|
||||
friend ImageView;
|
||||
friend Sampler;
|
||||
|
||||
public:
|
||||
explicit TextureCacheRuntime(const Device& device, ProgramManager& program_manager,
|
||||
StateTracker& state_tracker);
|
||||
~TextureCacheRuntime();
|
||||
|
||||
void Finish();
|
||||
|
||||
ImageBufferMap MapUploadBuffer(size_t size);
|
||||
|
||||
ImageBufferMap MapDownloadBuffer(size_t size);
|
||||
|
||||
void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
|
||||
|
||||
void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
bool CanImageBeCopied(const Image& dst, const Image& src);
|
||||
|
||||
void EmulateCopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies);
|
||||
|
||||
void BlitFramebuffer(Framebuffer* dst, Framebuffer* src,
|
||||
const std::array<Offset2D, 2>& dst_region,
|
||||
const std::array<Offset2D, 2>& src_region,
|
||||
Tegra::Engines::Fermi2D::Filter filter,
|
||||
Tegra::Engines::Fermi2D::Operation operation);
|
||||
|
||||
void AccelerateImageUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset,
|
||||
std::span<const VideoCommon::SwizzleParameters> swizzles);
|
||||
|
||||
void InsertUploadMemoryBarrier();
|
||||
|
||||
FormatProperties FormatInfo(VideoCommon::ImageType type, GLenum internal_format) const;
|
||||
|
||||
private:
|
||||
struct StagingBuffers {
|
||||
explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_);
|
||||
~StagingBuffers();
|
||||
|
||||
ImageBufferMap RequestMap(size_t requested_size, bool insert_fence);
|
||||
|
||||
size_t RequestBuffer(size_t requested_size);
|
||||
|
||||
std::optional<size_t> FindBuffer(size_t requested_size);
|
||||
|
||||
std::vector<OGLSync> syncs;
|
||||
std::vector<OGLBuffer> buffers;
|
||||
std::vector<u8*> maps;
|
||||
std::vector<size_t> sizes;
|
||||
GLenum storage_flags;
|
||||
GLenum map_flags;
|
||||
};
|
||||
|
||||
const Device& device;
|
||||
StateTracker& state_tracker;
|
||||
UtilShaders util_shaders;
|
||||
|
||||
std::array<std::unordered_map<GLenum, FormatProperties>, 3> format_properties;
|
||||
|
||||
StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT};
|
||||
StagingBuffers download_buffers{GL_MAP_READ_BIT, GL_MAP_READ_BIT};
|
||||
|
||||
OGLTexture null_image_1d_array;
|
||||
OGLTexture null_image_cube_array;
|
||||
OGLTexture null_image_3d;
|
||||
OGLTexture null_image_rect;
|
||||
OGLTextureView null_image_view_1d;
|
||||
OGLTextureView null_image_view_2d;
|
||||
OGLTextureView null_image_view_2d_array;
|
||||
OGLTextureView null_image_view_cube;
|
||||
|
||||
std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> null_image_views;
|
||||
};
|
||||
|
||||
class Image : public VideoCommon::ImageBase {
|
||||
friend ImageView;
|
||||
|
||||
public:
|
||||
explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr,
|
||||
VAddr cpu_addr);
|
||||
|
||||
void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
|
||||
std::span<const VideoCommon::BufferImageCopy> copies);
|
||||
|
||||
void UploadMemory(const ImageBufferMap& map, size_t buffer_offset,
|
||||
std::span<const VideoCommon::BufferCopy> copies);
|
||||
|
||||
void DownloadMemory(ImageBufferMap& map, size_t buffer_offset,
|
||||
std::span<const VideoCommon::BufferImageCopy> copies);
|
||||
|
||||
GLuint Handle() const noexcept {
|
||||
return texture.handle;
|
||||
}
|
||||
|
||||
bool IsCompressed() const {
|
||||
return is_compressed;
|
||||
}
|
||||
|
||||
protected:
|
||||
void DecorateSurfaceName() override;
|
||||
|
||||
View CreateView(const ViewParams& view_key) override;
|
||||
View CreateViewInner(const ViewParams& view_key, bool is_proxy);
|
||||
|
||||
private:
|
||||
void UploadTextureMipmap(u32 level, const std::vector<u8>& staging_buffer);
|
||||
void CopyBufferToImage(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
|
||||
|
||||
GLenum internal_format{};
|
||||
GLenum format{};
|
||||
GLenum type{};
|
||||
bool is_compressed{};
|
||||
GLenum target{};
|
||||
u32 view_count{};
|
||||
void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
|
||||
|
||||
OGLTexture texture;
|
||||
OGLBuffer texture_buffer;
|
||||
OGLTextureView store_view;
|
||||
OGLBuffer buffer;
|
||||
GLenum gl_internal_format = GL_NONE;
|
||||
GLenum gl_store_format = GL_NONE;
|
||||
GLenum gl_format = GL_NONE;
|
||||
GLenum gl_type = GL_NONE;
|
||||
};
|
||||
|
||||
class CachedSurfaceView final : public VideoCommon::ViewBase {
|
||||
class ImageView : public VideoCommon::ImageViewBase {
|
||||
friend Image;
|
||||
|
||||
public:
|
||||
explicit CachedSurfaceView(CachedSurface& surface_, const ViewParams& params_, bool is_proxy_);
|
||||
~CachedSurfaceView();
|
||||
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&);
|
||||
explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&);
|
||||
|
||||
/// @brief Attaches this texture view to the currently bound fb_target framebuffer
|
||||
/// @param attachment Attachment to bind textures to
|
||||
/// @param fb_target Framebuffer target to attach to (e.g. DRAW_FRAMEBUFFER)
|
||||
void Attach(GLenum attachment, GLenum fb_target) const;
|
||||
|
||||
GLuint GetTexture(Tegra::Texture::SwizzleSource x_source,
|
||||
Tegra::Texture::SwizzleSource y_source,
|
||||
Tegra::Texture::SwizzleSource z_source,
|
||||
Tegra::Texture::SwizzleSource w_source);
|
||||
|
||||
void DecorateViewName(GPUVAddr gpu_addr, const std::string& prefix);
|
||||
|
||||
void MarkAsModified(u64 tick) {
|
||||
surface.MarkAsModified(true, tick);
|
||||
[[nodiscard]] GLuint Handle(ImageViewType query_type) const noexcept {
|
||||
return views[static_cast<size_t>(query_type)];
|
||||
}
|
||||
|
||||
GLuint GetTexture() const {
|
||||
if (is_proxy) {
|
||||
return surface.GetTexture();
|
||||
}
|
||||
return main_view.handle;
|
||||
[[nodiscard]] GLuint DefaultHandle() const noexcept {
|
||||
return default_handle;
|
||||
}
|
||||
|
||||
GLenum GetFormat() const {
|
||||
return format;
|
||||
}
|
||||
|
||||
const SurfaceParams& GetSurfaceParams() const {
|
||||
return surface.GetSurfaceParams();
|
||||
[[nodiscard]] GLenum Format() const noexcept {
|
||||
return internal_format;
|
||||
}
|
||||
|
||||
private:
|
||||
OGLTextureView CreateTextureView() const;
|
||||
void SetupView(const Device& device, Image& image, ImageViewType view_type, GLuint handle,
|
||||
const VideoCommon::ImageViewInfo& info,
|
||||
VideoCommon::SubresourceRange view_range);
|
||||
|
||||
CachedSurface& surface;
|
||||
const GLenum format;
|
||||
const GLenum target;
|
||||
const bool is_proxy;
|
||||
|
||||
std::unordered_map<u32, OGLTextureView> view_cache;
|
||||
OGLTextureView main_view;
|
||||
|
||||
// Use an invalid default so it always fails the comparison test
|
||||
u32 current_swizzle = 0xffffffff;
|
||||
GLuint current_view = 0;
|
||||
std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> views{};
|
||||
std::vector<OGLTextureView> stored_views;
|
||||
GLuint default_handle = 0;
|
||||
GLenum internal_format = GL_NONE;
|
||||
};
|
||||
|
||||
class TextureCacheOpenGL final : public TextureCacheBase {
|
||||
class ImageAlloc : public VideoCommon::ImageAllocBase {};
|
||||
|
||||
class Sampler {
|
||||
public:
|
||||
explicit TextureCacheOpenGL(VideoCore::RasterizerInterface& rasterizer_,
|
||||
Tegra::Engines::Maxwell3D& maxwell3d_,
|
||||
Tegra::MemoryManager& gpu_memory_, const Device& device_,
|
||||
StateTracker& state_tracker);
|
||||
~TextureCacheOpenGL();
|
||||
explicit Sampler(TextureCacheRuntime&, const Tegra::Texture::TSCEntry&);
|
||||
|
||||
protected:
|
||||
Surface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) override;
|
||||
|
||||
void ImageCopy(Surface& src_surface, Surface& dst_surface,
|
||||
const VideoCommon::CopyParams& copy_params) override;
|
||||
|
||||
void ImageBlit(View& src_view, View& dst_view,
|
||||
const Tegra::Engines::Fermi2D::Config& copy_config) override;
|
||||
|
||||
void BufferCopy(Surface& src_surface, Surface& dst_surface) override;
|
||||
GLuint Handle() const noexcept {
|
||||
return sampler.handle;
|
||||
}
|
||||
|
||||
private:
|
||||
GLuint FetchPBO(std::size_t buffer_size);
|
||||
|
||||
StateTracker& state_tracker;
|
||||
|
||||
OGLFramebuffer src_framebuffer;
|
||||
OGLFramebuffer dst_framebuffer;
|
||||
std::unordered_map<u32, OGLBuffer> copy_pbo_cache;
|
||||
OGLSampler sampler;
|
||||
};
|
||||
|
||||
class Framebuffer {
|
||||
public:
|
||||
explicit Framebuffer(TextureCacheRuntime&, std::span<ImageView*, NUM_RT> color_buffers,
|
||||
ImageView* depth_buffer, const VideoCommon::RenderTargets& key);
|
||||
|
||||
[[nodiscard]] GLuint Handle() const noexcept {
|
||||
return framebuffer.handle;
|
||||
}
|
||||
|
||||
[[nodiscard]] GLbitfield BufferBits() const noexcept {
|
||||
return buffer_bits;
|
||||
}
|
||||
|
||||
private:
|
||||
OGLFramebuffer framebuffer;
|
||||
GLbitfield buffer_bits = GL_NONE;
|
||||
};
|
||||
|
||||
struct TextureCacheParams {
|
||||
static constexpr bool ENABLE_VALIDATION = true;
|
||||
static constexpr bool FRAMEBUFFER_BLITS = true;
|
||||
static constexpr bool HAS_EMULATED_COPIES = true;
|
||||
|
||||
using Runtime = OpenGL::TextureCacheRuntime;
|
||||
using Image = OpenGL::Image;
|
||||
using ImageAlloc = OpenGL::ImageAlloc;
|
||||
using ImageView = OpenGL::ImageView;
|
||||
using Sampler = OpenGL::Sampler;
|
||||
using Framebuffer = OpenGL::Framebuffer;
|
||||
};
|
||||
|
||||
using TextureCache = VideoCommon::TextureCache<TextureCacheParams>;
|
||||
|
||||
} // namespace OpenGL
|
||||
|
|
|
@ -475,6 +475,19 @@ inline GLenum PolygonMode(Maxwell::PolygonMode polygon_mode) {
|
|||
return GL_FILL;
|
||||
}
|
||||
|
||||
inline GLenum ReductionFilter(Tegra::Texture::SamplerReduction filter) {
|
||||
switch (filter) {
|
||||
case Tegra::Texture::SamplerReduction::WeightedAverage:
|
||||
return GL_WEIGHTED_AVERAGE_ARB;
|
||||
case Tegra::Texture::SamplerReduction::Min:
|
||||
return GL_MIN;
|
||||
case Tegra::Texture::SamplerReduction::Max:
|
||||
return GL_MAX;
|
||||
}
|
||||
UNREACHABLE_MSG("Invalid reduction filter={}", static_cast<int>(filter));
|
||||
return GL_WEIGHTED_AVERAGE_ARB;
|
||||
}
|
||||
|
||||
inline GLenum ViewportSwizzle(Maxwell::ViewportSwizzle swizzle) {
|
||||
// Enumeration order matches register order. We can convert it arithmetically.
|
||||
return GL_VIEWPORT_SWIZZLE_POSITIVE_X_NV + static_cast<GLenum>(swizzle);
|
||||
|
|
|
@ -23,10 +23,10 @@
|
|||
#include "core/telemetry_session.h"
|
||||
#include "video_core/host_shaders/opengl_present_frag.h"
|
||||
#include "video_core/host_shaders/opengl_present_vert.h"
|
||||
#include "video_core/morton.h"
|
||||
#include "video_core/renderer_opengl/gl_rasterizer.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_manager.h"
|
||||
#include "video_core/renderer_opengl/renderer_opengl.h"
|
||||
#include "video_core/textures/decoders.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
|
@ -140,11 +140,10 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
|
|||
if (!framebuffer) {
|
||||
return;
|
||||
}
|
||||
|
||||
PrepareRendertarget(framebuffer);
|
||||
RenderScreenshot();
|
||||
|
||||
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
|
||||
state_tracker.BindFramebuffer(0);
|
||||
DrawScreen(emu_window.GetFramebufferLayout());
|
||||
|
||||
++m_current_frame;
|
||||
|
@ -187,19 +186,20 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
|
|||
// Reset the screen info's display texture to its own permanent texture
|
||||
screen_info.display_texture = screen_info.texture.resource.handle;
|
||||
|
||||
const auto pixel_format{
|
||||
VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)};
|
||||
const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)};
|
||||
const u64 size_in_bytes{framebuffer.stride * framebuffer.height * bytes_per_pixel};
|
||||
u8* const host_ptr{cpu_memory.GetPointer(framebuffer_addr)};
|
||||
rasterizer->FlushRegion(ToCacheAddr(host_ptr), size_in_bytes);
|
||||
|
||||
// TODO(Rodrigo): Read this from HLE
|
||||
constexpr u32 block_height_log2 = 4;
|
||||
VideoCore::MortonSwizzle(VideoCore::MortonSwizzleMode::MortonToLinear, pixel_format,
|
||||
framebuffer.stride, block_height_log2, framebuffer.height, 0, 1, 1,
|
||||
gl_framebuffer_data.data(), host_ptr);
|
||||
const auto pixel_format{
|
||||
VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)};
|
||||
const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)};
|
||||
const u64 size_in_bytes{Tegra::Texture::CalculateSize(
|
||||
true, bytes_per_pixel, framebuffer.stride, framebuffer.height, 1, block_height_log2, 0)};
|
||||
const u8* const host_ptr{cpu_memory.GetPointer(framebuffer_addr)};
|
||||
const std::span<const u8> input_data(host_ptr, size_in_bytes);
|
||||
Tegra::Texture::UnswizzleTexture(gl_framebuffer_data, input_data, bytes_per_pixel,
|
||||
framebuffer.width, framebuffer.height, 1, block_height_log2,
|
||||
0);
|
||||
|
||||
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
|
||||
glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(framebuffer.stride));
|
||||
|
||||
// Update existing texture
|
||||
|
@ -238,6 +238,10 @@ void RendererOpenGL::InitOpenGLObjects() {
|
|||
glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle);
|
||||
glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle);
|
||||
|
||||
// Generate presentation sampler
|
||||
present_sampler.Create();
|
||||
glSamplerParameteri(present_sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
|
||||
|
||||
// Generate VBO handle for drawing
|
||||
vertex_buffer.Create();
|
||||
|
||||
|
@ -255,6 +259,11 @@ void RendererOpenGL::InitOpenGLObjects() {
|
|||
// Clear screen to black
|
||||
LoadColorToActiveGLTexture(0, 0, 0, 0, screen_info.texture);
|
||||
|
||||
// Enable seamless cubemaps when per texture parameters are not available
|
||||
if (!GLAD_GL_ARB_seamless_cubemap_per_texture && !GLAD_GL_AMD_seamless_cubemap_per_texture) {
|
||||
glEnable(GL_TEXTURE_CUBE_MAP_SEAMLESS);
|
||||
}
|
||||
|
||||
// Enable unified vertex attributes and query vertex buffer address when the driver supports it
|
||||
if (device.HasVertexBufferUnifiedMemory()) {
|
||||
glEnableClientState(GL_VERTEX_ATTRIB_ARRAY_UNIFIED_NV);
|
||||
|
@ -296,7 +305,7 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
|
|||
|
||||
const auto pixel_format{
|
||||
VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)};
|
||||
const u32 bytes_per_pixel{VideoCore::Surface::GetBytesPerPixel(pixel_format)};
|
||||
const u32 bytes_per_pixel{VideoCore::Surface::BytesPerBlock(pixel_format)};
|
||||
gl_framebuffer_data.resize(texture.width * texture.height * bytes_per_pixel);
|
||||
|
||||
GLint internal_format;
|
||||
|
@ -315,8 +324,8 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
|
|||
internal_format = GL_RGBA8;
|
||||
texture.gl_format = GL_RGBA;
|
||||
texture.gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
|
||||
UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}",
|
||||
static_cast<u32>(framebuffer.pixel_format));
|
||||
// UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}",
|
||||
// static_cast<u32>(framebuffer.pixel_format));
|
||||
}
|
||||
|
||||
texture.resource.Release();
|
||||
|
@ -382,7 +391,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
|
|||
state_tracker.NotifyPolygonModes();
|
||||
state_tracker.NotifyViewport0();
|
||||
state_tracker.NotifyScissor0();
|
||||
state_tracker.NotifyColorMask0();
|
||||
state_tracker.NotifyColorMask(0);
|
||||
state_tracker.NotifyBlend0();
|
||||
state_tracker.NotifyFramebuffer();
|
||||
state_tracker.NotifyFrontFace();
|
||||
|
@ -440,7 +449,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
|
|||
}
|
||||
|
||||
glBindTextureUnit(0, screen_info.display_texture);
|
||||
glBindSampler(0, 0);
|
||||
glBindSampler(0, present_sampler.handle);
|
||||
|
||||
glClear(GL_COLOR_BUFFER_BIT);
|
||||
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
|
||||
|
@ -473,6 +482,8 @@ void RendererOpenGL::RenderScreenshot() {
|
|||
|
||||
DrawScreen(layout);
|
||||
|
||||
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
|
||||
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
|
||||
glReadPixels(0, 0, layout.width, layout.height, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV,
|
||||
renderer_settings.screenshot_bits);
|
||||
|
||||
|
|
|
@ -102,6 +102,7 @@ private:
|
|||
StateTracker state_tracker{gpu};
|
||||
|
||||
// OpenGL object IDs
|
||||
OGLSampler present_sampler;
|
||||
OGLBuffer vertex_buffer;
|
||||
OGLProgram vertex_program;
|
||||
OGLProgram fragment_program;
|
||||
|
|
224
src/video_core/renderer_opengl/util_shaders.cpp
Normal file
224
src/video_core/renderer_opengl/util_shaders.cpp
Normal file
|
@ -0,0 +1,224 @@
|
|||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <bit>
|
||||
#include <span>
|
||||
#include <string_view>
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/div_ceil.h"
|
||||
#include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h"
|
||||
#include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h"
|
||||
#include "video_core/host_shaders/opengl_copy_bc4_comp.h"
|
||||
#include "video_core/host_shaders/pitch_unswizzle_comp.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_texture_cache.h"
|
||||
#include "video_core/renderer_opengl/util_shaders.h"
|
||||
#include "video_core/surface.h"
|
||||
#include "video_core/texture_cache/accelerated_swizzle.h"
|
||||
#include "video_core/texture_cache/types.h"
|
||||
#include "video_core/texture_cache/util.h"
|
||||
#include "video_core/textures/decoders.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
using namespace HostShaders;
|
||||
|
||||
using VideoCommon::Extent3D;
|
||||
using VideoCommon::ImageCopy;
|
||||
using VideoCommon::ImageType;
|
||||
using VideoCommon::SwizzleParameters;
|
||||
using VideoCommon::Accelerated::MakeBlockLinearSwizzle2DParams;
|
||||
using VideoCommon::Accelerated::MakeBlockLinearSwizzle3DParams;
|
||||
using VideoCore::Surface::BytesPerBlock;
|
||||
|
||||
namespace {
|
||||
|
||||
OGLProgram MakeProgram(std::string_view source) {
|
||||
OGLShader shader;
|
||||
shader.Create(source, GL_COMPUTE_SHADER);
|
||||
|
||||
OGLProgram program;
|
||||
program.Create(true, false, shader.handle);
|
||||
return program;
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
UtilShaders::UtilShaders(ProgramManager& program_manager_)
|
||||
: program_manager{program_manager_},
|
||||
block_linear_unswizzle_2d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_2D_COMP)),
|
||||
block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)),
|
||||
pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)),
|
||||
copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) {
|
||||
const auto swizzle_table = Tegra::Texture::MakeSwizzleTable();
|
||||
swizzle_table_buffer.Create();
|
||||
glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0);
|
||||
}
|
||||
|
||||
UtilShaders::~UtilShaders() = default;
|
||||
|
||||
void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
|
||||
std::span<const SwizzleParameters> swizzles) {
|
||||
static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
|
||||
static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
|
||||
static constexpr GLuint BINDING_INPUT_BUFFER = 1;
|
||||
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
|
||||
|
||||
program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle);
|
||||
glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
|
||||
|
||||
const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
|
||||
for (const SwizzleParameters& swizzle : swizzles) {
|
||||
const Extent3D num_tiles = swizzle.num_tiles;
|
||||
const size_t input_offset = swizzle.buffer_offset + buffer_offset;
|
||||
|
||||
const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
|
||||
const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
|
||||
|
||||
const auto params = MakeBlockLinearSwizzle2DParams(swizzle, image.info);
|
||||
glUniform3uiv(0, 1, params.origin.data());
|
||||
glUniform3iv(1, 1, params.destination.data());
|
||||
glUniform1ui(2, params.bytes_per_block_log2);
|
||||
glUniform1ui(3, params.layer_stride);
|
||||
glUniform1ui(4, params.block_size);
|
||||
glUniform1ui(5, params.x_shift);
|
||||
glUniform1ui(6, params.block_height);
|
||||
glUniform1ui(7, params.block_height_mask);
|
||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
|
||||
input_offset, image.guest_size_bytes - swizzle.buffer_offset);
|
||||
glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0,
|
||||
GL_WRITE_ONLY, store_format);
|
||||
glDispatchCompute(num_dispatches_x, num_dispatches_y, image.info.resources.layers);
|
||||
}
|
||||
program_manager.RestoreGuestCompute();
|
||||
}
|
||||
|
||||
void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
|
||||
std::span<const SwizzleParameters> swizzles) {
|
||||
static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8};
|
||||
|
||||
static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
|
||||
static constexpr GLuint BINDING_INPUT_BUFFER = 1;
|
||||
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
|
||||
|
||||
glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
|
||||
program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle);
|
||||
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
|
||||
|
||||
const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
|
||||
for (const SwizzleParameters& swizzle : swizzles) {
|
||||
const Extent3D num_tiles = swizzle.num_tiles;
|
||||
const size_t input_offset = swizzle.buffer_offset + buffer_offset;
|
||||
|
||||
const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
|
||||
const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
|
||||
const u32 num_dispatches_z = Common::DivCeil(num_tiles.depth, WORKGROUP_SIZE.depth);
|
||||
|
||||
const auto params = MakeBlockLinearSwizzle3DParams(swizzle, image.info);
|
||||
glUniform3uiv(0, 1, params.origin.data());
|
||||
glUniform3iv(1, 1, params.destination.data());
|
||||
glUniform1ui(2, params.bytes_per_block_log2);
|
||||
glUniform1ui(3, params.slice_size);
|
||||
glUniform1ui(4, params.block_size);
|
||||
glUniform1ui(5, params.x_shift);
|
||||
glUniform1ui(6, params.block_height);
|
||||
glUniform1ui(7, params.block_height_mask);
|
||||
glUniform1ui(8, params.block_depth);
|
||||
glUniform1ui(9, params.block_depth_mask);
|
||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
|
||||
input_offset, image.guest_size_bytes - swizzle.buffer_offset);
|
||||
glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), swizzle.level, GL_TRUE, 0,
|
||||
GL_WRITE_ONLY, store_format);
|
||||
glDispatchCompute(num_dispatches_x, num_dispatches_y, num_dispatches_z);
|
||||
}
|
||||
program_manager.RestoreGuestCompute();
|
||||
}
|
||||
|
||||
void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset,
|
||||
std::span<const SwizzleParameters> swizzles) {
|
||||
static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
|
||||
static constexpr GLuint BINDING_INPUT_BUFFER = 0;
|
||||
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
|
||||
static constexpr GLuint LOC_ORIGIN = 0;
|
||||
static constexpr GLuint LOC_DESTINATION = 1;
|
||||
static constexpr GLuint LOC_BYTES_PER_BLOCK = 2;
|
||||
static constexpr GLuint LOC_PITCH = 3;
|
||||
|
||||
const u32 bytes_per_block = BytesPerBlock(image.info.format);
|
||||
const GLenum format = StoreFormat(bytes_per_block);
|
||||
const u32 pitch = image.info.pitch;
|
||||
|
||||
UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block),
|
||||
"Non-power of two images are not implemented");
|
||||
|
||||
program_manager.BindHostCompute(pitch_unswizzle_program.handle);
|
||||
glFlushMappedNamedBufferRange(map.Handle(), buffer_offset, image.guest_size_bytes);
|
||||
glUniform2ui(LOC_ORIGIN, 0, 0);
|
||||
glUniform2i(LOC_DESTINATION, 0, 0);
|
||||
glUniform1ui(LOC_BYTES_PER_BLOCK, bytes_per_block);
|
||||
glUniform1ui(LOC_PITCH, pitch);
|
||||
glBindImageTexture(BINDING_OUTPUT_IMAGE, image.Handle(), 0, GL_FALSE, 0, GL_WRITE_ONLY, format);
|
||||
for (const SwizzleParameters& swizzle : swizzles) {
|
||||
const Extent3D num_tiles = swizzle.num_tiles;
|
||||
const size_t input_offset = swizzle.buffer_offset + buffer_offset;
|
||||
|
||||
const u32 num_dispatches_x = Common::DivCeil(num_tiles.width, WORKGROUP_SIZE.width);
|
||||
const u32 num_dispatches_y = Common::DivCeil(num_tiles.height, WORKGROUP_SIZE.height);
|
||||
|
||||
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, BINDING_INPUT_BUFFER, map.Handle(),
|
||||
input_offset, image.guest_size_bytes - swizzle.buffer_offset);
|
||||
glDispatchCompute(num_dispatches_x, num_dispatches_y, 1);
|
||||
}
|
||||
program_manager.RestoreGuestCompute();
|
||||
}
|
||||
|
||||
void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const ImageCopy> copies) {
|
||||
static constexpr GLuint BINDING_INPUT_IMAGE = 0;
|
||||
static constexpr GLuint BINDING_OUTPUT_IMAGE = 1;
|
||||
static constexpr GLuint LOC_SRC_OFFSET = 0;
|
||||
static constexpr GLuint LOC_DST_OFFSET = 1;
|
||||
|
||||
program_manager.BindHostCompute(copy_bc4_program.handle);
|
||||
|
||||
for (const ImageCopy& copy : copies) {
|
||||
ASSERT(copy.src_subresource.base_layer == 0);
|
||||
ASSERT(copy.src_subresource.num_layers == 1);
|
||||
ASSERT(copy.dst_subresource.base_layer == 0);
|
||||
ASSERT(copy.dst_subresource.num_layers == 1);
|
||||
|
||||
glUniform3ui(LOC_SRC_OFFSET, copy.src_offset.x, copy.src_offset.y, copy.src_offset.z);
|
||||
glUniform3ui(LOC_DST_OFFSET, copy.dst_offset.x, copy.dst_offset.y, copy.dst_offset.z);
|
||||
glBindImageTexture(BINDING_INPUT_IMAGE, src_image.Handle(), copy.src_subresource.base_level,
|
||||
GL_FALSE, 0, GL_READ_ONLY, GL_RG32UI);
|
||||
glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.Handle(),
|
||||
copy.dst_subresource.base_level, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGBA8UI);
|
||||
glDispatchCompute(copy.extent.width, copy.extent.height, copy.extent.depth);
|
||||
}
|
||||
program_manager.RestoreGuestCompute();
|
||||
}
|
||||
|
||||
GLenum StoreFormat(u32 bytes_per_block) {
|
||||
switch (bytes_per_block) {
|
||||
case 1:
|
||||
return GL_R8UI;
|
||||
case 2:
|
||||
return GL_R16UI;
|
||||
case 4:
|
||||
return GL_R32UI;
|
||||
case 8:
|
||||
return GL_RG32UI;
|
||||
case 16:
|
||||
return GL_RGBA32UI;
|
||||
}
|
||||
UNREACHABLE();
|
||||
return GL_R8UI;
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
51
src/video_core/renderer_opengl/util_shaders.h
Normal file
51
src/video_core/renderer_opengl/util_shaders.h
Normal file
|
@ -0,0 +1,51 @@
|
|||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <span>
|
||||
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/texture_cache/types.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
class Image;
|
||||
class ImageBufferMap;
|
||||
class ProgramManager;
|
||||
|
||||
class UtilShaders {
|
||||
public:
|
||||
explicit UtilShaders(ProgramManager& program_manager);
|
||||
~UtilShaders();
|
||||
|
||||
void BlockLinearUpload2D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
|
||||
std::span<const VideoCommon::SwizzleParameters> swizzles);
|
||||
|
||||
void BlockLinearUpload3D(Image& image, const ImageBufferMap& map, size_t buffer_offset,
|
||||
std::span<const VideoCommon::SwizzleParameters> swizzles);
|
||||
|
||||
void PitchUpload(Image& image, const ImageBufferMap& map, size_t buffer_offset,
|
||||
std::span<const VideoCommon::SwizzleParameters> swizzles);
|
||||
|
||||
void CopyBC4(Image& dst_image, Image& src_image,
|
||||
std::span<const VideoCommon::ImageCopy> copies);
|
||||
|
||||
private:
|
||||
ProgramManager& program_manager;
|
||||
|
||||
OGLBuffer swizzle_table_buffer;
|
||||
|
||||
OGLProgram block_linear_unswizzle_2d_program;
|
||||
OGLProgram block_linear_unswizzle_3d_program;
|
||||
OGLProgram pitch_unswizzle_program;
|
||||
OGLProgram copy_bc4_program;
|
||||
};
|
||||
|
||||
GLenum StoreFormat(u32 bytes_per_block);
|
||||
|
||||
} // namespace OpenGL
|
|
@ -1,42 +0,0 @@
|
|||
// Copyright 2014 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <fmt/format.h>
|
||||
#include <glad/glad.h>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/renderer_opengl/gl_state_tracker.h"
|
||||
#include "video_core/renderer_opengl/utils.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info) {
|
||||
if (!GLAD_GL_KHR_debug) {
|
||||
// We don't need to throw an error as this is just for debugging
|
||||
return;
|
||||
}
|
||||
|
||||
std::string object_label;
|
||||
if (extra_info.empty()) {
|
||||
switch (identifier) {
|
||||
case GL_TEXTURE:
|
||||
object_label = fmt::format("Texture@0x{:016X}", addr);
|
||||
break;
|
||||
case GL_PROGRAM:
|
||||
object_label = fmt::format("Shader@0x{:016X}", addr);
|
||||
break;
|
||||
default:
|
||||
object_label = fmt::format("Object(0x{:X})@0x{:016X}", identifier, addr);
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
object_label = fmt::format("{}@0x{:016X}", extra_info, addr);
|
||||
}
|
||||
glObjectLabel(identifier, handle, -1, static_cast<const GLchar*>(object_label.c_str()));
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
|
@ -1,16 +0,0 @@
|
|||
// Copyright 2014 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string_view>
|
||||
#include <vector>
|
||||
#include <glad/glad.h>
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {});
|
||||
|
||||
} // namespace OpenGL
|
624
src/video_core/renderer_vulkan/blit_image.cpp
Normal file
624
src/video_core/renderer_vulkan/blit_image.cpp
Normal file
|
@ -0,0 +1,624 @@
|
|||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "video_core/host_shaders/convert_depth_to_float_frag_spv.h"
|
||||
#include "video_core/host_shaders/convert_float_to_depth_frag_spv.h"
|
||||
#include "video_core/host_shaders/full_screen_triangle_vert_spv.h"
|
||||
#include "video_core/host_shaders/vulkan_blit_color_float_frag_spv.h"
|
||||
#include "video_core/host_shaders/vulkan_blit_depth_stencil_frag_spv.h"
|
||||
#include "video_core/renderer_vulkan/blit_image.h"
|
||||
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
|
||||
#include "video_core/renderer_vulkan/vk_device.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader_util.h"
|
||||
#include "video_core/renderer_vulkan/vk_state_tracker.h"
|
||||
#include "video_core/renderer_vulkan/vk_texture_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
|
||||
#include "video_core/renderer_vulkan/wrapper.h"
|
||||
#include "video_core/surface.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
using VideoCommon::ImageViewType;
|
||||
|
||||
namespace {
|
||||
struct PushConstants {
|
||||
std::array<float, 2> tex_scale;
|
||||
std::array<float, 2> tex_offset;
|
||||
};
|
||||
|
||||
template <u32 binding>
|
||||
inline constexpr VkDescriptorSetLayoutBinding TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING{
|
||||
.binding = binding,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
|
||||
.descriptorCount = 1,
|
||||
.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
|
||||
.pImmutableSamplers = nullptr,
|
||||
};
|
||||
constexpr std::array TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS{
|
||||
TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<0>,
|
||||
TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<1>,
|
||||
};
|
||||
constexpr VkDescriptorSetLayoutCreateInfo ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO{
|
||||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.bindingCount = 1,
|
||||
.pBindings = &TEXTURE_DESCRIPTOR_SET_LAYOUT_BINDING<0>,
|
||||
};
|
||||
constexpr VkDescriptorSetLayoutCreateInfo TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO{
|
||||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.bindingCount = static_cast<u32>(TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS.size()),
|
||||
.pBindings = TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_BINDINGS.data(),
|
||||
};
|
||||
constexpr VkPushConstantRange PUSH_CONSTANT_RANGE{
|
||||
.stageFlags = VK_SHADER_STAGE_VERTEX_BIT,
|
||||
.offset = 0,
|
||||
.size = sizeof(PushConstants),
|
||||
};
|
||||
constexpr VkPipelineVertexInputStateCreateInfo PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.vertexBindingDescriptionCount = 0,
|
||||
.pVertexBindingDescriptions = nullptr,
|
||||
.vertexAttributeDescriptionCount = 0,
|
||||
.pVertexAttributeDescriptions = nullptr,
|
||||
};
|
||||
constexpr VkPipelineInputAssemblyStateCreateInfo PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
|
||||
.primitiveRestartEnable = VK_FALSE,
|
||||
};
|
||||
constexpr VkPipelineViewportStateCreateInfo PIPELINE_VIEWPORT_STATE_CREATE_INFO{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.viewportCount = 1,
|
||||
.pViewports = nullptr,
|
||||
.scissorCount = 1,
|
||||
.pScissors = nullptr,
|
||||
};
|
||||
constexpr VkPipelineRasterizationStateCreateInfo PIPELINE_RASTERIZATION_STATE_CREATE_INFO{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.depthClampEnable = VK_FALSE,
|
||||
.rasterizerDiscardEnable = VK_FALSE,
|
||||
.polygonMode = VK_POLYGON_MODE_FILL,
|
||||
.cullMode = VK_CULL_MODE_BACK_BIT,
|
||||
.frontFace = VK_FRONT_FACE_CLOCKWISE,
|
||||
.depthBiasEnable = VK_FALSE,
|
||||
.depthBiasConstantFactor = 0.0f,
|
||||
.depthBiasClamp = 0.0f,
|
||||
.depthBiasSlopeFactor = 0.0f,
|
||||
.lineWidth = 1.0f,
|
||||
};
|
||||
constexpr VkPipelineMultisampleStateCreateInfo PIPELINE_MULTISAMPLE_STATE_CREATE_INFO{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
|
||||
.sampleShadingEnable = VK_FALSE,
|
||||
.minSampleShading = 0.0f,
|
||||
.pSampleMask = nullptr,
|
||||
.alphaToCoverageEnable = VK_FALSE,
|
||||
.alphaToOneEnable = VK_FALSE,
|
||||
};
|
||||
constexpr std::array DYNAMIC_STATES{
|
||||
VK_DYNAMIC_STATE_VIEWPORT,
|
||||
VK_DYNAMIC_STATE_SCISSOR,
|
||||
};
|
||||
constexpr VkPipelineDynamicStateCreateInfo PIPELINE_DYNAMIC_STATE_CREATE_INFO{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.dynamicStateCount = static_cast<u32>(DYNAMIC_STATES.size()),
|
||||
.pDynamicStates = DYNAMIC_STATES.data(),
|
||||
};
|
||||
constexpr VkPipelineColorBlendStateCreateInfo PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.logicOpEnable = VK_FALSE,
|
||||
.logicOp = VK_LOGIC_OP_CLEAR,
|
||||
.attachmentCount = 0,
|
||||
.pAttachments = nullptr,
|
||||
.blendConstants = {0.0f, 0.0f, 0.0f, 0.0f},
|
||||
};
|
||||
constexpr VkPipelineColorBlendAttachmentState PIPELINE_COLOR_BLEND_ATTACHMENT_STATE{
|
||||
.blendEnable = VK_FALSE,
|
||||
.srcColorBlendFactor = VK_BLEND_FACTOR_ZERO,
|
||||
.dstColorBlendFactor = VK_BLEND_FACTOR_ZERO,
|
||||
.colorBlendOp = VK_BLEND_OP_ADD,
|
||||
.srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
|
||||
.dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
|
||||
.alphaBlendOp = VK_BLEND_OP_ADD,
|
||||
.colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
|
||||
VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
|
||||
};
|
||||
constexpr VkPipelineColorBlendStateCreateInfo PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.logicOpEnable = VK_FALSE,
|
||||
.logicOp = VK_LOGIC_OP_CLEAR,
|
||||
.attachmentCount = 1,
|
||||
.pAttachments = &PIPELINE_COLOR_BLEND_ATTACHMENT_STATE,
|
||||
.blendConstants = {0.0f, 0.0f, 0.0f, 0.0f},
|
||||
};
|
||||
constexpr VkPipelineDepthStencilStateCreateInfo PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.depthTestEnable = VK_TRUE,
|
||||
.depthWriteEnable = VK_TRUE,
|
||||
.depthCompareOp = VK_COMPARE_OP_ALWAYS,
|
||||
.depthBoundsTestEnable = VK_FALSE,
|
||||
.stencilTestEnable = VK_FALSE,
|
||||
.front = VkStencilOpState{},
|
||||
.back = VkStencilOpState{},
|
||||
.minDepthBounds = 0.0f,
|
||||
.maxDepthBounds = 0.0f,
|
||||
};
|
||||
|
||||
template <VkFilter filter>
|
||||
inline constexpr VkSamplerCreateInfo SAMPLER_CREATE_INFO{
|
||||
.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.magFilter = filter,
|
||||
.minFilter = filter,
|
||||
.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST,
|
||||
.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
|
||||
.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
|
||||
.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
|
||||
.mipLodBias = 0.0f,
|
||||
.anisotropyEnable = VK_FALSE,
|
||||
.maxAnisotropy = 0.0f,
|
||||
.compareEnable = VK_FALSE,
|
||||
.compareOp = VK_COMPARE_OP_NEVER,
|
||||
.minLod = 0.0f,
|
||||
.maxLod = 0.0f,
|
||||
.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE,
|
||||
.unnormalizedCoordinates = VK_TRUE,
|
||||
};
|
||||
|
||||
constexpr VkPipelineLayoutCreateInfo PipelineLayoutCreateInfo(
|
||||
const VkDescriptorSetLayout* set_layout) {
|
||||
return VkPipelineLayoutCreateInfo{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.setLayoutCount = 1,
|
||||
.pSetLayouts = set_layout,
|
||||
.pushConstantRangeCount = 1,
|
||||
.pPushConstantRanges = &PUSH_CONSTANT_RANGE,
|
||||
};
|
||||
}
|
||||
|
||||
constexpr VkPipelineShaderStageCreateInfo PipelineShaderStageCreateInfo(VkShaderStageFlagBits stage,
|
||||
VkShaderModule shader) {
|
||||
return VkPipelineShaderStageCreateInfo{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.stage = stage,
|
||||
.module = shader,
|
||||
.pName = "main",
|
||||
.pSpecializationInfo = nullptr,
|
||||
};
|
||||
}
|
||||
|
||||
constexpr std::array<VkPipelineShaderStageCreateInfo, 2> MakeStages(
|
||||
VkShaderModule vertex_shader, VkShaderModule fragment_shader) {
|
||||
return std::array{
|
||||
PipelineShaderStageCreateInfo(VK_SHADER_STAGE_VERTEX_BIT, vertex_shader),
|
||||
PipelineShaderStageCreateInfo(VK_SHADER_STAGE_FRAGMENT_BIT, fragment_shader),
|
||||
};
|
||||
}
|
||||
|
||||
void UpdateOneTextureDescriptorSet(const VKDevice& device, VkDescriptorSet descriptor_set,
|
||||
VkSampler sampler, VkImageView image_view) {
|
||||
const VkDescriptorImageInfo image_info{
|
||||
.sampler = sampler,
|
||||
.imageView = image_view,
|
||||
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
};
|
||||
const VkWriteDescriptorSet write_descriptor_set{
|
||||
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
|
||||
.pNext = nullptr,
|
||||
.dstSet = descriptor_set,
|
||||
.dstBinding = 0,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
|
||||
.pImageInfo = &image_info,
|
||||
.pBufferInfo = nullptr,
|
||||
.pTexelBufferView = nullptr,
|
||||
};
|
||||
device.GetLogical().UpdateDescriptorSets(write_descriptor_set, nullptr);
|
||||
}
|
||||
|
||||
void UpdateTwoTexturesDescriptorSet(const VKDevice& device, VkDescriptorSet descriptor_set,
|
||||
VkSampler sampler, VkImageView image_view_0,
|
||||
VkImageView image_view_1) {
|
||||
const VkDescriptorImageInfo image_info_0{
|
||||
.sampler = sampler,
|
||||
.imageView = image_view_0,
|
||||
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
};
|
||||
const VkDescriptorImageInfo image_info_1{
|
||||
.sampler = sampler,
|
||||
.imageView = image_view_1,
|
||||
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
};
|
||||
const std::array write_descriptor_sets{
|
||||
VkWriteDescriptorSet{
|
||||
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
|
||||
.pNext = nullptr,
|
||||
.dstSet = descriptor_set,
|
||||
.dstBinding = 0,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
|
||||
.pImageInfo = &image_info_0,
|
||||
.pBufferInfo = nullptr,
|
||||
.pTexelBufferView = nullptr,
|
||||
},
|
||||
VkWriteDescriptorSet{
|
||||
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
|
||||
.pNext = nullptr,
|
||||
.dstSet = descriptor_set,
|
||||
.dstBinding = 1,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
|
||||
.pImageInfo = &image_info_1,
|
||||
.pBufferInfo = nullptr,
|
||||
.pTexelBufferView = nullptr,
|
||||
},
|
||||
};
|
||||
device.GetLogical().UpdateDescriptorSets(write_descriptor_sets, nullptr);
|
||||
}
|
||||
|
||||
void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout,
|
||||
const std::array<Offset2D, 2>& dst_region,
|
||||
const std::array<Offset2D, 2>& src_region) {
|
||||
const VkOffset2D offset{
|
||||
.x = std::min(dst_region[0].x, dst_region[1].x),
|
||||
.y = std::min(dst_region[0].y, dst_region[1].y),
|
||||
};
|
||||
const VkExtent2D extent{
|
||||
.width = static_cast<u32>(std::abs(dst_region[1].x - dst_region[0].x)),
|
||||
.height = static_cast<u32>(std::abs(dst_region[1].y - dst_region[0].y)),
|
||||
};
|
||||
const VkViewport viewport{
|
||||
.x = static_cast<float>(offset.x),
|
||||
.y = static_cast<float>(offset.y),
|
||||
.width = static_cast<float>(extent.width),
|
||||
.height = static_cast<float>(extent.height),
|
||||
.minDepth = 0.0f,
|
||||
.maxDepth = 1.0f,
|
||||
};
|
||||
// TODO: Support scissored blits
|
||||
const VkRect2D scissor{
|
||||
.offset = offset,
|
||||
.extent = extent,
|
||||
};
|
||||
const float scale_x = static_cast<float>(src_region[1].x - src_region[0].x);
|
||||
const float scale_y = static_cast<float>(src_region[1].y - src_region[0].y);
|
||||
const PushConstants push_constants{
|
||||
.tex_scale = {scale_x, scale_y},
|
||||
.tex_offset = {static_cast<float>(src_region[0].x), static_cast<float>(src_region[0].y)},
|
||||
};
|
||||
cmdbuf.SetViewport(0, viewport);
|
||||
cmdbuf.SetScissor(0, scissor);
|
||||
cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants);
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
BlitImageHelper::BlitImageHelper(const VKDevice& device_, VKScheduler& scheduler_,
|
||||
StateTracker& state_tracker_, VKDescriptorPool& descriptor_pool)
|
||||
: device{device_}, scheduler{scheduler_}, state_tracker{state_tracker_},
|
||||
one_texture_set_layout(device.GetLogical().CreateDescriptorSetLayout(
|
||||
ONE_TEXTURE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)),
|
||||
two_textures_set_layout(device.GetLogical().CreateDescriptorSetLayout(
|
||||
TWO_TEXTURES_DESCRIPTOR_SET_LAYOUT_CREATE_INFO)),
|
||||
one_texture_descriptor_allocator(descriptor_pool, *one_texture_set_layout),
|
||||
two_textures_descriptor_allocator(descriptor_pool, *two_textures_set_layout),
|
||||
one_texture_pipeline_layout(device.GetLogical().CreatePipelineLayout(
|
||||
PipelineLayoutCreateInfo(one_texture_set_layout.address()))),
|
||||
two_textures_pipeline_layout(device.GetLogical().CreatePipelineLayout(
|
||||
PipelineLayoutCreateInfo(two_textures_set_layout.address()))),
|
||||
full_screen_vert(BuildShader(device, FULL_SCREEN_TRIANGLE_VERT_SPV)),
|
||||
blit_color_to_color_frag(BuildShader(device, VULKAN_BLIT_COLOR_FLOAT_FRAG_SPV)),
|
||||
convert_depth_to_float_frag(BuildShader(device, CONVERT_DEPTH_TO_FLOAT_FRAG_SPV)),
|
||||
convert_float_to_depth_frag(BuildShader(device, CONVERT_FLOAT_TO_DEPTH_FRAG_SPV)),
|
||||
linear_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_LINEAR>)),
|
||||
nearest_sampler(device.GetLogical().CreateSampler(SAMPLER_CREATE_INFO<VK_FILTER_NEAREST>)) {
|
||||
if (device.IsExtShaderStencilExportSupported()) {
|
||||
blit_depth_stencil_frag = BuildShader(device, VULKAN_BLIT_DEPTH_STENCIL_FRAG_SPV);
|
||||
}
|
||||
}
|
||||
|
||||
BlitImageHelper::~BlitImageHelper() = default;
|
||||
|
||||
void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
|
||||
const std::array<Offset2D, 2>& dst_region,
|
||||
const std::array<Offset2D, 2>& src_region,
|
||||
Tegra::Engines::Fermi2D::Filter filter,
|
||||
Tegra::Engines::Fermi2D::Operation operation) {
|
||||
const bool is_linear = filter == Tegra::Engines::Fermi2D::Filter::Bilinear;
|
||||
const BlitImagePipelineKey key{
|
||||
.renderpass = dst_framebuffer->RenderPass(),
|
||||
.operation = operation,
|
||||
};
|
||||
const VkPipelineLayout layout = *one_texture_pipeline_layout;
|
||||
const VkImageView src_view = src_image_view.Handle(ImageViewType::e2D);
|
||||
const VkSampler sampler = is_linear ? *linear_sampler : *nearest_sampler;
|
||||
const VkPipeline pipeline = FindOrEmplacePipeline(key);
|
||||
const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
|
||||
scheduler.RequestRenderpass(dst_framebuffer);
|
||||
scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_view, descriptor_set,
|
||||
&device = device](vk::CommandBuffer cmdbuf) {
|
||||
// TODO: Barriers
|
||||
UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view);
|
||||
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
|
||||
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set,
|
||||
nullptr);
|
||||
BindBlitState(cmdbuf, layout, dst_region, src_region);
|
||||
cmdbuf.Draw(3, 1, 0, 0);
|
||||
});
|
||||
scheduler.InvalidateState();
|
||||
}
|
||||
|
||||
void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer,
|
||||
VkImageView src_depth_view, VkImageView src_stencil_view,
|
||||
const std::array<Offset2D, 2>& dst_region,
|
||||
const std::array<Offset2D, 2>& src_region,
|
||||
Tegra::Engines::Fermi2D::Filter filter,
|
||||
Tegra::Engines::Fermi2D::Operation operation) {
|
||||
ASSERT(filter == Tegra::Engines::Fermi2D::Filter::Point);
|
||||
ASSERT(operation == Tegra::Engines::Fermi2D::Operation::SrcCopy);
|
||||
|
||||
const VkPipelineLayout layout = *two_textures_pipeline_layout;
|
||||
const VkSampler sampler = *nearest_sampler;
|
||||
const VkPipeline pipeline = BlitDepthStencilPipeline(dst_framebuffer->RenderPass());
|
||||
const VkDescriptorSet descriptor_set = two_textures_descriptor_allocator.Commit();
|
||||
scheduler.RequestRenderpass(dst_framebuffer);
|
||||
scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_depth_view,
|
||||
src_stencil_view, descriptor_set,
|
||||
&device = device](vk::CommandBuffer cmdbuf) {
|
||||
// TODO: Barriers
|
||||
UpdateTwoTexturesDescriptorSet(device, descriptor_set, sampler, src_depth_view,
|
||||
src_stencil_view);
|
||||
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
|
||||
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set,
|
||||
nullptr);
|
||||
BindBlitState(cmdbuf, layout, dst_region, src_region);
|
||||
cmdbuf.Draw(3, 1, 0, 0);
|
||||
});
|
||||
scheduler.InvalidateState();
|
||||
}
|
||||
|
||||
void BlitImageHelper::ConvertD32ToR32(const Framebuffer* dst_framebuffer,
|
||||
const ImageView& src_image_view) {
|
||||
ConvertDepthToColorPipeline(convert_d32_to_r32_pipeline, dst_framebuffer->RenderPass());
|
||||
Convert(*convert_d32_to_r32_pipeline, dst_framebuffer, src_image_view);
|
||||
}
|
||||
|
||||
void BlitImageHelper::ConvertR32ToD32(const Framebuffer* dst_framebuffer,
|
||||
const ImageView& src_image_view) {
|
||||
|
||||
ConvertColorToDepthPipeline(convert_r32_to_d32_pipeline, dst_framebuffer->RenderPass());
|
||||
Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view);
|
||||
}
|
||||
|
||||
void BlitImageHelper::ConvertD16ToR16(const Framebuffer* dst_framebuffer,
|
||||
const ImageView& src_image_view) {
|
||||
ConvertDepthToColorPipeline(convert_d16_to_r16_pipeline, dst_framebuffer->RenderPass());
|
||||
Convert(*convert_d16_to_r16_pipeline, dst_framebuffer, src_image_view);
|
||||
}
|
||||
|
||||
void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer,
|
||||
const ImageView& src_image_view) {
|
||||
ConvertColorToDepthPipeline(convert_r16_to_d16_pipeline, dst_framebuffer->RenderPass());
|
||||
Convert(*convert_r16_to_d16_pipeline, dst_framebuffer, src_image_view);
|
||||
}
|
||||
|
||||
void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
|
||||
const ImageView& src_image_view) {
|
||||
const VkPipelineLayout layout = *one_texture_pipeline_layout;
|
||||
const VkImageView src_view = src_image_view.Handle(ImageViewType::e2D);
|
||||
const VkSampler sampler = *nearest_sampler;
|
||||
const VkDescriptorSet descriptor_set = one_texture_descriptor_allocator.Commit();
|
||||
const VkExtent2D extent{
|
||||
.width = src_image_view.size.width,
|
||||
.height = src_image_view.size.height,
|
||||
};
|
||||
scheduler.RequestRenderpass(dst_framebuffer);
|
||||
scheduler.Record([pipeline, layout, sampler, src_view, descriptor_set, extent,
|
||||
&device = device](vk::CommandBuffer cmdbuf) {
|
||||
const VkOffset2D offset{
|
||||
.x = 0,
|
||||
.y = 0,
|
||||
};
|
||||
const VkViewport viewport{
|
||||
.x = 0.0f,
|
||||
.y = 0.0f,
|
||||
.width = static_cast<float>(extent.width),
|
||||
.height = static_cast<float>(extent.height),
|
||||
.minDepth = 0.0f,
|
||||
.maxDepth = 0.0f,
|
||||
};
|
||||
const VkRect2D scissor{
|
||||
.offset = offset,
|
||||
.extent = extent,
|
||||
};
|
||||
const PushConstants push_constants{
|
||||
.tex_scale = {viewport.width, viewport.height},
|
||||
.tex_offset = {0.0f, 0.0f},
|
||||
};
|
||||
UpdateOneTextureDescriptorSet(device, descriptor_set, sampler, src_view);
|
||||
|
||||
// TODO: Barriers
|
||||
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
|
||||
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout, 0, descriptor_set,
|
||||
nullptr);
|
||||
cmdbuf.SetViewport(0, viewport);
|
||||
cmdbuf.SetScissor(0, scissor);
|
||||
cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants);
|
||||
cmdbuf.Draw(3, 1, 0, 0);
|
||||
});
|
||||
scheduler.InvalidateState();
|
||||
}
|
||||
|
||||
VkPipeline BlitImageHelper::FindOrEmplacePipeline(const BlitImagePipelineKey& key) {
|
||||
const auto it = std::ranges::find(blit_color_keys, key);
|
||||
if (it != blit_color_keys.end()) {
|
||||
return *blit_color_pipelines[std::distance(blit_color_keys.begin(), it)];
|
||||
}
|
||||
blit_color_keys.push_back(key);
|
||||
|
||||
const std::array stages = MakeStages(*full_screen_vert, *blit_color_to_color_frag);
|
||||
const VkPipelineColorBlendAttachmentState blend_attachment{
|
||||
.blendEnable = VK_FALSE,
|
||||
.srcColorBlendFactor = VK_BLEND_FACTOR_ZERO,
|
||||
.dstColorBlendFactor = VK_BLEND_FACTOR_ZERO,
|
||||
.colorBlendOp = VK_BLEND_OP_ADD,
|
||||
.srcAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
|
||||
.dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
|
||||
.alphaBlendOp = VK_BLEND_OP_ADD,
|
||||
.colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
|
||||
VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
|
||||
};
|
||||
// TODO: programmable blending
|
||||
const VkPipelineColorBlendStateCreateInfo color_blend_create_info{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.logicOpEnable = VK_FALSE,
|
||||
.logicOp = VK_LOGIC_OP_CLEAR,
|
||||
.attachmentCount = 1,
|
||||
.pAttachments = &blend_attachment,
|
||||
.blendConstants = {0.0f, 0.0f, 0.0f, 0.0f},
|
||||
};
|
||||
blit_color_pipelines.push_back(device.GetLogical().CreateGraphicsPipeline({
|
||||
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.stageCount = static_cast<u32>(stages.size()),
|
||||
.pStages = stages.data(),
|
||||
.pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
|
||||
.pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
|
||||
.pTessellationState = nullptr,
|
||||
.pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO,
|
||||
.pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
|
||||
.pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
|
||||
.pDepthStencilState = nullptr,
|
||||
.pColorBlendState = &color_blend_create_info,
|
||||
.pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
|
||||
.layout = *one_texture_pipeline_layout,
|
||||
.renderPass = key.renderpass,
|
||||
.subpass = 0,
|
||||
.basePipelineHandle = VK_NULL_HANDLE,
|
||||
.basePipelineIndex = 0,
|
||||
}));
|
||||
return *blit_color_pipelines.back();
|
||||
}
|
||||
|
||||
VkPipeline BlitImageHelper::BlitDepthStencilPipeline(VkRenderPass renderpass) {
|
||||
if (blit_depth_stencil_pipeline) {
|
||||
return *blit_depth_stencil_pipeline;
|
||||
}
|
||||
const std::array stages = MakeStages(*full_screen_vert, *blit_depth_stencil_frag);
|
||||
blit_depth_stencil_pipeline = device.GetLogical().CreateGraphicsPipeline({
|
||||
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.stageCount = static_cast<u32>(stages.size()),
|
||||
.pStages = stages.data(),
|
||||
.pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
|
||||
.pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
|
||||
.pTessellationState = nullptr,
|
||||
.pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO,
|
||||
.pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
|
||||
.pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
|
||||
.pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
|
||||
.pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO,
|
||||
.pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
|
||||
.layout = *two_textures_pipeline_layout,
|
||||
.renderPass = renderpass,
|
||||
.subpass = 0,
|
||||
.basePipelineHandle = VK_NULL_HANDLE,
|
||||
.basePipelineIndex = 0,
|
||||
});
|
||||
return *blit_depth_stencil_pipeline;
|
||||
}
|
||||
|
||||
void BlitImageHelper::ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) {
|
||||
if (pipeline) {
|
||||
return;
|
||||
}
|
||||
const std::array stages = MakeStages(*full_screen_vert, *convert_depth_to_float_frag);
|
||||
pipeline = device.GetLogical().CreateGraphicsPipeline({
|
||||
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.stageCount = static_cast<u32>(stages.size()),
|
||||
.pStages = stages.data(),
|
||||
.pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
|
||||
.pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
|
||||
.pTessellationState = nullptr,
|
||||
.pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO,
|
||||
.pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
|
||||
.pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
|
||||
.pDepthStencilState = nullptr,
|
||||
.pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_GENERIC_CREATE_INFO,
|
||||
.pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
|
||||
.layout = *one_texture_pipeline_layout,
|
||||
.renderPass = renderpass,
|
||||
.subpass = 0,
|
||||
.basePipelineHandle = VK_NULL_HANDLE,
|
||||
.basePipelineIndex = 0,
|
||||
});
|
||||
}
|
||||
|
||||
void BlitImageHelper::ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass) {
|
||||
if (pipeline) {
|
||||
return;
|
||||
}
|
||||
const std::array stages = MakeStages(*full_screen_vert, *convert_float_to_depth_frag);
|
||||
pipeline = device.GetLogical().CreateGraphicsPipeline({
|
||||
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.stageCount = static_cast<u32>(stages.size()),
|
||||
.pStages = stages.data(),
|
||||
.pVertexInputState = &PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
|
||||
.pInputAssemblyState = &PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
|
||||
.pTessellationState = nullptr,
|
||||
.pViewportState = &PIPELINE_VIEWPORT_STATE_CREATE_INFO,
|
||||
.pRasterizationState = &PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
|
||||
.pMultisampleState = &PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
|
||||
.pDepthStencilState = &PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
|
||||
.pColorBlendState = &PIPELINE_COLOR_BLEND_STATE_EMPTY_CREATE_INFO,
|
||||
.pDynamicState = &PIPELINE_DYNAMIC_STATE_CREATE_INFO,
|
||||
.layout = *one_texture_pipeline_layout,
|
||||
.renderPass = renderpass,
|
||||
.subpass = 0,
|
||||
.basePipelineHandle = VK_NULL_HANDLE,
|
||||
.basePipelineIndex = 0,
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
97
src/video_core/renderer_vulkan/blit_image.h
Normal file
97
src/video_core/renderer_vulkan/blit_image.h
Normal file
|
@ -0,0 +1,97 @@
|
|||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <compare>
|
||||
|
||||
#include "video_core/engines/fermi_2d.h"
|
||||
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
|
||||
#include "video_core/renderer_vulkan/wrapper.h"
|
||||
#include "video_core/texture_cache/types.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
using VideoCommon::Offset2D;
|
||||
|
||||
class VKDevice;
|
||||
class VKScheduler;
|
||||
class StateTracker;
|
||||
|
||||
class Framebuffer;
|
||||
class ImageView;
|
||||
|
||||
struct BlitImagePipelineKey {
|
||||
constexpr auto operator<=>(const BlitImagePipelineKey&) const noexcept = default;
|
||||
|
||||
VkRenderPass renderpass;
|
||||
Tegra::Engines::Fermi2D::Operation operation;
|
||||
};
|
||||
|
||||
class BlitImageHelper {
|
||||
public:
|
||||
explicit BlitImageHelper(const VKDevice& device, VKScheduler& scheduler,
|
||||
StateTracker& state_tracker, VKDescriptorPool& descriptor_pool);
|
||||
~BlitImageHelper();
|
||||
|
||||
void BlitColor(const Framebuffer* dst_framebuffer, const ImageView& src_image_view,
|
||||
const std::array<Offset2D, 2>& dst_region,
|
||||
const std::array<Offset2D, 2>& src_region,
|
||||
Tegra::Engines::Fermi2D::Filter filter,
|
||||
Tegra::Engines::Fermi2D::Operation operation);
|
||||
|
||||
void BlitDepthStencil(const Framebuffer* dst_framebuffer, VkImageView src_depth_view,
|
||||
VkImageView src_stencil_view, const std::array<Offset2D, 2>& dst_region,
|
||||
const std::array<Offset2D, 2>& src_region,
|
||||
Tegra::Engines::Fermi2D::Filter filter,
|
||||
Tegra::Engines::Fermi2D::Operation operation);
|
||||
|
||||
void ConvertD32ToR32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
|
||||
|
||||
void ConvertR32ToD32(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
|
||||
|
||||
void ConvertD16ToR16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
|
||||
|
||||
void ConvertR16ToD16(const Framebuffer* dst_framebuffer, const ImageView& src_image_view);
|
||||
|
||||
private:
|
||||
void Convert(VkPipeline pipeline, const Framebuffer* dst_framebuffer,
|
||||
const ImageView& src_image_view);
|
||||
|
||||
[[nodiscard]] VkPipeline FindOrEmplacePipeline(const BlitImagePipelineKey& key);
|
||||
|
||||
[[nodiscard]] VkPipeline BlitDepthStencilPipeline(VkRenderPass renderpass);
|
||||
|
||||
void ConvertDepthToColorPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass);
|
||||
|
||||
void ConvertColorToDepthPipeline(vk::Pipeline& pipeline, VkRenderPass renderpass);
|
||||
|
||||
const VKDevice& device;
|
||||
VKScheduler& scheduler;
|
||||
StateTracker& state_tracker;
|
||||
|
||||
vk::DescriptorSetLayout one_texture_set_layout;
|
||||
vk::DescriptorSetLayout two_textures_set_layout;
|
||||
DescriptorAllocator one_texture_descriptor_allocator;
|
||||
DescriptorAllocator two_textures_descriptor_allocator;
|
||||
vk::PipelineLayout one_texture_pipeline_layout;
|
||||
vk::PipelineLayout two_textures_pipeline_layout;
|
||||
vk::ShaderModule full_screen_vert;
|
||||
vk::ShaderModule blit_color_to_color_frag;
|
||||
vk::ShaderModule blit_depth_stencil_frag;
|
||||
vk::ShaderModule convert_depth_to_float_frag;
|
||||
vk::ShaderModule convert_float_to_depth_frag;
|
||||
vk::Sampler linear_sampler;
|
||||
vk::Sampler nearest_sampler;
|
||||
|
||||
std::vector<BlitImagePipelineKey> blit_color_keys;
|
||||
std::vector<vk::Pipeline> blit_color_pipelines;
|
||||
vk::Pipeline blit_depth_stencil_pipeline;
|
||||
vk::Pipeline convert_d32_to_r32_pipeline;
|
||||
vk::Pipeline convert_r32_to_d32_pipeline;
|
||||
vk::Pipeline convert_d16_to_r16_pipeline;
|
||||
vk::Pipeline convert_r16_to_d16_pipeline;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
|
@ -60,6 +60,7 @@ void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_sta
|
|||
logic_op.Assign(PackLogicOp(regs.logic_op.operation));
|
||||
rasterize_enable.Assign(regs.rasterize_enable != 0 ? 1 : 0);
|
||||
topology.Assign(regs.draw.topology);
|
||||
msaa_mode.Assign(regs.multisample_mode);
|
||||
|
||||
raw2 = 0;
|
||||
const auto test_func =
|
||||
|
|
|
@ -186,6 +186,7 @@ struct FixedPipelineState {
|
|||
BitField<19, 4, u32> logic_op;
|
||||
BitField<23, 1, u32> rasterize_enable;
|
||||
BitField<24, 4, Maxwell::PrimitiveTopology> topology;
|
||||
BitField<28, 4, Tegra::Texture::MsaaMode> msaa_mode;
|
||||
};
|
||||
union {
|
||||
u32 raw2;
|
||||
|
|
|
@ -122,7 +122,7 @@ struct FormatTuple {
|
|||
{VK_FORMAT_A8B8G8R8_SINT_PACK32, Attachable | Storage}, // A8B8G8R8_SINT
|
||||
{VK_FORMAT_A8B8G8R8_UINT_PACK32, Attachable | Storage}, // A8B8G8R8_UINT
|
||||
{VK_FORMAT_R5G6B5_UNORM_PACK16, Attachable}, // R5G6B5_UNORM
|
||||
{VK_FORMAT_B5G6R5_UNORM_PACK16, Attachable}, // B5G6R5_UNORM
|
||||
{VK_FORMAT_B5G6R5_UNORM_PACK16}, // B5G6R5_UNORM
|
||||
{VK_FORMAT_A1R5G5B5_UNORM_PACK16, Attachable}, // A1R5G5B5_UNORM
|
||||
{VK_FORMAT_A2B10G10R10_UNORM_PACK32, Attachable | Storage}, // A2B10G10R10_UNORM
|
||||
{VK_FORMAT_A2B10G10R10_UINT_PACK32, Attachable | Storage}, // A2B10G10R10_UINT
|
||||
|
@ -163,7 +163,7 @@ struct FormatTuple {
|
|||
{VK_FORMAT_R16G16_UNORM, Attachable | Storage}, // R16G16_UNORM
|
||||
{VK_FORMAT_R16G16_SFLOAT, Attachable | Storage}, // R16G16_FLOAT
|
||||
{VK_FORMAT_UNDEFINED}, // R16G16_UINT
|
||||
{VK_FORMAT_UNDEFINED}, // R16G16_SINT
|
||||
{VK_FORMAT_R16G16_SINT, Attachable | Storage}, // R16G16_SINT
|
||||
{VK_FORMAT_R16G16_SNORM, Attachable | Storage}, // R16G16_SNORM
|
||||
{VK_FORMAT_UNDEFINED}, // R32G32B32_FLOAT
|
||||
{VK_FORMAT_R8G8B8A8_SRGB, Attachable}, // A8B8G8R8_SRGB
|
||||
|
@ -233,18 +233,20 @@ FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFo
|
|||
|
||||
// Use A8B8G8R8_UNORM on hardware that doesn't support ASTC natively
|
||||
if (!device.IsOptimalAstcSupported() && VideoCore::Surface::IsPixelFormatASTC(pixel_format)) {
|
||||
tuple.format = VideoCore::Surface::IsPixelFormatSRGB(pixel_format)
|
||||
? VK_FORMAT_A8B8G8R8_SRGB_PACK32
|
||||
: VK_FORMAT_A8B8G8R8_UNORM_PACK32;
|
||||
const bool is_srgb = VideoCore::Surface::IsPixelFormatSRGB(pixel_format);
|
||||
tuple.format = is_srgb ? VK_FORMAT_A8B8G8R8_SRGB_PACK32 : VK_FORMAT_A8B8G8R8_UNORM_PACK32;
|
||||
}
|
||||
const bool attachable = tuple.usage & Attachable;
|
||||
const bool storage = tuple.usage & Storage;
|
||||
|
||||
VkFormatFeatureFlags usage;
|
||||
if (format_type == FormatType::Buffer) {
|
||||
switch (format_type) {
|
||||
case FormatType::Buffer:
|
||||
usage =
|
||||
VK_FORMAT_FEATURE_STORAGE_TEXEL_BUFFER_BIT | VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT;
|
||||
} else {
|
||||
break;
|
||||
case FormatType::Linear:
|
||||
case FormatType::Optimal:
|
||||
usage = VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT | VK_FORMAT_FEATURE_TRANSFER_DST_BIT |
|
||||
VK_FORMAT_FEATURE_TRANSFER_SRC_BIT;
|
||||
if (attachable) {
|
||||
|
@ -254,6 +256,7 @@ FormatInfo SurfaceFormat(const VKDevice& device, FormatType format_type, PixelFo
|
|||
if (storage) {
|
||||
usage |= VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT;
|
||||
}
|
||||
break;
|
||||
}
|
||||
return {device.GetSupportedFormat(tuple.format, usage, format_type), attachable, storage};
|
||||
}
|
||||
|
@ -724,4 +727,17 @@ VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle)
|
|||
return {};
|
||||
}
|
||||
|
||||
VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction) {
|
||||
switch (reduction) {
|
||||
case Tegra::Texture::SamplerReduction::WeightedAverage:
|
||||
return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT;
|
||||
case Tegra::Texture::SamplerReduction::Min:
|
||||
return VK_SAMPLER_REDUCTION_MODE_MIN_EXT;
|
||||
case Tegra::Texture::SamplerReduction::Max:
|
||||
return VK_SAMPLER_REDUCTION_MODE_MAX_EXT;
|
||||
}
|
||||
UNREACHABLE_MSG("Invalid sampler mode={}", static_cast<int>(reduction));
|
||||
return VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT;
|
||||
}
|
||||
|
||||
} // namespace Vulkan::MaxwellToVK
|
||||
|
|
|
@ -61,4 +61,6 @@ VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle);
|
|||
|
||||
VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle);
|
||||
|
||||
VkSamplerReductionMode SamplerReduction(Tegra::Texture::SamplerReduction reduction);
|
||||
|
||||
} // namespace Vulkan::MaxwellToVK
|
||||
|
|
|
@ -92,9 +92,9 @@ Common::DynamicLibrary OpenVulkanLibrary() {
|
|||
return library;
|
||||
}
|
||||
|
||||
std::pair<vk::Instance, u32> CreateInstance(
|
||||
Common::DynamicLibrary& library, vk::InstanceDispatch& dld,
|
||||
WindowSystemType window_type = WindowSystemType::Headless, bool enable_layers = false) {
|
||||
std::pair<vk::Instance, u32> CreateInstance(Common::DynamicLibrary& library,
|
||||
vk::InstanceDispatch& dld, WindowSystemType window_type,
|
||||
bool enable_debug_utils, bool enable_layers) {
|
||||
if (!library.IsOpen()) {
|
||||
LOG_ERROR(Render_Vulkan, "Vulkan library not available");
|
||||
return {};
|
||||
|
@ -133,7 +133,7 @@ std::pair<vk::Instance, u32> CreateInstance(
|
|||
if (window_type != Core::Frontend::WindowSystemType::Headless) {
|
||||
extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME);
|
||||
}
|
||||
if (enable_layers) {
|
||||
if (enable_debug_utils) {
|
||||
extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME);
|
||||
}
|
||||
extensions.push_back(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME);
|
||||
|
@ -287,7 +287,7 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
|
|||
bool RendererVulkan::Init() {
|
||||
library = OpenVulkanLibrary();
|
||||
std::tie(instance, instance_version) = CreateInstance(
|
||||
library, dld, render_window.GetWindowInfo().type, Settings::values.renderer_debug);
|
||||
library, dld, render_window.GetWindowInfo().type, true, Settings::values.renderer_debug);
|
||||
if (!instance || !CreateDebugCallback() || !CreateSurface() || !PickDevices()) {
|
||||
return false;
|
||||
}
|
||||
|
@ -447,7 +447,8 @@ void RendererVulkan::Report() const {
|
|||
std::vector<std::string> RendererVulkan::EnumerateDevices() {
|
||||
vk::InstanceDispatch dld;
|
||||
Common::DynamicLibrary library = OpenVulkanLibrary();
|
||||
vk::Instance instance = CreateInstance(library, dld).first;
|
||||
vk::Instance instance =
|
||||
CreateInstance(library, dld, WindowSystemType::Headless, false, false).first;
|
||||
if (!instance) {
|
||||
return {};
|
||||
}
|
||||
|
|
|
@ -33,10 +33,9 @@ class VKDevice;
|
|||
class VKMemoryManager;
|
||||
class VKSwapchain;
|
||||
class VKScheduler;
|
||||
class VKImage;
|
||||
|
||||
struct VKScreenInfo {
|
||||
VKImage* image{};
|
||||
VkImageView image_view{};
|
||||
u32 width{};
|
||||
u32 height{};
|
||||
bool is_srgb{};
|
||||
|
|
|
@ -16,12 +16,12 @@
|
|||
#include "core/frontend/emu_window.h"
|
||||
#include "core/memory.h"
|
||||
#include "video_core/gpu.h"
|
||||
#include "video_core/morton.h"
|
||||
#include "video_core/host_shaders/vulkan_present_frag_spv.h"
|
||||
#include "video_core/host_shaders/vulkan_present_vert_spv.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/renderer_vulkan/renderer_vulkan.h"
|
||||
#include "video_core/renderer_vulkan/vk_blit_screen.h"
|
||||
#include "video_core/renderer_vulkan/vk_device.h"
|
||||
#include "video_core/renderer_vulkan/vk_image.h"
|
||||
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
|
||||
#include "video_core/renderer_vulkan/vk_memory_manager.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
|
@ -29,108 +29,12 @@
|
|||
#include "video_core/renderer_vulkan/vk_swapchain.h"
|
||||
#include "video_core/renderer_vulkan/wrapper.h"
|
||||
#include "video_core/surface.h"
|
||||
#include "video_core/textures/decoders.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
namespace {
|
||||
|
||||
// Generated from the "shaders/" directory, read the instructions there.
|
||||
constexpr u8 blit_vertex_code[] = {
|
||||
0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x27, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30,
|
||||
0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x0f, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
|
||||
0x00, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00,
|
||||
0x25, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00,
|
||||
0x0b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x48, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
|
||||
0x04, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x48, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00,
|
||||
0x48, 0x00, 0x05, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x07, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x11, 0x00, 0x00, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x19, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x24, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x25, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x03, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x20, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x04, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x06, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
|
||||
0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00,
|
||||
0x0c, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
|
||||
0x0e, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00,
|
||||
0x0e, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x04, 0x00,
|
||||
0x10, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00,
|
||||
0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00,
|
||||
0x13, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00,
|
||||
0x19, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x20, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00,
|
||||
0x24, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00,
|
||||
0x25, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00,
|
||||
0x05, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00,
|
||||
0x13, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00,
|
||||
0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00,
|
||||
0x1a, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x1d, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x50, 0x00, 0x07, 0x00, 0x07, 0x00, 0x00, 0x00, 0x1f, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00,
|
||||
0x1e, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x91, 0x00, 0x05, 0x00,
|
||||
0x07, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x1f, 0x00, 0x00, 0x00,
|
||||
0x41, 0x00, 0x05, 0x00, 0x21, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00,
|
||||
0x0f, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x22, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
|
||||
0x3d, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00,
|
||||
0x3e, 0x00, 0x03, 0x00, 0x24, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00,
|
||||
0x38, 0x00, 0x01, 0x00};
|
||||
|
||||
constexpr u8 blit_fragment_code[] = {
|
||||
0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x14, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30,
|
||||
0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x0f, 0x00, 0x07, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
|
||||
0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00,
|
||||
0x04, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00,
|
||||
0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0d, 0x00, 0x00, 0x00,
|
||||
0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0d, 0x00, 0x00, 0x00,
|
||||
0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00,
|
||||
0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x03, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00,
|
||||
0x09, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x19, 0x00, 0x09, 0x00, 0x0a, 0x00, 0x00, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x03, 0x00,
|
||||
0x0b, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00,
|
||||
0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x0f, 0x00, 0x00, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00,
|
||||
0x11, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00,
|
||||
0x05, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00,
|
||||
0x0d, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00,
|
||||
0x11, 0x00, 0x00, 0x00, 0x57, 0x00, 0x05, 0x00, 0x07, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00,
|
||||
0x0e, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x09, 0x00, 0x00, 0x00,
|
||||
0x13, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00};
|
||||
|
||||
struct ScreenRectVertex {
|
||||
ScreenRectVertex() = default;
|
||||
explicit ScreenRectVertex(f32 x, f32 y, f32 u, f32 v) : position{{x, y}}, tex_coord{{u, v}} {}
|
||||
|
@ -173,9 +77,9 @@ constexpr std::array<f32, 4 * 4> MakeOrthographicMatrix(f32 width, f32 height) {
|
|||
// clang-format on
|
||||
}
|
||||
|
||||
std::size_t GetBytesPerPixel(const Tegra::FramebufferConfig& framebuffer) {
|
||||
u32 GetBytesPerPixel(const Tegra::FramebufferConfig& framebuffer) {
|
||||
using namespace VideoCore::Surface;
|
||||
return GetBytesPerPixel(PixelFormatFromGPUPixelFormat(framebuffer.pixel_format));
|
||||
return BytesPerBlock(PixelFormatFromGPUPixelFormat(framebuffer.pixel_format));
|
||||
}
|
||||
|
||||
std::size_t GetSizeInBytes(const Tegra::FramebufferConfig& framebuffer) {
|
||||
|
@ -239,34 +143,30 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
|
|||
scheduler.Wait(resource_ticks[image_index]);
|
||||
resource_ticks[image_index] = scheduler.CurrentTick();
|
||||
|
||||
VKImage* blit_image = use_accelerated ? screen_info.image : raw_images[image_index].get();
|
||||
|
||||
UpdateDescriptorSet(image_index, blit_image->GetPresentView());
|
||||
UpdateDescriptorSet(image_index,
|
||||
use_accelerated ? screen_info.image_view : *raw_image_views[image_index]);
|
||||
|
||||
BufferData data;
|
||||
SetUniformData(data, framebuffer);
|
||||
SetVertexData(data, framebuffer);
|
||||
|
||||
auto map = buffer_commit->Map();
|
||||
std::memcpy(map.GetAddress(), &data, sizeof(data));
|
||||
std::memcpy(map.Address(), &data, sizeof(data));
|
||||
|
||||
if (!use_accelerated) {
|
||||
const u64 image_offset = GetRawImageOffset(framebuffer, image_index);
|
||||
|
||||
const auto pixel_format =
|
||||
VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format);
|
||||
const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset;
|
||||
const auto host_ptr = cpu_memory.GetPointer(framebuffer_addr);
|
||||
rasterizer.FlushRegion(ToCacheAddr(host_ptr), GetSizeInBytes(framebuffer));
|
||||
const u8* const host_ptr = cpu_memory.GetPointer(framebuffer_addr);
|
||||
const size_t size_bytes = GetSizeInBytes(framebuffer);
|
||||
rasterizer.FlushRegion(ToCacheAddr(host_ptr), size_bytes);
|
||||
|
||||
// TODO(Rodrigo): Read this from HLE
|
||||
constexpr u32 block_height_log2 = 4;
|
||||
VideoCore::MortonSwizzle(VideoCore::MortonSwizzleMode::MortonToLinear, pixel_format,
|
||||
framebuffer.stride, block_height_log2, framebuffer.height, 0, 1, 1,
|
||||
map.GetAddress() + image_offset, host_ptr);
|
||||
|
||||
blit_image->Transition(0, 1, 0, 1, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
|
||||
const u32 bytes_per_pixel = GetBytesPerPixel(framebuffer);
|
||||
Tegra::Texture::UnswizzleTexture(
|
||||
std::span(map.Address() + image_offset, size_bytes), std::span(host_ptr, size_bytes),
|
||||
bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0);
|
||||
|
||||
const VkBufferImageCopy copy{
|
||||
.bufferOffset = image_offset,
|
||||
|
@ -288,15 +188,44 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
|
|||
},
|
||||
};
|
||||
scheduler.Record(
|
||||
[buffer = *buffer, image = *blit_image->GetHandle(), copy](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copy);
|
||||
[buffer = *buffer, image = *raw_images[image_index], copy](vk::CommandBuffer cmdbuf) {
|
||||
const VkImageMemoryBarrier base_barrier{
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = 0,
|
||||
.dstAccessMask = 0,
|
||||
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.image = image,
|
||||
.subresourceRange =
|
||||
{
|
||||
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
.baseMipLevel = 0,
|
||||
.levelCount = 1,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = 1,
|
||||
},
|
||||
};
|
||||
VkImageMemoryBarrier read_barrier = base_barrier;
|
||||
read_barrier.srcAccessMask = VK_ACCESS_HOST_WRITE_BIT;
|
||||
read_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
|
||||
read_barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
|
||||
|
||||
VkImageMemoryBarrier write_barrier = base_barrier;
|
||||
write_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
|
||||
write_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
|
||||
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
0, read_barrier);
|
||||
cmdbuf.CopyBufferToImage(buffer, image, VK_IMAGE_LAYOUT_GENERAL, copy);
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, write_barrier);
|
||||
});
|
||||
}
|
||||
map.Release();
|
||||
|
||||
blit_image->Transition(0, 1, 0, 1, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
|
||||
VK_ACCESS_SHADER_READ_BIT, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
|
||||
|
||||
scheduler.Record([renderpass = *renderpass, framebuffer = *framebuffers[image_index],
|
||||
descriptor_set = descriptor_sets[image_index], buffer = *buffer,
|
||||
size = swapchain.GetSize(), pipeline = *pipeline,
|
||||
|
@ -304,31 +233,31 @@ VkSemaphore VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, bool
|
|||
const VkClearValue clear_color{
|
||||
.color = {.float32 = {0.0f, 0.0f, 0.0f, 0.0f}},
|
||||
};
|
||||
|
||||
VkRenderPassBeginInfo renderpass_bi;
|
||||
renderpass_bi.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
|
||||
renderpass_bi.pNext = nullptr;
|
||||
renderpass_bi.renderPass = renderpass;
|
||||
renderpass_bi.framebuffer = framebuffer;
|
||||
renderpass_bi.renderArea.offset.x = 0;
|
||||
renderpass_bi.renderArea.offset.y = 0;
|
||||
renderpass_bi.renderArea.extent = size;
|
||||
renderpass_bi.clearValueCount = 1;
|
||||
renderpass_bi.pClearValues = &clear_color;
|
||||
|
||||
VkViewport viewport;
|
||||
viewport.x = 0.0f;
|
||||
viewport.y = 0.0f;
|
||||
viewport.width = static_cast<float>(size.width);
|
||||
viewport.height = static_cast<float>(size.height);
|
||||
viewport.minDepth = 0.0f;
|
||||
viewport.maxDepth = 1.0f;
|
||||
|
||||
VkRect2D scissor;
|
||||
scissor.offset.x = 0;
|
||||
scissor.offset.y = 0;
|
||||
scissor.extent = size;
|
||||
|
||||
const VkRenderPassBeginInfo renderpass_bi{
|
||||
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
|
||||
.pNext = nullptr,
|
||||
.renderPass = renderpass,
|
||||
.framebuffer = framebuffer,
|
||||
.renderArea =
|
||||
{
|
||||
.offset = {0, 0},
|
||||
.extent = size,
|
||||
},
|
||||
.clearValueCount = 1,
|
||||
.pClearValues = &clear_color,
|
||||
};
|
||||
const VkViewport viewport{
|
||||
.x = 0.0f,
|
||||
.y = 0.0f,
|
||||
.width = static_cast<float>(size.width),
|
||||
.height = static_cast<float>(size.height),
|
||||
.minDepth = 0.0f,
|
||||
.maxDepth = 1.0f,
|
||||
};
|
||||
const VkRect2D scissor{
|
||||
.offset = {0, 0},
|
||||
.extent = size,
|
||||
};
|
||||
cmdbuf.BeginRenderPass(renderpass_bi, VK_SUBPASS_CONTENTS_INLINE);
|
||||
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
|
||||
cmdbuf.SetViewport(0, viewport);
|
||||
|
@ -372,8 +301,8 @@ void VKBlitScreen::RefreshResources(const Tegra::FramebufferConfig& framebuffer)
|
|||
}
|
||||
|
||||
void VKBlitScreen::CreateShaders() {
|
||||
vertex_shader = BuildShader(device, sizeof(blit_vertex_code), blit_vertex_code);
|
||||
fragment_shader = BuildShader(device, sizeof(blit_fragment_code), blit_fragment_code);
|
||||
vertex_shader = BuildShader(device, VULKAN_PRESENT_VERT_SPV);
|
||||
fragment_shader = BuildShader(device, VULKAN_PRESENT_FRAG_SPV);
|
||||
}
|
||||
|
||||
void VKBlitScreen::CreateSemaphores() {
|
||||
|
@ -420,7 +349,7 @@ void VKBlitScreen::CreateRenderPass() {
|
|||
|
||||
const VkAttachmentReference color_attachment_ref{
|
||||
.attachment = 0,
|
||||
.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
|
||||
.layout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
};
|
||||
|
||||
const VkSubpassDescription subpass_description{
|
||||
|
@ -735,34 +664,56 @@ void VKBlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuff
|
|||
|
||||
void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {
|
||||
raw_images.resize(image_count);
|
||||
raw_image_views.resize(image_count);
|
||||
raw_buffer_commits.resize(image_count);
|
||||
|
||||
const VkImageCreateInfo ci{
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.imageType = VK_IMAGE_TYPE_2D,
|
||||
.format = GetFormat(framebuffer),
|
||||
.extent =
|
||||
{
|
||||
.width = framebuffer.width,
|
||||
.height = framebuffer.height,
|
||||
.depth = 1,
|
||||
},
|
||||
.mipLevels = 1,
|
||||
.arrayLayers = 1,
|
||||
.samples = VK_SAMPLE_COUNT_1_BIT,
|
||||
.tiling = VK_IMAGE_TILING_LINEAR,
|
||||
.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
|
||||
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||||
.queueFamilyIndexCount = 0,
|
||||
.pQueueFamilyIndices = nullptr,
|
||||
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
|
||||
};
|
||||
|
||||
for (std::size_t i = 0; i < image_count; ++i) {
|
||||
raw_images[i] = std::make_unique<VKImage>(device, scheduler, ci, VK_IMAGE_ASPECT_COLOR_BIT);
|
||||
raw_buffer_commits[i] = memory_manager.Commit(raw_images[i]->GetHandle(), false);
|
||||
for (size_t i = 0; i < image_count; ++i) {
|
||||
raw_images[i] = device.GetLogical().CreateImage(VkImageCreateInfo{
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.imageType = VK_IMAGE_TYPE_2D,
|
||||
.format = GetFormat(framebuffer),
|
||||
.extent =
|
||||
{
|
||||
.width = framebuffer.width,
|
||||
.height = framebuffer.height,
|
||||
.depth = 1,
|
||||
},
|
||||
.mipLevels = 1,
|
||||
.arrayLayers = 1,
|
||||
.samples = VK_SAMPLE_COUNT_1_BIT,
|
||||
.tiling = VK_IMAGE_TILING_LINEAR,
|
||||
.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
|
||||
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||||
.queueFamilyIndexCount = 0,
|
||||
.pQueueFamilyIndices = nullptr,
|
||||
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
|
||||
});
|
||||
raw_buffer_commits[i] = memory_manager.Commit(raw_images[i], false);
|
||||
raw_image_views[i] = device.GetLogical().CreateImageView(VkImageViewCreateInfo{
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.image = *raw_images[i],
|
||||
.viewType = VK_IMAGE_VIEW_TYPE_2D,
|
||||
.format = GetFormat(framebuffer),
|
||||
.components =
|
||||
{
|
||||
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||||
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||||
.b = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||||
.a = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||||
},
|
||||
.subresourceRange =
|
||||
{
|
||||
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
.baseMipLevel = 0,
|
||||
.levelCount = 1,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = 1,
|
||||
},
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -789,7 +740,7 @@ void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, VkImageView imag
|
|||
const VkDescriptorImageInfo image_info{
|
||||
.sampler = *sampler,
|
||||
.imageView = image_view,
|
||||
.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
|
||||
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
};
|
||||
|
||||
const VkWriteDescriptorSet sampler_write{
|
||||
|
|
|
@ -35,7 +35,6 @@ struct ScreenInfo;
|
|||
|
||||
class RasterizerVulkan;
|
||||
class VKDevice;
|
||||
class VKImage;
|
||||
class VKScheduler;
|
||||
class VKSwapchain;
|
||||
|
||||
|
@ -110,7 +109,8 @@ private:
|
|||
std::vector<u64> resource_ticks;
|
||||
|
||||
std::vector<vk::Semaphore> semaphores;
|
||||
std::vector<std::unique_ptr<VKImage>> raw_images;
|
||||
std::vector<vk::Image> raw_images;
|
||||
std::vector<vk::ImageView> raw_image_views;
|
||||
std::vector<VKMemoryCommit> raw_buffer_commits;
|
||||
u32 raw_width = 0;
|
||||
u32 raw_height = 0;
|
||||
|
|
|
@ -31,15 +31,19 @@ constexpr VkAccessFlags UPLOAD_ACCESS_BARRIERS =
|
|||
VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT |
|
||||
VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_INDEX_READ_BIT;
|
||||
|
||||
constexpr VkAccessFlags TRANSFORM_FEEDBACK_WRITE_ACCESS =
|
||||
VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT | VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT;
|
||||
|
||||
std::unique_ptr<VKStreamBuffer> CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) {
|
||||
return std::make_unique<VKStreamBuffer>(device, scheduler, BUFFER_USAGE);
|
||||
return std::make_unique<VKStreamBuffer>(device, scheduler);
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
Buffer::Buffer(const VKDevice& device, VKMemoryManager& memory_manager, VKScheduler& scheduler_,
|
||||
Buffer::Buffer(const VKDevice& device_, VKMemoryManager& memory_manager, VKScheduler& scheduler_,
|
||||
VKStagingBufferPool& staging_pool_, VAddr cpu_addr_, std::size_t size_)
|
||||
: BufferBlock{cpu_addr_, size_}, scheduler{scheduler_}, staging_pool{staging_pool_} {
|
||||
: BufferBlock{cpu_addr_, size_}, device{device_}, scheduler{scheduler_}, staging_pool{
|
||||
staging_pool_} {
|
||||
const VkBufferCreateInfo ci{
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
|
@ -64,24 +68,39 @@ void Buffer::Upload(std::size_t offset, std::size_t data_size, const u8* data) {
|
|||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
|
||||
const VkBuffer handle = Handle();
|
||||
scheduler.Record(
|
||||
[staging = *staging.handle, handle, offset, data_size](vk::CommandBuffer cmdbuf) {
|
||||
cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, data_size});
|
||||
|
||||
const VkBufferMemoryBarrier barrier{
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
.dstAccessMask = UPLOAD_ACCESS_BARRIERS,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.buffer = handle,
|
||||
.offset = offset,
|
||||
.size = data_size,
|
||||
};
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0, {},
|
||||
barrier, {});
|
||||
});
|
||||
scheduler.Record([staging = *staging.handle, handle, offset, data_size,
|
||||
&device = device](vk::CommandBuffer cmdbuf) {
|
||||
const VkBufferMemoryBarrier read_barrier{
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask =
|
||||
VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_TRANSFER_WRITE_BIT |
|
||||
VK_ACCESS_HOST_WRITE_BIT |
|
||||
(device.IsExtTransformFeedbackSupported() ? TRANSFORM_FEEDBACK_WRITE_ACCESS : 0),
|
||||
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.buffer = handle,
|
||||
.offset = offset,
|
||||
.size = data_size,
|
||||
};
|
||||
const VkBufferMemoryBarrier write_barrier{
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
|
||||
.pNext = nullptr,
|
||||
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
.dstAccessMask = UPLOAD_ACCESS_BARRIERS,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.buffer = handle,
|
||||
.offset = offset,
|
||||
.size = data_size,
|
||||
};
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
0, read_barrier);
|
||||
cmdbuf.CopyBuffer(staging, handle, VkBufferCopy{0, offset, data_size});
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, UPLOAD_PIPELINE_STAGE, 0,
|
||||
write_barrier);
|
||||
});
|
||||
}
|
||||
|
||||
void Buffer::Download(std::size_t offset, std::size_t data_size, u8* data) {
|
||||
|
@ -150,8 +169,10 @@ void Buffer::CopyFrom(const Buffer& src, std::size_t src_offset, std::size_t dst
|
|||
VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer_,
|
||||
Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
|
||||
const VKDevice& device_, VKMemoryManager& memory_manager_,
|
||||
VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_)
|
||||
: BufferCache{rasterizer_, gpu_memory_, cpu_memory_, CreateStreamBuffer(device_, scheduler_)},
|
||||
VKScheduler& scheduler_, VKStreamBuffer& stream_buffer_,
|
||||
VKStagingBufferPool& staging_pool_)
|
||||
: VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer>{rasterizer_, gpu_memory_,
|
||||
cpu_memory_, stream_buffer_},
|
||||
device{device_}, memory_manager{memory_manager_}, scheduler{scheduler_}, staging_pool{
|
||||
staging_pool_} {}
|
||||
|
||||
|
|
|
@ -41,6 +41,7 @@ public:
|
|||
}
|
||||
|
||||
private:
|
||||
const VKDevice& device;
|
||||
VKScheduler& scheduler;
|
||||
VKStagingBufferPool& staging_pool;
|
||||
|
||||
|
@ -49,10 +50,11 @@ private:
|
|||
|
||||
class VKBufferCache final : public VideoCommon::BufferCache<Buffer, VkBuffer, VKStreamBuffer> {
|
||||
public:
|
||||
explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer_,
|
||||
Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
|
||||
const VKDevice& device_, VKMemoryManager& memory_manager_,
|
||||
VKScheduler& scheduler_, VKStagingBufferPool& staging_pool_);
|
||||
explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer,
|
||||
Tegra::MemoryManager& gpu_memory, Core::Memory::Memory& cpu_memory,
|
||||
const VKDevice& device, VKMemoryManager& memory_manager,
|
||||
VKScheduler& scheduler, VKStreamBuffer& stream_buffer,
|
||||
VKStagingBufferPool& staging_pool);
|
||||
~VKBufferCache();
|
||||
|
||||
BufferInfo GetEmptyBuffer(std::size_t size) override;
|
||||
|
|
|
@ -10,6 +10,9 @@
|
|||
#include "common/alignment.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/host_shaders/vulkan_quad_array_comp_spv.h"
|
||||
#include "video_core/host_shaders/vulkan_quad_indexed_comp_spv.h"
|
||||
#include "video_core/host_shaders/vulkan_uint8_comp_spv.h"
|
||||
#include "video_core/renderer_vulkan/vk_compute_pass.h"
|
||||
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_device.h"
|
||||
|
@ -22,99 +25,6 @@ namespace Vulkan {
|
|||
|
||||
namespace {
|
||||
|
||||
// Quad array SPIR-V module. Generated from the "shaders/" directory, read the instructions there.
|
||||
constexpr u8 quad_array[] = {
|
||||
0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x54, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30,
|
||||
0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
|
||||
0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x47, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
|
||||
0x47, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x48, 0x00, 0x05, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x14, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x47, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x47, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x48, 0x00, 0x05, 0x00, 0x29, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x29, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x47, 0x00, 0x04, 0x00, 0x4a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00,
|
||||
0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x09, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, 0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x1e, 0x00, 0x03, 0x00, 0x14, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
|
||||
0x15, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00,
|
||||
0x15, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
|
||||
0x18, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x02, 0x00,
|
||||
0x1b, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x29, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x20, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00,
|
||||
0x3b, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00,
|
||||
0x2b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x20, 0x00, 0x04, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x1c, 0x00, 0x04, 0x00, 0x34, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00,
|
||||
0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x2c, 0x00, 0x09, 0x00, 0x34, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
|
||||
0x35, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00,
|
||||
0x37, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x3a, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00,
|
||||
0x34, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x44, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00,
|
||||
0x00, 0x04, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00,
|
||||
0x49, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x35, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x3a, 0x00, 0x00, 0x00,
|
||||
0x3b, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00,
|
||||
0xf8, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00, 0x4b, 0x00, 0x00, 0x00,
|
||||
0x4e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x4d, 0x00, 0x00, 0x00,
|
||||
0xf8, 0x00, 0x02, 0x00, 0x4d, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0d, 0x00, 0x00, 0x00,
|
||||
0x0e, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00,
|
||||
0x44, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00,
|
||||
0x17, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00,
|
||||
0x19, 0x00, 0x00, 0x00, 0xae, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
|
||||
0x12, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0xf7, 0x00, 0x03, 0x00, 0x1e, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00,
|
||||
0x1e, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00,
|
||||
0x4b, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1e, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00,
|
||||
0x21, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x21, 0x00, 0x00, 0x00, 0xf5, 0x00, 0x07, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00,
|
||||
0x48, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00,
|
||||
0x27, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00,
|
||||
0x23, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00,
|
||||
0x27, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00,
|
||||
0x22, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00,
|
||||
0x2b, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x2f, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x32, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00, 0x2f, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00,
|
||||
0x3e, 0x00, 0x03, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x38, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00,
|
||||
0x07, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00,
|
||||
0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00,
|
||||
0x80, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x00, 0x00, 0x33, 0x00, 0x00, 0x00,
|
||||
0x3d, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00,
|
||||
0x12, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x44, 0x00, 0x00, 0x00,
|
||||
0x45, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00,
|
||||
0x3e, 0x00, 0x03, 0x00, 0x45, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x53, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00,
|
||||
0xf9, 0x00, 0x02, 0x00, 0x21, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x23, 0x00, 0x00, 0x00,
|
||||
0xf9, 0x00, 0x02, 0x00, 0x4b, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x4e, 0x00, 0x00, 0x00,
|
||||
0xf9, 0x00, 0x02, 0x00, 0x4c, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x4b, 0x00, 0x00, 0x00,
|
||||
0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00,
|
||||
};
|
||||
|
||||
VkDescriptorSetLayoutBinding BuildQuadArrayPassDescriptorSetLayoutBinding() {
|
||||
return {
|
||||
.binding = 0,
|
||||
|
@ -144,208 +54,6 @@ VkPushConstantRange BuildComputePushConstantRange(std::size_t size) {
|
|||
};
|
||||
}
|
||||
|
||||
// Uint8 SPIR-V module. Generated from the "shaders/" directory.
|
||||
constexpr u8 uint8_pass[] = {
|
||||
0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x2f, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00,
|
||||
0x51, 0x11, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x61, 0x11, 0x00, 0x00, 0x0a, 0x00, 0x07, 0x00,
|
||||
0x53, 0x50, 0x56, 0x5f, 0x4b, 0x48, 0x52, 0x5f, 0x31, 0x36, 0x62, 0x69, 0x74, 0x5f, 0x73, 0x74,
|
||||
0x6f, 0x72, 0x61, 0x67, 0x65, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x07, 0x00, 0x53, 0x50, 0x56, 0x5f,
|
||||
0x4b, 0x48, 0x52, 0x5f, 0x38, 0x62, 0x69, 0x74, 0x5f, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65,
|
||||
0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c,
|
||||
0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30, 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00,
|
||||
0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, 0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
|
||||
0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00,
|
||||
0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x13, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00,
|
||||
0x13, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00,
|
||||
0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00,
|
||||
0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x1f, 0x00, 0x00, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x20, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x20, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00,
|
||||
0x20, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00,
|
||||
0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00,
|
||||
0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x2e, 0x00, 0x00, 0x00,
|
||||
0x0b, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
|
||||
0x07, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00,
|
||||
0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
|
||||
0x0a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00,
|
||||
0x0a, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
|
||||
0x0d, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
|
||||
0x11, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00,
|
||||
0x12, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x13, 0x00, 0x00, 0x00,
|
||||
0x12, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x13, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x02, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00,
|
||||
0x1e, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00,
|
||||
0x1f, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x20, 0x00, 0x00, 0x00,
|
||||
0x1f, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x20, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x26, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x11, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x2a, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x1e, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x00, 0x00,
|
||||
0x00, 0x04, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00,
|
||||
0x2c, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x2d, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00,
|
||||
0x08, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0d, 0x00, 0x00, 0x00,
|
||||
0x0e, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00,
|
||||
0x08, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x10, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x44, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00,
|
||||
0x17, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x05, 0x00,
|
||||
0x1a, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00,
|
||||
0xf7, 0x00, 0x03, 0x00, 0x1d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00,
|
||||
0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00,
|
||||
0x1c, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00,
|
||||
0x08, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00,
|
||||
0x08, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x26, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00,
|
||||
0x15, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00,
|
||||
0x11, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, 0x71, 0x00, 0x04, 0x00,
|
||||
0x1e, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00,
|
||||
0x2a, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
|
||||
0x24, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00,
|
||||
0xf9, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00,
|
||||
0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00,
|
||||
};
|
||||
|
||||
// Quad indexed SPIR-V module. Generated from the "shaders/" directory.
|
||||
constexpr u8 QUAD_INDEXED_SPV[] = {
|
||||
0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x7c, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30,
|
||||
0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e,
|
||||
0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x47, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
|
||||
0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
|
||||
0x48, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00,
|
||||
0x48, 0x00, 0x05, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x16, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0x47, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x47, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x48, 0x00, 0x05, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x22, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
||||
0x23, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x22, 0x00, 0x00, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x56, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x04, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x57, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x18, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x57, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x57, 0x00, 0x00, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x59, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x59, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x72, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00,
|
||||
0x19, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00,
|
||||
0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00,
|
||||
0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00,
|
||||
0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00,
|
||||
0x09, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00,
|
||||
0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00,
|
||||
0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, 0x15, 0x00, 0x00, 0x00,
|
||||
0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00,
|
||||
0x20, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00,
|
||||
0x3b, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x14, 0x00, 0x02, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x21, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00,
|
||||
0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00,
|
||||
0x09, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00,
|
||||
0x24, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x25, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x26, 0x00, 0x00, 0x00,
|
||||
0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x2b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00,
|
||||
0x3b, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x3f, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x04, 0x00, 0x41, 0x00, 0x00, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x42, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x43, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x09, 0x00, 0x41, 0x00, 0x00, 0x00,
|
||||
0x44, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00,
|
||||
0x42, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x43, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00,
|
||||
0x46, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00,
|
||||
0x56, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x57, 0x00, 0x00, 0x00,
|
||||
0x56, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x58, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x57, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x58, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x5b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
|
||||
0x09, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x69, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00,
|
||||
0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00,
|
||||
0x00, 0x04, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00,
|
||||
0x01, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x72, 0x00, 0x00, 0x00,
|
||||
0x70, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00,
|
||||
0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00,
|
||||
0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x46, 0x00, 0x00, 0x00,
|
||||
0x47, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00,
|
||||
0xf8, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00, 0x73, 0x00, 0x00, 0x00,
|
||||
0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x75, 0x00, 0x00, 0x00,
|
||||
0xf8, 0x00, 0x02, 0x00, 0x75, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0e, 0x00, 0x00, 0x00,
|
||||
0x0f, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00,
|
||||
0x09, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00,
|
||||
0x44, 0x00, 0x05, 0x00, 0x09, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00,
|
||||
0x19, 0x00, 0x00, 0x00, 0xaf, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
|
||||
0x14, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0xf7, 0x00, 0x03, 0x00, 0x1e, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00,
|
||||
0x1e, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00,
|
||||
0x73, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00,
|
||||
0x26, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00,
|
||||
0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00,
|
||||
0xc4, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00,
|
||||
0x28, 0x00, 0x00, 0x00, 0x82, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00,
|
||||
0x2b, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0xc4, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x31, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, 0x82, 0x00, 0x05, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00,
|
||||
0xf9, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00,
|
||||
0xf5, 0x00, 0x07, 0x00, 0x09, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00,
|
||||
0x1e, 0x00, 0x00, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x05, 0x00,
|
||||
0x1b, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00,
|
||||
0xf6, 0x00, 0x04, 0x00, 0x37, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xfa, 0x00, 0x04, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00,
|
||||
0xf8, 0x00, 0x02, 0x00, 0x36, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x40, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x3f, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00,
|
||||
0x47, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x07, 0x00, 0x00, 0x00,
|
||||
0x48, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00,
|
||||
0x06, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00,
|
||||
0xc3, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x4e, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00,
|
||||
0x2e, 0x00, 0x00, 0x00, 0xc7, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x00,
|
||||
0x4a, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00,
|
||||
0x54, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00,
|
||||
0x5b, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00,
|
||||
0x4e, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x5d, 0x00, 0x00, 0x00,
|
||||
0x5c, 0x00, 0x00, 0x00, 0xcb, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x62, 0x00, 0x00, 0x00,
|
||||
0x5d, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00,
|
||||
0x09, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00,
|
||||
0x09, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00,
|
||||
0x41, 0x00, 0x05, 0x00, 0x69, 0x00, 0x00, 0x00, 0x6a, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00,
|
||||
0x42, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x6b, 0x00, 0x00, 0x00,
|
||||
0x6a, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, 0x09, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00,
|
||||
0x62, 0x00, 0x00, 0x00, 0x6b, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x5b, 0x00, 0x00, 0x00,
|
||||
0x6d, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00,
|
||||
0x3e, 0x00, 0x03, 0x00, 0x6d, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00,
|
||||
0x09, 0x00, 0x00, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00,
|
||||
0xf9, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x37, 0x00, 0x00, 0x00,
|
||||
0xf9, 0x00, 0x02, 0x00, 0x73, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x76, 0x00, 0x00, 0x00,
|
||||
0xf9, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x73, 0x00, 0x00, 0x00,
|
||||
0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00,
|
||||
};
|
||||
|
||||
std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBindings() {
|
||||
return {{
|
||||
{
|
||||
|
@ -381,8 +89,8 @@ VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() {
|
|||
VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool,
|
||||
vk::Span<VkDescriptorSetLayoutBinding> bindings,
|
||||
vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates,
|
||||
vk::Span<VkPushConstantRange> push_constants, std::size_t code_size,
|
||||
const u8* code) {
|
||||
vk::Span<VkPushConstantRange> push_constants,
|
||||
std::span<const u32> code) {
|
||||
descriptor_set_layout = device.GetLogical().CreateDescriptorSetLayout({
|
||||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
|
@ -390,7 +98,6 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto
|
|||
.bindingCount = bindings.size(),
|
||||
.pBindings = bindings.data(),
|
||||
});
|
||||
|
||||
layout = device.GetLogical().CreatePipelineLayout({
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
|
@ -400,7 +107,6 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto
|
|||
.pushConstantRangeCount = push_constants.size(),
|
||||
.pPushConstantRanges = push_constants.data(),
|
||||
});
|
||||
|
||||
if (!templates.empty()) {
|
||||
descriptor_template = device.GetLogical().CreateDescriptorUpdateTemplateKHR({
|
||||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR,
|
||||
|
@ -417,18 +123,13 @@ VKComputePass::VKComputePass(const VKDevice& device, VKDescriptorPool& descripto
|
|||
|
||||
descriptor_allocator.emplace(descriptor_pool, *descriptor_set_layout);
|
||||
}
|
||||
|
||||
auto code_copy = std::make_unique<u32[]>(code_size / sizeof(u32) + 1);
|
||||
std::memcpy(code_copy.get(), code, code_size);
|
||||
|
||||
module = device.GetLogical().CreateShaderModule({
|
||||
.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.codeSize = code_size,
|
||||
.pCode = code_copy.get(),
|
||||
.codeSize = static_cast<u32>(code.size_bytes()),
|
||||
.pCode = code.data(),
|
||||
});
|
||||
|
||||
pipeline = device.GetLogical().CreateComputePipeline({
|
||||
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
|
@ -467,7 +168,7 @@ QuadArrayPass::QuadArrayPass(const VKDevice& device_, VKScheduler& scheduler_,
|
|||
VKUpdateDescriptorQueue& update_descriptor_queue_)
|
||||
: VKComputePass(device_, descriptor_pool_, BuildQuadArrayPassDescriptorSetLayoutBinding(),
|
||||
BuildQuadArrayPassDescriptorUpdateTemplateEntry(),
|
||||
BuildComputePushConstantRange(sizeof(u32)), std::size(quad_array), quad_array),
|
||||
BuildComputePushConstantRange(sizeof(u32)), VULKAN_QUAD_ARRAY_COMP_SPV),
|
||||
scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
|
||||
update_descriptor_queue{update_descriptor_queue_} {}
|
||||
|
||||
|
@ -510,12 +211,11 @@ std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32
|
|||
return {*buffer.handle, 0};
|
||||
}
|
||||
|
||||
Uint8Pass::Uint8Pass(const VKDevice& device_, VKScheduler& scheduler_,
|
||||
VKDescriptorPool& descriptor_pool_, VKStagingBufferPool& staging_buffer_pool_,
|
||||
Uint8Pass::Uint8Pass(const VKDevice& device, VKScheduler& scheduler_,
|
||||
VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool_,
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue_)
|
||||
: VKComputePass(device_, descriptor_pool_, BuildInputOutputDescriptorSetBindings(),
|
||||
BuildInputOutputDescriptorUpdateTemplate(), {}, std::size(uint8_pass),
|
||||
uint8_pass),
|
||||
: VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(),
|
||||
BuildInputOutputDescriptorUpdateTemplate(), {}, VULKAN_UINT8_COMP_SPV),
|
||||
scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
|
||||
update_descriptor_queue{update_descriptor_queue_} {}
|
||||
|
||||
|
@ -561,8 +261,7 @@ QuadIndexedPass::QuadIndexedPass(const VKDevice& device_, VKScheduler& scheduler
|
|||
VKUpdateDescriptorQueue& update_descriptor_queue_)
|
||||
: VKComputePass(device_, descriptor_pool_, BuildInputOutputDescriptorSetBindings(),
|
||||
BuildInputOutputDescriptorUpdateTemplate(),
|
||||
BuildComputePushConstantRange(sizeof(u32) * 2), std::size(QUAD_INDEXED_SPV),
|
||||
QUAD_INDEXED_SPV),
|
||||
BuildComputePushConstantRange(sizeof(u32) * 2), VULKAN_QUAD_INDEXED_COMP_SPV),
|
||||
scheduler{scheduler_}, staging_buffer_pool{staging_buffer_pool_},
|
||||
update_descriptor_queue{update_descriptor_queue_} {}
|
||||
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
#pragma once
|
||||
|
||||
#include <optional>
|
||||
#include <span>
|
||||
#include <utility>
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
@ -24,8 +25,7 @@ public:
|
|||
explicit VKComputePass(const VKDevice& device, VKDescriptorPool& descriptor_pool,
|
||||
vk::Span<VkDescriptorSetLayoutBinding> bindings,
|
||||
vk::Span<VkDescriptorUpdateTemplateEntryKHR> templates,
|
||||
vk::Span<VkPushConstantRange> push_constants, std::size_t code_size,
|
||||
const u8* code);
|
||||
vk::Span<VkPushConstantRange> push_constants, std::span<const u32> code);
|
||||
~VKComputePass();
|
||||
|
||||
protected:
|
||||
|
|
|
@ -46,6 +46,7 @@ constexpr std::array REQUIRED_EXTENSIONS{
|
|||
VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME,
|
||||
VK_KHR_DESCRIPTOR_UPDATE_TEMPLATE_EXTENSION_NAME,
|
||||
VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME,
|
||||
VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
|
||||
VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME,
|
||||
VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME,
|
||||
VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
|
||||
|
@ -122,6 +123,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(
|
|||
VK_FORMAT_R16G16_UNORM,
|
||||
VK_FORMAT_R16G16_SNORM,
|
||||
VK_FORMAT_R16G16_SFLOAT,
|
||||
VK_FORMAT_R16G16_SINT,
|
||||
VK_FORMAT_R16_UNORM,
|
||||
VK_FORMAT_R16_UINT,
|
||||
VK_FORMAT_R8G8B8A8_SRGB,
|
||||
|
@ -161,18 +163,32 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(
|
|||
VK_FORMAT_BC2_SRGB_BLOCK,
|
||||
VK_FORMAT_BC3_SRGB_BLOCK,
|
||||
VK_FORMAT_BC7_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_4x4_UNORM_BLOCK,
|
||||
VK_FORMAT_ASTC_4x4_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_8x8_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_8x5_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_5x4_UNORM_BLOCK,
|
||||
VK_FORMAT_ASTC_5x4_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_5x5_UNORM_BLOCK,
|
||||
VK_FORMAT_ASTC_5x5_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_10x8_UNORM_BLOCK,
|
||||
VK_FORMAT_ASTC_10x8_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_6x5_UNORM_BLOCK,
|
||||
VK_FORMAT_ASTC_6x5_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_6x6_UNORM_BLOCK,
|
||||
VK_FORMAT_ASTC_6x6_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_8x5_UNORM_BLOCK,
|
||||
VK_FORMAT_ASTC_8x5_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_8x6_UNORM_BLOCK,
|
||||
VK_FORMAT_ASTC_8x6_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_8x8_UNORM_BLOCK,
|
||||
VK_FORMAT_ASTC_8x8_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_10x5_UNORM_BLOCK,
|
||||
VK_FORMAT_ASTC_10x5_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_10x6_UNORM_BLOCK,
|
||||
VK_FORMAT_ASTC_10x6_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_10x8_UNORM_BLOCK,
|
||||
VK_FORMAT_ASTC_10x8_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_10x10_UNORM_BLOCK,
|
||||
VK_FORMAT_ASTC_10x10_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_12x10_UNORM_BLOCK,
|
||||
VK_FORMAT_ASTC_12x10_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_12x12_UNORM_BLOCK,
|
||||
VK_FORMAT_ASTC_12x12_SRGB_BLOCK,
|
||||
VK_FORMAT_ASTC_8x6_UNORM_BLOCK,
|
||||
|
@ -192,7 +208,7 @@ std::unordered_map<VkFormat, VkFormatProperties> GetFormatProperties(
|
|||
|
||||
VKDevice::VKDevice(VkInstance instance_, u32 instance_version_, vk::PhysicalDevice physical_,
|
||||
VkSurfaceKHR surface, const vk::InstanceDispatch& dld_)
|
||||
: dld{dld_}, physical{physical_}, properties{physical.GetProperties()},
|
||||
: instance{instance_}, dld{dld_}, physical{physical_}, properties{physical.GetProperties()},
|
||||
instance_version{instance_version_}, format_properties{GetFormatProperties(physical, dld)} {
|
||||
SetupFamilies(surface);
|
||||
SetupFeatures();
|
||||
|
@ -214,7 +230,7 @@ bool VKDevice::Create() {
|
|||
features2.features = {
|
||||
.robustBufferAccess = false,
|
||||
.fullDrawIndexUint32 = false,
|
||||
.imageCubeArray = false,
|
||||
.imageCubeArray = true,
|
||||
.independentBlend = true,
|
||||
.geometryShader = true,
|
||||
.tessellationShader = true,
|
||||
|
@ -242,7 +258,7 @@ bool VKDevice::Create() {
|
|||
.shaderTessellationAndGeometryPointSize = false,
|
||||
.shaderImageGatherExtended = true,
|
||||
.shaderStorageImageExtendedFormats = false,
|
||||
.shaderStorageImageMultisample = false,
|
||||
.shaderStorageImageMultisample = true,
|
||||
.shaderStorageImageReadWithoutFormat = is_formatless_image_load_supported,
|
||||
.shaderStorageImageWriteWithoutFormat = true,
|
||||
.shaderUniformBufferArrayDynamicIndexing = false,
|
||||
|
@ -268,7 +284,6 @@ bool VKDevice::Create() {
|
|||
.variableMultisampleRate = false,
|
||||
.inheritedQueries = false,
|
||||
};
|
||||
|
||||
VkPhysicalDeviceTimelineSemaphoreFeaturesKHR timeline_semaphore{
|
||||
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR,
|
||||
.pNext = nullptr,
|
||||
|
@ -380,6 +395,20 @@ bool VKDevice::Create() {
|
|||
LOG_INFO(Render_Vulkan, "Device doesn't support extended dynamic state");
|
||||
}
|
||||
|
||||
VkPhysicalDeviceRobustness2FeaturesEXT robustness2;
|
||||
if (ext_robustness2) {
|
||||
robustness2 = {
|
||||
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT,
|
||||
.pNext = nullptr,
|
||||
.robustBufferAccess2 = false,
|
||||
.robustImageAccess2 = true,
|
||||
.nullDescriptor = true,
|
||||
};
|
||||
SetNext(next, robustness2);
|
||||
} else {
|
||||
LOG_INFO(Render_Vulkan, "Device doesn't support robustness2");
|
||||
}
|
||||
|
||||
if (!ext_depth_range_unrestricted) {
|
||||
LOG_INFO(Render_Vulkan, "Device doesn't support depth range unrestricted");
|
||||
}
|
||||
|
@ -405,7 +434,14 @@ bool VKDevice::Create() {
|
|||
}
|
||||
|
||||
CollectTelemetryParameters();
|
||||
CollectToolingInfo();
|
||||
|
||||
if (ext_extended_dynamic_state && driver_id == VK_DRIVER_ID_MESA_RADV) {
|
||||
LOG_WARNING(
|
||||
Render_Vulkan,
|
||||
"Blacklisting RADV for VK_EXT_extended_dynamic state, likely due to a bug in yuzu");
|
||||
ext_extended_dynamic_state = false;
|
||||
}
|
||||
if (ext_extended_dynamic_state && IsRDNA(properties.deviceName, driver_id)) {
|
||||
// AMD's proprietary driver supports VK_EXT_extended_dynamic_state but on RDNA devices it
|
||||
// seems to cause stability issues
|
||||
|
@ -458,7 +494,7 @@ void VKDevice::ReportLoss() const {
|
|||
LOG_CRITICAL(Render_Vulkan, "Device loss occured!");
|
||||
|
||||
// Wait for the log to flush and for Nsight Aftermath to dump the results
|
||||
std::this_thread::sleep_for(std::chrono::seconds{3});
|
||||
std::this_thread::sleep_for(std::chrono::seconds{15});
|
||||
}
|
||||
|
||||
void VKDevice::SaveShader(const std::vector<u32>& spirv) const {
|
||||
|
@ -499,6 +535,16 @@ bool VKDevice::IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features)
|
|||
return true;
|
||||
}
|
||||
|
||||
bool VKDevice::TestDepthStencilBlits() const {
|
||||
static constexpr VkFormatFeatureFlags required_features =
|
||||
VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
|
||||
const auto test_features = [](VkFormatProperties props) {
|
||||
return (props.optimalTilingFeatures & required_features) == required_features;
|
||||
};
|
||||
return test_features(format_properties.at(VK_FORMAT_D32_SFLOAT_S8_UINT)) &&
|
||||
test_features(format_properties.at(VK_FORMAT_D24_UNORM_S8_UINT));
|
||||
}
|
||||
|
||||
bool VKDevice::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
|
||||
FormatType format_type) const {
|
||||
const auto it = format_properties.find(wanted_format);
|
||||
|
@ -569,6 +615,7 @@ bool VKDevice::IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface) {
|
|||
const auto features{physical.GetFeatures()};
|
||||
const std::array feature_report = {
|
||||
std::make_pair(features.vertexPipelineStoresAndAtomics, "vertexPipelineStoresAndAtomics"),
|
||||
std::make_pair(features.imageCubeArray, "imageCubeArray"),
|
||||
std::make_pair(features.independentBlend, "independentBlend"),
|
||||
std::make_pair(features.depthClamp, "depthClamp"),
|
||||
std::make_pair(features.samplerAnisotropy, "samplerAnisotropy"),
|
||||
|
@ -580,6 +627,7 @@ bool VKDevice::IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface) {
|
|||
std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"),
|
||||
std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"),
|
||||
std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"),
|
||||
std::make_pair(features.shaderStorageImageMultisample, "shaderStorageImageMultisample"),
|
||||
std::make_pair(features.shaderStorageImageWriteWithoutFormat,
|
||||
"shaderStorageImageWriteWithoutFormat"),
|
||||
};
|
||||
|
@ -608,6 +656,7 @@ std::vector<const char*> VKDevice::LoadExtensions() {
|
|||
bool has_ext_transform_feedback{};
|
||||
bool has_ext_custom_border_color{};
|
||||
bool has_ext_extended_dynamic_state{};
|
||||
bool has_ext_robustness2{};
|
||||
for (const VkExtensionProperties& extension : physical.EnumerateDeviceExtensionProperties()) {
|
||||
const auto test = [&](std::optional<std::reference_wrapper<bool>> status, const char* name,
|
||||
bool push) {
|
||||
|
@ -627,11 +676,15 @@ std::vector<const char*> VKDevice::LoadExtensions() {
|
|||
test(has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false);
|
||||
test(ext_depth_range_unrestricted, VK_EXT_DEPTH_RANGE_UNRESTRICTED_EXTENSION_NAME, true);
|
||||
test(ext_index_type_uint8, VK_EXT_INDEX_TYPE_UINT8_EXTENSION_NAME, true);
|
||||
test(ext_sampler_filter_minmax, VK_EXT_SAMPLER_FILTER_MINMAX_EXTENSION_NAME, true);
|
||||
test(ext_shader_viewport_index_layer, VK_EXT_SHADER_VIEWPORT_INDEX_LAYER_EXTENSION_NAME,
|
||||
true);
|
||||
test(ext_tooling_info, VK_EXT_TOOLING_INFO_EXTENSION_NAME, true);
|
||||
test(ext_shader_stencil_export, VK_EXT_SHADER_STENCIL_EXPORT_EXTENSION_NAME, true);
|
||||
test(has_ext_transform_feedback, VK_EXT_TRANSFORM_FEEDBACK_EXTENSION_NAME, false);
|
||||
test(has_ext_custom_border_color, VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME, false);
|
||||
test(has_ext_extended_dynamic_state, VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME, false);
|
||||
test(has_ext_robustness2, VK_EXT_ROBUSTNESS_2_EXTENSION_NAME, false);
|
||||
if (instance_version >= VK_API_VERSION_1_1) {
|
||||
test(has_ext_subgroup_size_control, VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false);
|
||||
}
|
||||
|
@ -733,6 +786,18 @@ std::vector<const char*> VKDevice::LoadExtensions() {
|
|||
}
|
||||
}
|
||||
|
||||
if (has_ext_robustness2) {
|
||||
VkPhysicalDeviceRobustness2FeaturesEXT robustness2;
|
||||
robustness2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT;
|
||||
robustness2.pNext = nullptr;
|
||||
features.pNext = &robustness2;
|
||||
physical.GetFeatures2KHR(features);
|
||||
if (robustness2.nullDescriptor && robustness2.robustImageAccess2) {
|
||||
extensions.push_back(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME);
|
||||
ext_robustness2 = true;
|
||||
}
|
||||
}
|
||||
|
||||
return extensions;
|
||||
}
|
||||
|
||||
|
@ -764,6 +829,7 @@ void VKDevice::SetupFamilies(VkSurfaceKHR surface) {
|
|||
void VKDevice::SetupFeatures() {
|
||||
const auto supported_features{physical.GetFeatures()};
|
||||
is_formatless_image_load_supported = supported_features.shaderStorageImageReadWithoutFormat;
|
||||
is_blit_depth_stencil_supported = TestDepthStencilBlits();
|
||||
is_optimal_astc_supported = IsOptimalAstcSupported(supported_features);
|
||||
}
|
||||
|
||||
|
@ -794,6 +860,32 @@ void VKDevice::CollectTelemetryParameters() {
|
|||
}
|
||||
}
|
||||
|
||||
void VKDevice::CollectToolingInfo() {
|
||||
if (!ext_tooling_info) {
|
||||
return;
|
||||
}
|
||||
const auto vkGetPhysicalDeviceToolPropertiesEXT =
|
||||
reinterpret_cast<PFN_vkGetPhysicalDeviceToolPropertiesEXT>(
|
||||
dld.vkGetInstanceProcAddr(instance, "vkGetPhysicalDeviceToolPropertiesEXT"));
|
||||
if (!vkGetPhysicalDeviceToolPropertiesEXT) {
|
||||
return;
|
||||
}
|
||||
u32 tool_count = 0;
|
||||
if (vkGetPhysicalDeviceToolPropertiesEXT(physical, &tool_count, nullptr) != VK_SUCCESS) {
|
||||
return;
|
||||
}
|
||||
std::vector<VkPhysicalDeviceToolPropertiesEXT> tools(tool_count);
|
||||
if (vkGetPhysicalDeviceToolPropertiesEXT(physical, &tool_count, tools.data()) != VK_SUCCESS) {
|
||||
return;
|
||||
}
|
||||
for (const VkPhysicalDeviceToolPropertiesEXT& tool : tools) {
|
||||
const std::string_view name = tool.name;
|
||||
LOG_INFO(Render_Vulkan, "{}", name);
|
||||
has_renderdoc = has_renderdoc || name == "RenderDoc";
|
||||
has_nsight_graphics = has_nsight_graphics || name == "NVIDIA Nsight Graphics";
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<VkDeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const {
|
||||
static constexpr float QUEUE_PRIORITY = 1.0f;
|
||||
|
||||
|
|
|
@ -157,6 +157,11 @@ public:
|
|||
return is_formatless_image_load_supported;
|
||||
}
|
||||
|
||||
/// Returns true when blitting from and to depth stencil images is supported.
|
||||
bool IsBlitDepthStencilSupported() const {
|
||||
return is_blit_depth_stencil_supported;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_NV_viewport_swizzle.
|
||||
bool IsNvViewportSwizzleSupported() const {
|
||||
return nv_viewport_swizzle;
|
||||
|
@ -172,6 +177,11 @@ public:
|
|||
return ext_index_type_uint8;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_EXT_sampler_filter_minmax.
|
||||
bool IsExtSamplerFilterMinmaxSupported() const {
|
||||
return ext_sampler_filter_minmax;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_EXT_depth_range_unrestricted.
|
||||
bool IsExtDepthRangeUnrestrictedSupported() const {
|
||||
return ext_depth_range_unrestricted;
|
||||
|
@ -197,6 +207,16 @@ public:
|
|||
return ext_extended_dynamic_state;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports VK_EXT_shader_stencil_export.
|
||||
bool IsExtShaderStencilExportSupported() const {
|
||||
return ext_shader_stencil_export;
|
||||
}
|
||||
|
||||
/// Returns true when a known debugging tool is attached.
|
||||
bool HasDebuggingToolAttached() const {
|
||||
return has_renderdoc || has_nsight_graphics;
|
||||
}
|
||||
|
||||
/// Returns the vendor name reported from Vulkan.
|
||||
std::string_view GetVendorName() const {
|
||||
return vendor_name;
|
||||
|
@ -228,16 +248,23 @@ private:
|
|||
/// Collects telemetry information from the device.
|
||||
void CollectTelemetryParameters();
|
||||
|
||||
/// Collects information about attached tools.
|
||||
void CollectToolingInfo();
|
||||
|
||||
/// Returns a list of queue initialization descriptors.
|
||||
std::vector<VkDeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const;
|
||||
|
||||
/// Returns true if ASTC textures are natively supported.
|
||||
bool IsOptimalAstcSupported(const VkPhysicalDeviceFeatures& features) const;
|
||||
|
||||
/// Returns true if the device natively supports blitting depth stencil images.
|
||||
bool TestDepthStencilBlits() const;
|
||||
|
||||
/// Returns true if a format is supported.
|
||||
bool IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
|
||||
FormatType format_type) const;
|
||||
|
||||
VkInstance instance; ///< Vulkan instance.
|
||||
vk::DeviceDispatch dld; ///< Device function pointers.
|
||||
vk::PhysicalDevice physical; ///< Physical device.
|
||||
VkPhysicalDeviceProperties properties; ///< Device properties.
|
||||
|
@ -253,15 +280,22 @@ private:
|
|||
bool is_float16_supported{}; ///< Support for float16 arithmetics.
|
||||
bool is_warp_potentially_bigger{}; ///< Host warp size can be bigger than guest.
|
||||
bool is_formatless_image_load_supported{}; ///< Support for shader image read without format.
|
||||
bool is_blit_depth_stencil_supported{}; ///< Support for blitting from and to depth stencil.
|
||||
bool nv_viewport_swizzle{}; ///< Support for VK_NV_viewport_swizzle.
|
||||
bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs.
|
||||
bool ext_index_type_uint8{}; ///< Support for VK_EXT_index_type_uint8.
|
||||
bool ext_sampler_filter_minmax{}; ///< Support for VK_EXT_sampler_filter_minmax.
|
||||
bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.
|
||||
bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.
|
||||
bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info.
|
||||
bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback.
|
||||
bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color.
|
||||
bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state.
|
||||
bool ext_robustness2{}; ///< Support for VK_EXT_robustness2.
|
||||
bool ext_shader_stencil_export{}; ///< Support for VK_EXT_shader_stencil_export.
|
||||
bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config.
|
||||
bool has_renderdoc{}; ///< Has RenderDoc attached
|
||||
bool has_nsight_graphics{}; ///< Has Nsight Graphics attached
|
||||
|
||||
// Asynchronous Graphics Pipeline setting
|
||||
bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline
|
||||
|
|
|
@ -73,10 +73,9 @@ bool InnerFence::IsEventSignalled() const {
|
|||
}
|
||||
|
||||
VKFenceManager::VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
|
||||
Tegra::MemoryManager& memory_manager_,
|
||||
VKTextureCache& texture_cache_, VKBufferCache& buffer_cache_,
|
||||
VKQueryCache& query_cache_, const VKDevice& device_,
|
||||
VKScheduler& scheduler_)
|
||||
Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_,
|
||||
VKBufferCache& buffer_cache_, VKQueryCache& query_cache_,
|
||||
const VKDevice& device_, VKScheduler& scheduler_)
|
||||
: GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_},
|
||||
device{device_}, scheduler{scheduler_} {}
|
||||
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
|
||||
#include "video_core/fence_manager.h"
|
||||
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_texture_cache.h"
|
||||
#include "video_core/renderer_vulkan/wrapper.h"
|
||||
|
||||
namespace Core {
|
||||
|
@ -24,7 +25,6 @@ class VKBufferCache;
|
|||
class VKDevice;
|
||||
class VKQueryCache;
|
||||
class VKScheduler;
|
||||
class VKTextureCache;
|
||||
|
||||
class InnerFence : public VideoCommon::FenceBase {
|
||||
public:
|
||||
|
@ -51,12 +51,12 @@ private:
|
|||
using Fence = std::shared_ptr<InnerFence>;
|
||||
|
||||
using GenericFenceManager =
|
||||
VideoCommon::FenceManager<Fence, VKTextureCache, VKBufferCache, VKQueryCache>;
|
||||
VideoCommon::FenceManager<Fence, TextureCache, VKBufferCache, VKQueryCache>;
|
||||
|
||||
class VKFenceManager final : public GenericFenceManager {
|
||||
public:
|
||||
explicit VKFenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
|
||||
Tegra::MemoryManager& memory_manager_, VKTextureCache& texture_cache_,
|
||||
Tegra::MemoryManager& memory_manager_, TextureCache& texture_cache_,
|
||||
VKBufferCache& buffer_cache_, VKQueryCache& query_cache_,
|
||||
const VKDevice& device_, VKScheduler& scheduler_);
|
||||
|
||||
|
|
|
@ -15,7 +15,6 @@
|
|||
#include "video_core/renderer_vulkan/vk_device.h"
|
||||
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
|
||||
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
|
||||
#include "video_core/renderer_vulkan/wrapper.h"
|
||||
|
@ -69,23 +68,45 @@ VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) {
|
|||
};
|
||||
}
|
||||
|
||||
VkSampleCountFlagBits ConvertMsaaMode(Tegra::Texture::MsaaMode msaa_mode) {
|
||||
switch (msaa_mode) {
|
||||
case Tegra::Texture::MsaaMode::Msaa1x1:
|
||||
return VK_SAMPLE_COUNT_1_BIT;
|
||||
case Tegra::Texture::MsaaMode::Msaa2x1:
|
||||
case Tegra::Texture::MsaaMode::Msaa2x1_D3D:
|
||||
return VK_SAMPLE_COUNT_2_BIT;
|
||||
case Tegra::Texture::MsaaMode::Msaa2x2:
|
||||
case Tegra::Texture::MsaaMode::Msaa2x2_VC4:
|
||||
case Tegra::Texture::MsaaMode::Msaa2x2_VC12:
|
||||
return VK_SAMPLE_COUNT_4_BIT;
|
||||
case Tegra::Texture::MsaaMode::Msaa4x2:
|
||||
case Tegra::Texture::MsaaMode::Msaa4x2_D3D:
|
||||
case Tegra::Texture::MsaaMode::Msaa4x2_VC8:
|
||||
case Tegra::Texture::MsaaMode::Msaa4x2_VC24:
|
||||
return VK_SAMPLE_COUNT_8_BIT;
|
||||
case Tegra::Texture::MsaaMode::Msaa4x4:
|
||||
return VK_SAMPLE_COUNT_16_BIT;
|
||||
default:
|
||||
UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast<int>(msaa_mode));
|
||||
return VK_SAMPLE_COUNT_1_BIT;
|
||||
}
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device_, VKScheduler& scheduler_,
|
||||
VKDescriptorPool& descriptor_pool_,
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue_,
|
||||
VKRenderPassCache& renderpass_cache_,
|
||||
const GraphicsPipelineCacheKey& key_,
|
||||
vk::Span<VkDescriptorSetLayoutBinding> bindings_,
|
||||
const SPIRVProgram& program_)
|
||||
: device{device_}, scheduler{scheduler_}, cache_key{key_}, hash{cache_key.Hash()},
|
||||
descriptor_set_layout{CreateDescriptorSetLayout(bindings_)},
|
||||
const GraphicsPipelineCacheKey& key,
|
||||
vk::Span<VkDescriptorSetLayoutBinding> bindings,
|
||||
const SPIRVProgram& program, u32 num_color_buffers)
|
||||
: device{device_}, scheduler{scheduler_}, cache_key{key}, hash{cache_key.Hash()},
|
||||
descriptor_set_layout{CreateDescriptorSetLayout(bindings)},
|
||||
descriptor_allocator{descriptor_pool_, *descriptor_set_layout},
|
||||
update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()},
|
||||
descriptor_template{CreateDescriptorUpdateTemplate(program_)}, modules{CreateShaderModules(
|
||||
program_)},
|
||||
renderpass{renderpass_cache_.GetRenderPass(cache_key.renderpass_params)},
|
||||
pipeline{CreatePipeline(cache_key.renderpass_params, program_)} {}
|
||||
descriptor_template{CreateDescriptorUpdateTemplate(program)},
|
||||
modules(CreateShaderModules(program)),
|
||||
pipeline(CreatePipeline(program, cache_key.renderpass, num_color_buffers)) {}
|
||||
|
||||
VKGraphicsPipeline::~VKGraphicsPipeline() = default;
|
||||
|
||||
|
@ -179,8 +200,9 @@ std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules(
|
|||
return shader_modules;
|
||||
}
|
||||
|
||||
vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params,
|
||||
const SPIRVProgram& program) const {
|
||||
vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
|
||||
VkRenderPass renderpass,
|
||||
u32 num_color_buffers) const {
|
||||
const auto& state = cache_key.fixed_state;
|
||||
const auto& viewport_swizzles = state.viewport_swizzles;
|
||||
|
||||
|
@ -290,8 +312,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
|
|||
};
|
||||
|
||||
std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles;
|
||||
std::transform(viewport_swizzles.begin(), viewport_swizzles.end(), swizzles.begin(),
|
||||
UnpackViewportSwizzle);
|
||||
std::ranges::transform(viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle);
|
||||
VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV,
|
||||
.pNext = nullptr,
|
||||
|
@ -326,7 +347,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
|
|||
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
|
||||
.rasterizationSamples = ConvertMsaaMode(state.msaa_mode),
|
||||
.sampleShadingEnable = VK_FALSE,
|
||||
.minSampleShading = 0.0f,
|
||||
.pSampleMask = nullptr,
|
||||
|
@ -352,8 +373,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
|
|||
};
|
||||
|
||||
std::array<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments;
|
||||
const auto num_attachments = static_cast<std::size_t>(renderpass_params.num_color_attachments);
|
||||
for (std::size_t index = 0; index < num_attachments; ++index) {
|
||||
for (std::size_t index = 0; index < num_color_buffers; ++index) {
|
||||
static constexpr std::array COMPONENT_TABLE{
|
||||
VK_COLOR_COMPONENT_R_BIT,
|
||||
VK_COLOR_COMPONENT_G_BIT,
|
||||
|
@ -387,7 +407,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
|
|||
.flags = 0,
|
||||
.logicOpEnable = VK_FALSE,
|
||||
.logicOp = VK_LOGIC_OP_COPY,
|
||||
.attachmentCount = static_cast<u32>(num_attachments),
|
||||
.attachmentCount = num_color_buffers,
|
||||
.pAttachments = cb_attachments.data(),
|
||||
.blendConstants = {},
|
||||
};
|
||||
|
@ -447,8 +467,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
|
|||
stage_ci.pNext = &subgroup_size_ci;
|
||||
}
|
||||
}
|
||||
|
||||
const VkGraphicsPipelineCreateInfo ci{
|
||||
return device.GetLogical().CreateGraphicsPipeline(VkGraphicsPipelineCreateInfo{
|
||||
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
|
@ -468,8 +487,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
|
|||
.subpass = 0,
|
||||
.basePipelineHandle = nullptr,
|
||||
.basePipelineIndex = 0,
|
||||
};
|
||||
return device.GetLogical().CreateGraphicsPipeline(ci);
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
|
|
@ -8,10 +8,10 @@
|
|||
#include <optional>
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
|
||||
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
|
||||
#include "video_core/renderer_vulkan/wrapper.h"
|
||||
|
||||
|
@ -20,8 +20,7 @@ namespace Vulkan {
|
|||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
|
||||
struct GraphicsPipelineCacheKey {
|
||||
RenderPassParams renderpass_params;
|
||||
u32 padding;
|
||||
VkRenderPass renderpass;
|
||||
std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
|
||||
FixedPipelineState fixed_state;
|
||||
|
||||
|
@ -34,7 +33,7 @@ struct GraphicsPipelineCacheKey {
|
|||
}
|
||||
|
||||
std::size_t Size() const noexcept {
|
||||
return sizeof(renderpass_params) + sizeof(padding) + sizeof(shaders) + fixed_state.Size();
|
||||
return sizeof(renderpass) + sizeof(shaders) + fixed_state.Size();
|
||||
}
|
||||
};
|
||||
static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
|
||||
|
@ -43,7 +42,6 @@ static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);
|
|||
|
||||
class VKDescriptorPool;
|
||||
class VKDevice;
|
||||
class VKRenderPassCache;
|
||||
class VKScheduler;
|
||||
class VKUpdateDescriptorQueue;
|
||||
|
||||
|
@ -52,12 +50,11 @@ using SPIRVProgram = std::array<std::optional<SPIRVShader>, Maxwell::MaxShaderSt
|
|||
class VKGraphicsPipeline final {
|
||||
public:
|
||||
explicit VKGraphicsPipeline(const VKDevice& device_, VKScheduler& scheduler_,
|
||||
VKDescriptorPool& descriptor_pool_,
|
||||
VKDescriptorPool& descriptor_pool,
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue_,
|
||||
VKRenderPassCache& renderpass_cache_,
|
||||
const GraphicsPipelineCacheKey& key_,
|
||||
vk::Span<VkDescriptorSetLayoutBinding> bindings_,
|
||||
const SPIRVProgram& program_);
|
||||
const GraphicsPipelineCacheKey& key,
|
||||
vk::Span<VkDescriptorSetLayoutBinding> bindings,
|
||||
const SPIRVProgram& program, u32 num_color_buffers);
|
||||
~VKGraphicsPipeline();
|
||||
|
||||
VkDescriptorSet CommitDescriptorSet();
|
||||
|
@ -70,10 +67,6 @@ public:
|
|||
return *layout;
|
||||
}
|
||||
|
||||
VkRenderPass GetRenderPass() const {
|
||||
return renderpass;
|
||||
}
|
||||
|
||||
GraphicsPipelineCacheKey GetCacheKey() const {
|
||||
return cache_key;
|
||||
}
|
||||
|
@ -89,8 +82,8 @@ private:
|
|||
|
||||
std::vector<vk::ShaderModule> CreateShaderModules(const SPIRVProgram& program) const;
|
||||
|
||||
vk::Pipeline CreatePipeline(const RenderPassParams& renderpass_params,
|
||||
const SPIRVProgram& program) const;
|
||||
vk::Pipeline CreatePipeline(const SPIRVProgram& program, VkRenderPass renderpass,
|
||||
u32 num_color_buffers) const;
|
||||
|
||||
const VKDevice& device;
|
||||
VKScheduler& scheduler;
|
||||
|
@ -104,7 +97,6 @@ private:
|
|||
vk::DescriptorUpdateTemplateKHR descriptor_template;
|
||||
std::vector<vk::ShaderModule> modules;
|
||||
|
||||
VkRenderPass renderpass;
|
||||
vk::Pipeline pipeline;
|
||||
};
|
||||
|
||||
|
|
|
@ -1,135 +0,0 @@
|
|||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "video_core/renderer_vulkan/vk_device.h"
|
||||
#include "video_core/renderer_vulkan/vk_image.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
VKImage::VKImage(const VKDevice& device_, VKScheduler& scheduler_,
|
||||
const VkImageCreateInfo& image_ci_, VkImageAspectFlags aspect_mask_)
|
||||
: device{device_}, scheduler{scheduler_}, format{image_ci_.format}, aspect_mask{aspect_mask_},
|
||||
image_num_layers{image_ci_.arrayLayers}, image_num_levels{image_ci_.mipLevels} {
|
||||
UNIMPLEMENTED_IF_MSG(image_ci_.queueFamilyIndexCount != 0,
|
||||
"Queue family tracking is not implemented");
|
||||
|
||||
image = device_.GetLogical().CreateImage(image_ci_);
|
||||
|
||||
const u32 num_ranges = image_num_layers * image_num_levels;
|
||||
barriers.resize(num_ranges);
|
||||
subrange_states.resize(num_ranges, {{}, image_ci_.initialLayout});
|
||||
}
|
||||
|
||||
VKImage::~VKImage() = default;
|
||||
|
||||
void VKImage::Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
|
||||
VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access,
|
||||
VkImageLayout new_layout) {
|
||||
if (!HasChanged(base_layer, num_layers, base_level, num_levels, new_access, new_layout)) {
|
||||
return;
|
||||
}
|
||||
|
||||
std::size_t cursor = 0;
|
||||
for (u32 layer_it = 0; layer_it < num_layers; ++layer_it) {
|
||||
for (u32 level_it = 0; level_it < num_levels; ++level_it, ++cursor) {
|
||||
const u32 layer = base_layer + layer_it;
|
||||
const u32 level = base_level + level_it;
|
||||
auto& state = GetSubrangeState(layer, level);
|
||||
auto& barrier = barriers[cursor];
|
||||
barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
|
||||
barrier.pNext = nullptr;
|
||||
barrier.srcAccessMask = state.access;
|
||||
barrier.dstAccessMask = new_access;
|
||||
barrier.oldLayout = state.layout;
|
||||
barrier.newLayout = new_layout;
|
||||
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
|
||||
barrier.image = *image;
|
||||
barrier.subresourceRange.aspectMask = aspect_mask;
|
||||
barrier.subresourceRange.baseMipLevel = level;
|
||||
barrier.subresourceRange.levelCount = 1;
|
||||
barrier.subresourceRange.baseArrayLayer = layer;
|
||||
barrier.subresourceRange.layerCount = 1;
|
||||
state.access = new_access;
|
||||
state.layout = new_layout;
|
||||
}
|
||||
}
|
||||
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
|
||||
scheduler.Record([barriers = barriers, cursor](vk::CommandBuffer cmdbuf) {
|
||||
// TODO(Rodrigo): Implement a way to use the latest stage across subresources.
|
||||
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
|
||||
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, {}, {},
|
||||
vk::Span(barriers.data(), cursor));
|
||||
});
|
||||
}
|
||||
|
||||
bool VKImage::HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
|
||||
VkAccessFlags new_access, VkImageLayout new_layout) noexcept {
|
||||
const bool is_full_range = base_layer == 0 && num_layers == image_num_layers &&
|
||||
base_level == 0 && num_levels == image_num_levels;
|
||||
if (!is_full_range) {
|
||||
state_diverged = true;
|
||||
}
|
||||
|
||||
if (!state_diverged) {
|
||||
auto& state = GetSubrangeState(0, 0);
|
||||
if (state.access != new_access || state.layout != new_layout) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
for (u32 layer_it = 0; layer_it < num_layers; ++layer_it) {
|
||||
for (u32 level_it = 0; level_it < num_levels; ++level_it) {
|
||||
const u32 layer = base_layer + layer_it;
|
||||
const u32 level = base_level + level_it;
|
||||
auto& state = GetSubrangeState(layer, level);
|
||||
if (state.access != new_access || state.layout != new_layout) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void VKImage::CreatePresentView() {
|
||||
// Image type has to be 2D to be presented.
|
||||
present_view = device.GetLogical().CreateImageView({
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.image = *image,
|
||||
.viewType = VK_IMAGE_VIEW_TYPE_2D,
|
||||
.format = format,
|
||||
.components =
|
||||
{
|
||||
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||||
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||||
.b = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||||
.a = VK_COMPONENT_SWIZZLE_IDENTITY,
|
||||
},
|
||||
.subresourceRange =
|
||||
{
|
||||
.aspectMask = aspect_mask,
|
||||
.baseMipLevel = 0,
|
||||
.levelCount = 1,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = 1,
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
VKImage::SubrangeState& VKImage::GetSubrangeState(u32 layer, u32 level) noexcept {
|
||||
return subrange_states[static_cast<std::size_t>(layer * image_num_levels) +
|
||||
static_cast<std::size_t>(level)];
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
|
@ -1,84 +0,0 @@
|
|||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/renderer_vulkan/wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class VKDevice;
|
||||
class VKScheduler;
|
||||
|
||||
class VKImage {
|
||||
public:
|
||||
explicit VKImage(const VKDevice& device_, VKScheduler& scheduler_,
|
||||
const VkImageCreateInfo& image_ci_, VkImageAspectFlags aspect_mask_);
|
||||
~VKImage();
|
||||
|
||||
/// Records in the passed command buffer an image transition and updates the state of the image.
|
||||
void Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
|
||||
VkPipelineStageFlags new_stage_mask, VkAccessFlags new_access,
|
||||
VkImageLayout new_layout);
|
||||
|
||||
/// Returns a view compatible with presentation, the image has to be 2D.
|
||||
VkImageView GetPresentView() {
|
||||
if (!present_view) {
|
||||
CreatePresentView();
|
||||
}
|
||||
return *present_view;
|
||||
}
|
||||
|
||||
/// Returns the Vulkan image handler.
|
||||
const vk::Image& GetHandle() const {
|
||||
return image;
|
||||
}
|
||||
|
||||
/// Returns the Vulkan format for this image.
|
||||
VkFormat GetFormat() const {
|
||||
return format;
|
||||
}
|
||||
|
||||
/// Returns the Vulkan aspect mask.
|
||||
VkImageAspectFlags GetAspectMask() const {
|
||||
return aspect_mask;
|
||||
}
|
||||
|
||||
private:
|
||||
struct SubrangeState final {
|
||||
VkAccessFlags access = 0; ///< Current access bits.
|
||||
VkImageLayout layout = VK_IMAGE_LAYOUT_UNDEFINED; ///< Current image layout.
|
||||
};
|
||||
|
||||
bool HasChanged(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels,
|
||||
VkAccessFlags new_access, VkImageLayout new_layout) noexcept;
|
||||
|
||||
/// Creates a presentation view.
|
||||
void CreatePresentView();
|
||||
|
||||
/// Returns the subrange state for a layer and layer.
|
||||
SubrangeState& GetSubrangeState(u32 layer, u32 level) noexcept;
|
||||
|
||||
const VKDevice& device; ///< Device handler.
|
||||
VKScheduler& scheduler; ///< Device scheduler.
|
||||
|
||||
const VkFormat format; ///< Vulkan format.
|
||||
const VkImageAspectFlags aspect_mask; ///< Vulkan aspect mask.
|
||||
const u32 image_num_layers; ///< Number of layers.
|
||||
const u32 image_num_levels; ///< Number of mipmap levels.
|
||||
|
||||
vk::Image image; ///< Image handle.
|
||||
vk::ImageView present_view; ///< Image view compatible with presentation.
|
||||
|
||||
std::vector<VkImageMemoryBarrier> barriers; ///< Pool of barriers.
|
||||
std::vector<SubrangeState> subrange_states; ///< Current subrange state.
|
||||
|
||||
bool state_diverged = false; ///< True when subresources mismatch in layout.
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
|
@ -216,7 +216,7 @@ VKMemoryCommitImpl::~VKMemoryCommitImpl() {
|
|||
}
|
||||
|
||||
MemoryMap VKMemoryCommitImpl::Map(u64 size, u64 offset_) const {
|
||||
return MemoryMap{this, memory.Map(interval.first + offset_, size)};
|
||||
return MemoryMap(this, std::span<u8>(memory.Map(interval.first + offset_, size), size));
|
||||
}
|
||||
|
||||
void VKMemoryCommitImpl::Unmap() const {
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <span>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include "common/common_types.h"
|
||||
|
@ -93,8 +94,8 @@ private:
|
|||
/// Holds ownership of a memory map.
|
||||
class MemoryMap final {
|
||||
public:
|
||||
explicit MemoryMap(const VKMemoryCommitImpl* commit_, u8* address_)
|
||||
: commit{commit_}, address{address_} {}
|
||||
explicit MemoryMap(const VKMemoryCommitImpl* commit_, std::span<u8> span_)
|
||||
: commit{commit_}, span{span_} {}
|
||||
|
||||
~MemoryMap() {
|
||||
if (commit) {
|
||||
|
@ -108,19 +109,24 @@ public:
|
|||
commit = nullptr;
|
||||
}
|
||||
|
||||
/// Returns a span to the memory map.
|
||||
[[nodiscard]] std::span<u8> Span() const noexcept {
|
||||
return span;
|
||||
}
|
||||
|
||||
/// Returns the address of the memory map.
|
||||
u8* GetAddress() const {
|
||||
return address;
|
||||
[[nodiscard]] u8* Address() const noexcept {
|
||||
return span.data();
|
||||
}
|
||||
|
||||
/// Returns the address of the memory map;
|
||||
operator u8*() const {
|
||||
return address;
|
||||
[[nodiscard]] operator u8*() const noexcept {
|
||||
return span.data();
|
||||
}
|
||||
|
||||
private:
|
||||
const VKMemoryCommitImpl* commit{}; ///< Mapped memory commit.
|
||||
u8* address{}; ///< Address to the mapped memory.
|
||||
std::span<u8> span; ///< Address to the mapped memory.
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
#include <vector>
|
||||
|
||||
#include "common/bit_cast.h"
|
||||
#include "common/cityhash.h"
|
||||
#include "common/microprofile.h"
|
||||
#include "core/core.h"
|
||||
#include "core/memory.h"
|
||||
|
@ -22,7 +23,6 @@
|
|||
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
|
||||
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_rasterizer.h"
|
||||
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
|
||||
#include "video_core/renderer_vulkan/wrapper.h"
|
||||
|
@ -52,7 +52,9 @@ constexpr VkDescriptorType STORAGE_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_TEX
|
|||
constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
|
||||
|
||||
constexpr VideoCommon::Shader::CompilerSettings compiler_settings{
|
||||
VideoCommon::Shader::CompileDepth::FullDecompile};
|
||||
.depth = VideoCommon::Shader::CompileDepth::FullDecompile,
|
||||
.disable_else_derivation = true,
|
||||
};
|
||||
|
||||
constexpr std::size_t GetStageFromProgram(std::size_t program) {
|
||||
return program == 0 ? 0 : program - 1;
|
||||
|
@ -149,12 +151,11 @@ VKPipelineCache::VKPipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_
|
|||
Tegra::Engines::KeplerCompute& kepler_compute_,
|
||||
Tegra::MemoryManager& gpu_memory_, const VKDevice& device_,
|
||||
VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_,
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue_,
|
||||
VKRenderPassCache& renderpass_cache_)
|
||||
: ShaderCache{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_},
|
||||
gpu_memory{gpu_memory_}, device{device_}, scheduler{scheduler_},
|
||||
descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_},
|
||||
renderpass_cache{renderpass_cache_} {}
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue_)
|
||||
: VideoCommon::ShaderCache<Shader>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_},
|
||||
kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_},
|
||||
scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{
|
||||
update_descriptor_queue_} {}
|
||||
|
||||
VKPipelineCache::~VKPipelineCache() = default;
|
||||
|
||||
|
@ -199,7 +200,8 @@ std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
|
|||
}
|
||||
|
||||
VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline(
|
||||
const GraphicsPipelineCacheKey& key, VideoCommon::Shader::AsyncShaders& async_shaders) {
|
||||
const GraphicsPipelineCacheKey& key, u32 num_color_buffers,
|
||||
VideoCommon::Shader::AsyncShaders& async_shaders) {
|
||||
MICROPROFILE_SCOPE(Vulkan_PipelineCache);
|
||||
|
||||
if (last_graphics_pipeline && last_graphics_key == key) {
|
||||
|
@ -215,8 +217,8 @@ VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline(
|
|||
LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
|
||||
const auto [program, bindings] = DecompileShaders(key.fixed_state);
|
||||
async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool,
|
||||
update_descriptor_queue, renderpass_cache, bindings,
|
||||
program, key);
|
||||
update_descriptor_queue, bindings, program, key,
|
||||
num_color_buffers);
|
||||
}
|
||||
last_graphics_pipeline = pair->second.get();
|
||||
return last_graphics_pipeline;
|
||||
|
@ -229,8 +231,8 @@ VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline(
|
|||
LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
|
||||
const auto [program, bindings] = DecompileShaders(key.fixed_state);
|
||||
entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool,
|
||||
update_descriptor_queue, renderpass_cache, key,
|
||||
bindings, program);
|
||||
update_descriptor_queue, key, bindings,
|
||||
program, num_color_buffers);
|
||||
gpu.ShaderNotify().MarkShaderComplete();
|
||||
}
|
||||
last_graphics_pipeline = entry.get();
|
||||
|
|
|
@ -19,7 +19,6 @@
|
|||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
|
||||
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
|
||||
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_shader_decompiler.h"
|
||||
#include "video_core/renderer_vulkan/wrapper.h"
|
||||
#include "video_core/shader/async_shaders.h"
|
||||
|
@ -119,18 +118,18 @@ private:
|
|||
|
||||
class VKPipelineCache final : public VideoCommon::ShaderCache<Shader> {
|
||||
public:
|
||||
explicit VKPipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
|
||||
Tegra::Engines::Maxwell3D& maxwell3d_,
|
||||
Tegra::Engines::KeplerCompute& kepler_compute_,
|
||||
Tegra::MemoryManager& gpu_memory_, const VKDevice& device_,
|
||||
VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_,
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue_,
|
||||
VKRenderPassCache& renderpass_cache_);
|
||||
explicit VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu,
|
||||
Tegra::Engines::Maxwell3D& maxwell3d,
|
||||
Tegra::Engines::KeplerCompute& kepler_compute,
|
||||
Tegra::MemoryManager& gpu_memory, const VKDevice& device,
|
||||
VKScheduler& scheduler, VKDescriptorPool& descriptor_pool,
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue);
|
||||
~VKPipelineCache() override;
|
||||
|
||||
std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders();
|
||||
|
||||
VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key,
|
||||
u32 num_color_buffers,
|
||||
VideoCommon::Shader::AsyncShaders& async_shaders);
|
||||
|
||||
VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);
|
||||
|
@ -153,7 +152,6 @@ private:
|
|||
VKScheduler& scheduler;
|
||||
VKDescriptorPool& descriptor_pool;
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue;
|
||||
VKRenderPassCache& renderpass_cache;
|
||||
|
||||
std::unique_ptr<Shader> null_shader;
|
||||
std::unique_ptr<Shader> null_kernel;
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -11,11 +11,11 @@
|
|||
#include <vector>
|
||||
|
||||
#include <boost/container/static_vector.hpp>
|
||||
#include <boost/functional/hash.hpp>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/rasterizer_accelerated.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/renderer_vulkan/blit_image.h"
|
||||
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
|
||||
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_compute_pass.h"
|
||||
|
@ -24,10 +24,9 @@
|
|||
#include "video_core/renderer_vulkan/vk_memory_manager.h"
|
||||
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_query_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_sampler_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
|
||||
#include "video_core/renderer_vulkan/vk_texture_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
|
||||
#include "video_core/renderer_vulkan/wrapper.h"
|
||||
|
@ -49,60 +48,9 @@ namespace Vulkan {
|
|||
|
||||
struct VKScreenInfo;
|
||||
|
||||
using ImageViewsPack = boost::container::static_vector<VkImageView, Maxwell::NumRenderTargets + 1>;
|
||||
|
||||
struct FramebufferCacheKey {
|
||||
VkRenderPass renderpass{};
|
||||
u32 width = 0;
|
||||
u32 height = 0;
|
||||
u32 layers = 0;
|
||||
ImageViewsPack views;
|
||||
|
||||
std::size_t Hash() const noexcept {
|
||||
std::size_t hash = 0;
|
||||
boost::hash_combine(hash, static_cast<VkRenderPass>(renderpass));
|
||||
for (const auto& view : views) {
|
||||
boost::hash_combine(hash, static_cast<VkImageView>(view));
|
||||
}
|
||||
boost::hash_combine(hash, width);
|
||||
boost::hash_combine(hash, height);
|
||||
boost::hash_combine(hash, layers);
|
||||
return hash;
|
||||
}
|
||||
|
||||
bool operator==(const FramebufferCacheKey& rhs) const noexcept {
|
||||
return std::tie(renderpass, views, width, height, layers) ==
|
||||
std::tie(rhs.renderpass, rhs.views, rhs.width, rhs.height, rhs.layers);
|
||||
}
|
||||
|
||||
bool operator!=(const FramebufferCacheKey& rhs) const noexcept {
|
||||
return !operator==(rhs);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
namespace std {
|
||||
|
||||
template <>
|
||||
struct hash<Vulkan::FramebufferCacheKey> {
|
||||
std::size_t operator()(const Vulkan::FramebufferCacheKey& k) const noexcept {
|
||||
return k.Hash();
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace std
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class StateTracker;
|
||||
class BufferBindings;
|
||||
|
||||
struct ImageView {
|
||||
View view;
|
||||
VkImageLayout* layout = nullptr;
|
||||
};
|
||||
|
||||
class RasterizerVulkan final : public VideoCore::RasterizerAccelerated {
|
||||
public:
|
||||
explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
|
||||
|
@ -123,15 +71,18 @@ public:
|
|||
void InvalidateRegion(VAddr addr, u64 size) override;
|
||||
void OnCPUWrite(VAddr addr, u64 size) override;
|
||||
void SyncGuestHost() override;
|
||||
void UnmapMemory(VAddr addr, u64 size) override;
|
||||
void SignalSemaphore(GPUVAddr addr, u32 value) override;
|
||||
void SignalSyncPoint(u32 value) override;
|
||||
void ReleaseFences() override;
|
||||
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
|
||||
void WaitForIdle() override;
|
||||
void FragmentBarrier() override;
|
||||
void TiledCacheBarrier() override;
|
||||
void FlushCommands() override;
|
||||
void TickFrame() override;
|
||||
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src,
|
||||
const Tegra::Engines::Fermi2D::Regs::Surface& dst,
|
||||
bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src,
|
||||
const Tegra::Engines::Fermi2D::Surface& dst,
|
||||
const Tegra::Engines::Fermi2D::Config& copy_config) override;
|
||||
bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
|
||||
u32 pixel_stride) override;
|
||||
|
@ -145,11 +96,17 @@ public:
|
|||
}
|
||||
|
||||
/// Maximum supported size that a constbuffer can have in bytes.
|
||||
static constexpr std::size_t MaxConstbufferSize = 0x10000;
|
||||
static constexpr size_t MaxConstbufferSize = 0x10000;
|
||||
static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0,
|
||||
"The maximum size of a constbuffer must be a multiple of the size of GLvec4");
|
||||
|
||||
private:
|
||||
static constexpr size_t MAX_TEXTURES = 192;
|
||||
static constexpr size_t MAX_IMAGES = 48;
|
||||
static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES;
|
||||
|
||||
static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float);
|
||||
|
||||
struct DrawParameters {
|
||||
void Draw(vk::CommandBuffer cmdbuf) const;
|
||||
|
||||
|
@ -160,23 +117,8 @@ private:
|
|||
bool is_indexed = 0;
|
||||
};
|
||||
|
||||
using ColorAttachments = std::array<View, Maxwell::NumRenderTargets>;
|
||||
using ZetaAttachment = View;
|
||||
|
||||
using Texceptions = std::bitset<Maxwell::NumRenderTargets + 1>;
|
||||
|
||||
static constexpr std::size_t ZETA_TEXCEPTION_INDEX = 8;
|
||||
static constexpr VkDeviceSize DEFAULT_BUFFER_SIZE = 4 * sizeof(float);
|
||||
|
||||
void FlushWork();
|
||||
|
||||
/// @brief Updates the currently bound attachments
|
||||
/// @param is_clear True when the framebuffer is updated as a clear
|
||||
/// @return Bitfield of attachments being used as sampled textures
|
||||
Texceptions UpdateAttachments(bool is_clear);
|
||||
|
||||
std::tuple<VkFramebuffer, VkExtent2D> ConfigureFramebuffers(VkRenderPass renderpass);
|
||||
|
||||
/// Setups geometry buffers and state.
|
||||
DrawParameters SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings,
|
||||
bool is_indexed, bool is_instanced);
|
||||
|
@ -184,17 +126,12 @@ private:
|
|||
/// Setup descriptors in the graphics pipeline.
|
||||
void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders);
|
||||
|
||||
void SetupImageTransitions(Texceptions texceptions, const ColorAttachments& color,
|
||||
const ZetaAttachment& zeta);
|
||||
|
||||
void UpdateDynamicStates();
|
||||
|
||||
void BeginTransformFeedback();
|
||||
|
||||
void EndTransformFeedback();
|
||||
|
||||
bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment);
|
||||
|
||||
void SetupVertexArrays(BufferBindings& buffer_bindings);
|
||||
|
||||
void SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, bool is_indexed);
|
||||
|
@ -240,14 +177,6 @@ private:
|
|||
|
||||
void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address);
|
||||
|
||||
void SetupUniformTexels(const Tegra::Texture::TICEntry& image, const UniformTexelEntry& entry);
|
||||
|
||||
void SetupTexture(const Tegra::Texture::FullTextureInfo& texture, const SamplerEntry& entry);
|
||||
|
||||
void SetupStorageTexel(const Tegra::Texture::TICEntry& tic, const StorageTexelEntry& entry);
|
||||
|
||||
void SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
|
||||
|
||||
void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
|
@ -264,18 +193,16 @@ private:
|
|||
void UpdateStencilOp(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
void UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
|
||||
std::size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const;
|
||||
size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const;
|
||||
|
||||
std::size_t CalculateComputeStreamBufferSize() const;
|
||||
size_t CalculateComputeStreamBufferSize() const;
|
||||
|
||||
std::size_t CalculateVertexArraysSize() const;
|
||||
size_t CalculateVertexArraysSize() const;
|
||||
|
||||
std::size_t CalculateIndexBufferSize() const;
|
||||
size_t CalculateIndexBufferSize() const;
|
||||
|
||||
std::size_t CalculateConstBufferSize(const ConstBufferEntry& entry,
|
||||
const Tegra::Engines::ConstBufferInfo& buffer) const;
|
||||
|
||||
RenderPassParams GetRenderPassParams(Texceptions texceptions) const;
|
||||
size_t CalculateConstBufferSize(const ConstBufferEntry& entry,
|
||||
const Tegra::Engines::ConstBufferInfo& buffer) const;
|
||||
|
||||
VkBuffer DefaultBuffer();
|
||||
|
||||
|
@ -290,18 +217,19 @@ private:
|
|||
StateTracker& state_tracker;
|
||||
VKScheduler& scheduler;
|
||||
|
||||
VKStreamBuffer stream_buffer;
|
||||
VKStagingBufferPool staging_pool;
|
||||
VKDescriptorPool descriptor_pool;
|
||||
VKUpdateDescriptorQueue update_descriptor_queue;
|
||||
VKRenderPassCache renderpass_cache;
|
||||
BlitImageHelper blit_image;
|
||||
QuadArrayPass quad_array_pass;
|
||||
QuadIndexedPass quad_indexed_pass;
|
||||
Uint8Pass uint8_pass;
|
||||
|
||||
VKTextureCache texture_cache;
|
||||
TextureCacheRuntime texture_cache_runtime;
|
||||
TextureCache texture_cache;
|
||||
VKPipelineCache pipeline_cache;
|
||||
VKBufferCache buffer_cache;
|
||||
VKSamplerCache sampler_cache;
|
||||
VKQueryCache query_cache;
|
||||
VKFenceManager fence_manager;
|
||||
|
||||
|
@ -310,16 +238,11 @@ private:
|
|||
vk::Event wfi_event;
|
||||
VideoCommon::Shader::AsyncShaders async_shaders;
|
||||
|
||||
ColorAttachments color_attachments;
|
||||
ZetaAttachment zeta_attachment;
|
||||
|
||||
std::vector<ImageView> sampled_views;
|
||||
std::vector<ImageView> image_views;
|
||||
boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
|
||||
std::array<VideoCommon::ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
|
||||
boost::container::static_vector<VkSampler, MAX_TEXTURES> sampler_handles;
|
||||
|
||||
u32 draw_counter = 0;
|
||||
|
||||
// TODO(Rodrigo): Invalidate on image destruction
|
||||
std::unordered_map<FramebufferCacheKey, vk::Framebuffer> framebuffer_cache;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
|
|
@ -1,158 +0,0 @@
|
|||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "common/cityhash.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
|
||||
#include "video_core/renderer_vulkan/vk_device.h"
|
||||
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
|
||||
#include "video_core/renderer_vulkan/wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
std::size_t RenderPassParams::Hash() const noexcept {
|
||||
const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
|
||||
return static_cast<std::size_t>(hash);
|
||||
}
|
||||
|
||||
bool RenderPassParams::operator==(const RenderPassParams& rhs) const noexcept {
|
||||
return std::memcmp(&rhs, this, sizeof *this) == 0;
|
||||
}
|
||||
|
||||
VKRenderPassCache::VKRenderPassCache(const VKDevice& device_) : device{device_} {}
|
||||
|
||||
VKRenderPassCache::~VKRenderPassCache() = default;
|
||||
|
||||
VkRenderPass VKRenderPassCache::GetRenderPass(const RenderPassParams& params) {
|
||||
const auto [pair, is_cache_miss] = cache.try_emplace(params);
|
||||
auto& entry = pair->second;
|
||||
if (is_cache_miss) {
|
||||
entry = CreateRenderPass(params);
|
||||
}
|
||||
return *entry;
|
||||
}
|
||||
|
||||
vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& params) const {
|
||||
using namespace VideoCore::Surface;
|
||||
const std::size_t num_attachments = static_cast<std::size_t>(params.num_color_attachments);
|
||||
|
||||
std::vector<VkAttachmentDescription> descriptors;
|
||||
descriptors.reserve(num_attachments);
|
||||
|
||||
std::vector<VkAttachmentReference> color_references;
|
||||
color_references.reserve(num_attachments);
|
||||
|
||||
for (std::size_t rt = 0; rt < num_attachments; ++rt) {
|
||||
const auto guest_format = static_cast<Tegra::RenderTargetFormat>(params.color_formats[rt]);
|
||||
const PixelFormat pixel_format = PixelFormatFromRenderTargetFormat(guest_format);
|
||||
const auto format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, pixel_format);
|
||||
ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}",
|
||||
static_cast<int>(pixel_format));
|
||||
|
||||
// TODO(Rodrigo): Add MAY_ALIAS_BIT when it's needed.
|
||||
const VkImageLayout color_layout = ((params.texceptions >> rt) & 1) != 0
|
||||
? VK_IMAGE_LAYOUT_GENERAL
|
||||
: VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
|
||||
descriptors.push_back({
|
||||
.flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT,
|
||||
.format = format.format,
|
||||
.samples = VK_SAMPLE_COUNT_1_BIT,
|
||||
.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
|
||||
.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
|
||||
.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
|
||||
.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
|
||||
.initialLayout = color_layout,
|
||||
.finalLayout = color_layout,
|
||||
});
|
||||
|
||||
color_references.push_back({
|
||||
.attachment = static_cast<u32>(rt),
|
||||
.layout = color_layout,
|
||||
});
|
||||
}
|
||||
|
||||
VkAttachmentReference zeta_attachment_ref;
|
||||
const bool has_zeta = params.zeta_format != 0;
|
||||
if (has_zeta) {
|
||||
const auto guest_format = static_cast<Tegra::DepthFormat>(params.zeta_format);
|
||||
const PixelFormat pixel_format = PixelFormatFromDepthFormat(guest_format);
|
||||
const auto format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, pixel_format);
|
||||
ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}",
|
||||
static_cast<int>(pixel_format));
|
||||
|
||||
const VkImageLayout zeta_layout = params.zeta_texception != 0
|
||||
? VK_IMAGE_LAYOUT_GENERAL
|
||||
: VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
|
||||
descriptors.push_back({
|
||||
.flags = 0,
|
||||
.format = format.format,
|
||||
.samples = VK_SAMPLE_COUNT_1_BIT,
|
||||
.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
|
||||
.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
|
||||
.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
|
||||
.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
|
||||
.initialLayout = zeta_layout,
|
||||
.finalLayout = zeta_layout,
|
||||
});
|
||||
|
||||
zeta_attachment_ref = {
|
||||
.attachment = static_cast<u32>(num_attachments),
|
||||
.layout = zeta_layout,
|
||||
};
|
||||
}
|
||||
|
||||
const VkSubpassDescription subpass_description{
|
||||
.flags = 0,
|
||||
.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
|
||||
.inputAttachmentCount = 0,
|
||||
.pInputAttachments = nullptr,
|
||||
.colorAttachmentCount = static_cast<u32>(color_references.size()),
|
||||
.pColorAttachments = color_references.data(),
|
||||
.pResolveAttachments = nullptr,
|
||||
.pDepthStencilAttachment = has_zeta ? &zeta_attachment_ref : nullptr,
|
||||
.preserveAttachmentCount = 0,
|
||||
.pPreserveAttachments = nullptr,
|
||||
};
|
||||
|
||||
VkAccessFlags access = 0;
|
||||
VkPipelineStageFlags stage = 0;
|
||||
if (!color_references.empty()) {
|
||||
access |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
|
||||
stage |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
|
||||
}
|
||||
|
||||
if (has_zeta) {
|
||||
access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
|
||||
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
|
||||
stage |= VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
|
||||
}
|
||||
|
||||
const VkSubpassDependency subpass_dependency{
|
||||
.srcSubpass = VK_SUBPASS_EXTERNAL,
|
||||
.dstSubpass = 0,
|
||||
.srcStageMask = stage,
|
||||
.dstStageMask = stage,
|
||||
.srcAccessMask = 0,
|
||||
.dstAccessMask = access,
|
||||
.dependencyFlags = 0,
|
||||
};
|
||||
|
||||
return device.GetLogical().CreateRenderPass({
|
||||
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.attachmentCount = static_cast<u32>(descriptors.size()),
|
||||
.pAttachments = descriptors.data(),
|
||||
.subpassCount = 1,
|
||||
.pSubpasses = &subpass_description,
|
||||
.dependencyCount = 1,
|
||||
.pDependencies = &subpass_dependency,
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
|
@ -1,70 +0,0 @@
|
|||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <type_traits>
|
||||
#include <unordered_map>
|
||||
|
||||
#include <boost/container/static_vector.hpp>
|
||||
#include <boost/functional/hash.hpp>
|
||||
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/renderer_vulkan/wrapper.h"
|
||||
#include "video_core/surface.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class VKDevice;
|
||||
|
||||
struct RenderPassParams {
|
||||
std::array<u8, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> color_formats;
|
||||
u8 num_color_attachments;
|
||||
u8 texceptions;
|
||||
|
||||
u8 zeta_format;
|
||||
u8 zeta_texception;
|
||||
|
||||
std::size_t Hash() const noexcept;
|
||||
|
||||
bool operator==(const RenderPassParams& rhs) const noexcept;
|
||||
|
||||
bool operator!=(const RenderPassParams& rhs) const noexcept {
|
||||
return !operator==(rhs);
|
||||
}
|
||||
};
|
||||
static_assert(std::has_unique_object_representations_v<RenderPassParams>);
|
||||
static_assert(std::is_trivially_copyable_v<RenderPassParams>);
|
||||
static_assert(std::is_trivially_constructible_v<RenderPassParams>);
|
||||
|
||||
} // namespace Vulkan
|
||||
|
||||
namespace std {
|
||||
|
||||
template <>
|
||||
struct hash<Vulkan::RenderPassParams> {
|
||||
std::size_t operator()(const Vulkan::RenderPassParams& k) const noexcept {
|
||||
return k.Hash();
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace std
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class VKRenderPassCache final {
|
||||
public:
|
||||
explicit VKRenderPassCache(const VKDevice& device_);
|
||||
~VKRenderPassCache();
|
||||
|
||||
VkRenderPass GetRenderPass(const RenderPassParams& params);
|
||||
|
||||
private:
|
||||
vk::RenderPass CreateRenderPass(const RenderPassParams& params) const;
|
||||
|
||||
const VKDevice& device;
|
||||
std::unordered_map<RenderPassParams, vk::RenderPass> cache;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
|
@ -1,83 +0,0 @@
|
|||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <unordered_map>
|
||||
|
||||
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
|
||||
#include "video_core/renderer_vulkan/vk_sampler_cache.h"
|
||||
#include "video_core/renderer_vulkan/wrapper.h"
|
||||
#include "video_core/textures/texture.h"
|
||||
|
||||
using Tegra::Texture::TextureMipmapFilter;
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
namespace {
|
||||
|
||||
VkBorderColor ConvertBorderColor(std::array<float, 4> color) {
|
||||
// TODO(Rodrigo): Manage integer border colors
|
||||
if (color == std::array<float, 4>{0, 0, 0, 0}) {
|
||||
return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
|
||||
} else if (color == std::array<float, 4>{0, 0, 0, 1}) {
|
||||
return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK;
|
||||
} else if (color == std::array<float, 4>{1, 1, 1, 1}) {
|
||||
return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE;
|
||||
}
|
||||
if (color[0] + color[1] + color[2] > 1.35f) {
|
||||
// If color elements are brighter than roughly 0.5 average, use white border
|
||||
return VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE;
|
||||
} else if (color[3] > 0.5f) {
|
||||
return VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK;
|
||||
} else {
|
||||
return VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
|
||||
}
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
VKSamplerCache::VKSamplerCache(const VKDevice& device_) : device{device_} {}
|
||||
|
||||
VKSamplerCache::~VKSamplerCache() = default;
|
||||
|
||||
vk::Sampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) const {
|
||||
const bool arbitrary_borders = device.IsExtCustomBorderColorSupported();
|
||||
const std::array color = tsc.GetBorderColor();
|
||||
|
||||
VkSamplerCustomBorderColorCreateInfoEXT border{
|
||||
.sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT,
|
||||
.pNext = nullptr,
|
||||
.customBorderColor = {},
|
||||
.format = VK_FORMAT_UNDEFINED,
|
||||
};
|
||||
std::memcpy(&border.customBorderColor, color.data(), sizeof(color));
|
||||
|
||||
return device.GetLogical().CreateSampler({
|
||||
.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
|
||||
.pNext = arbitrary_borders ? &border : nullptr,
|
||||
.flags = 0,
|
||||
.magFilter = MaxwellToVK::Sampler::Filter(tsc.mag_filter),
|
||||
.minFilter = MaxwellToVK::Sampler::Filter(tsc.min_filter),
|
||||
.mipmapMode = MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter),
|
||||
.addressModeU = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter),
|
||||
.addressModeV = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter),
|
||||
.addressModeW = MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter),
|
||||
.mipLodBias = tsc.GetLodBias(),
|
||||
.anisotropyEnable =
|
||||
static_cast<VkBool32>(tsc.GetMaxAnisotropy() > 1.0f ? VK_TRUE : VK_FALSE),
|
||||
.maxAnisotropy = tsc.GetMaxAnisotropy(),
|
||||
.compareEnable = tsc.depth_compare_enabled,
|
||||
.compareOp = MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func),
|
||||
.minLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.0f : tsc.GetMinLod(),
|
||||
.maxLod = tsc.mipmap_filter == TextureMipmapFilter::None ? 0.25f : tsc.GetMaxLod(),
|
||||
.borderColor =
|
||||
arbitrary_borders ? VK_BORDER_COLOR_INT_CUSTOM_EXT : ConvertBorderColor(color),
|
||||
.unnormalizedCoordinates = VK_FALSE,
|
||||
});
|
||||
}
|
||||
|
||||
VkSampler VKSamplerCache::ToSamplerType(const vk::Sampler& sampler) const {
|
||||
return *sampler;
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
|
@ -1,29 +0,0 @@
|
|||
// Copyright 2019 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "video_core/renderer_vulkan/wrapper.h"
|
||||
#include "video_core/sampler_cache.h"
|
||||
#include "video_core/textures/texture.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class VKDevice;
|
||||
|
||||
class VKSamplerCache final : public VideoCommon::SamplerCache<VkSampler, vk::Sampler> {
|
||||
public:
|
||||
explicit VKSamplerCache(const VKDevice& device_);
|
||||
~VKSamplerCache();
|
||||
|
||||
protected:
|
||||
vk::Sampler CreateSampler(const Tegra::Texture::TSCEntry& tsc) const override;
|
||||
|
||||
VkSampler ToSamplerType(const vk::Sampler& sampler) const override;
|
||||
|
||||
private:
|
||||
const VKDevice& device;
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue