early-access version 1879

This commit is contained in:
pineappleEA 2021-07-15 02:45:51 +02:00
parent 87928f294a
commit c230feea19
14 changed files with 192 additions and 123 deletions

View file

@ -1,7 +1,7 @@
yuzu emulator early access
=============
This is the source code for early-access 1878.
This is the source code for early-access 1879.
## Legal Notice

View file

@ -257,6 +257,32 @@ void SetupOutPerVertex(EmitContext& ctx, std::string& header) {
}
}
/// Appends a redeclaration of the built-in `gl_PerVertex` input block to `header`.
///
/// Nothing is emitted unless the stage is TessellationControl and the shader
/// loads at least one of position, point size or clip distances. Only the
/// members that are actually loaded are declared inside the block.
void SetupInPerVertex(EmitContext& ctx, std::string& header) {
    // Currently only required for TessellationControl to adhere to
    // ARB_separate_shader_objects requirements
    if (ctx.stage != Stage::TessellationControl) {
        return;
    }
    auto& loads = ctx.info.loads;
    const bool reads_position{loads.AnyComponent(IR::Attribute::PositionX)};
    const bool reads_point_size{loads[IR::Attribute::PointSize]};
    const bool reads_clip_distance{loads.ClipDistances()};
    if (!(reads_position || reads_point_size || reads_clip_distance)) {
        // No built-in per-vertex input is read, so no block is declared
        return;
    }
    // Appends a member declaration only when the matching built-in is loaded
    const auto declare_if = [&header](bool is_loaded, const char* member) {
        if (is_loaded) {
            header += member;
        }
    };
    header += "in gl_PerVertex{";
    declare_if(reads_position, "vec4 gl_Position;");
    declare_if(reads_point_size, "float gl_PointSize;");
    declare_if(reads_clip_distance, "float gl_ClipDistance[];");
    header += "}gl_in[gl_MaxPatchVertices];";
}
void SetupLegacyInPerFragment(EmitContext& ctx, std::string& header) {
if (!ctx.info.loads.Legacy()) {
return;
@ -334,6 +360,7 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile
break;
}
SetupOutPerVertex(*this, header);
SetupInPerVertex(*this, header);
SetupLegacyInPerFragment(*this, header);
for (size_t index = 0; index < IR::NUM_GENERICS; ++index) {
@ -375,6 +402,7 @@ EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile
}
void EmitContext::SetupExtensions() {
header += "#extension GL_ARB_separate_shader_objects : enable\n";
if (info.uses_shadow_lod && profile.support_gl_texture_shadow_lod) {
header += "#extension GL_EXT_texture_shadow_lod : enable\n";
}

View file

@ -550,10 +550,6 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
auto& src_buffer = slot_buffers[buffer_a];
auto& dest_buffer = slot_buffers[buffer_b];
SynchronizeBuffer(src_buffer, *cpu_src_address, static_cast<u32>(amount));
const VAddr aligned_dst = Common::AlignUp(*cpu_dest_address, 64);
const u64 align_diff = aligned_dst - *cpu_dest_address;
const u64 new_amount = align_diff > amount ? 0 : amount - align_diff;
dest_buffer.UnmarkRegionAsCpuModified(aligned_dst, Common::AlignDown(new_amount, 64));
SynchronizeBuffer(dest_buffer, *cpu_dest_address, static_cast<u32>(amount));
std::array copies{BufferCopy{
.src_offset = src_buffer.Offset(*cpu_src_address),
@ -610,10 +606,12 @@ bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) {
buffer = FindBuffer(*cpu_dst_address, static_cast<u32>(size));
} while (has_deleted_buffers);
const VAddr aligned_dst = Common::AlignUp(*cpu_dst_address, 64);
const u64 align_diff = aligned_dst - *cpu_dst_address;
const u64 new_amount = align_diff > size ? 0 : size - align_diff;
auto& dest_buffer = slot_buffers[buffer];
const u32 offset = static_cast<u32>(*cpu_dst_address - dest_buffer.CpuAddr());
runtime.ClearBuffer(dest_buffer, offset, size, value);
dest_buffer.UnmarkRegionAsCpuModified(*cpu_dst_address, size);
return true;
}

View file

@ -4,6 +4,7 @@
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/microprofile.h"
#include "common/settings.h"
#include "core/core.h"
#include "video_core/engines/maxwell_3d.h"
@ -12,6 +13,9 @@
#include "video_core/renderer_base.h"
#include "video_core/textures/decoders.h"
MICROPROFILE_DECLARE(GPU_DMAEngine);
MICROPROFILE_DEFINE(GPU_DMAEngine, "GPU", "DMA Engine", MP_RGB(224, 224, 128));
namespace Tegra::Engines {
using namespace Texture;
@ -43,6 +47,7 @@ void MaxwellDMA::CallMultiMethod(u32 method, const u32* base_start, u32 amount,
}
void MaxwellDMA::Launch() {
MICROPROFILE_SCOPE(GPU_DMAEngine);
LOG_TRACE(Render_OpenGL, "DMA copy 0x{:x} -> 0x{:x}", static_cast<GPUVAddr>(regs.offset_in),
static_cast<GPUVAddr>(regs.offset_out));

View file

@ -345,7 +345,7 @@ void MemoryManager::WriteBlock(GPUVAddr gpu_dest_addr, const void* src_buffer, s
// Invalidate must happen on the rasterizer interface, such that memory is always
// synchronous when it is written (even when in asynchronous GPU mode).
rasterizer->UnmapMemory(dest_addr, copy_amount);
rasterizer->InvalidateRegion(dest_addr, copy_amount);
system.Memory().WriteBlockUnsafe(dest_addr, src_buffer, copy_amount);
}

View file

@ -46,17 +46,13 @@ ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cac
kepler_compute{kepler_compute_}, program_manager{program_manager_}, info{info_} {
switch (device.GetShaderBackend()) {
case Settings::ShaderBackend::GLSL:
source_program.handle = glCreateProgram();
AttachShader(GL_COMPUTE_SHADER, source_program.handle, code);
LinkProgram(source_program.handle);
source_program = CreateProgram(code, GL_COMPUTE_SHADER);
break;
case Settings::ShaderBackend::GLASM:
assembly_program = CompileProgram(code, GL_COMPUTE_PROGRAM_NV);
break;
case Settings::ShaderBackend::SPIRV:
source_program.handle = glCreateProgram();
AttachShader(GL_COMPUTE_SHADER, source_program.handle, code_v);
LinkProgram(source_program.handle);
source_program = CreateProgram(code_v, GL_COMPUTE_SHADER);
break;
}
std::copy_n(info.constant_buffer_used_sizes.begin(), uniform_buffer_sizes.size(),
@ -154,7 +150,7 @@ void ComputePipeline::Configure() {
if (assembly_program.handle != 0) {
program_manager.BindComputeAssemblyProgram(assembly_program.handle);
} else {
program_manager.BindProgram(source_program.handle);
program_manager.BindComputeProgram(source_program.handle);
}
buffer_cache.UnbindComputeTextureBuffers();
size_t texbuf_index{};

View file

@ -237,44 +237,32 @@ GraphicsPipeline::GraphicsPipeline(
if (key.xfb_enabled && device.UseAssemblyShaders()) {
GenerateTransformFeedbackState();
}
auto func{
[this, device, sources, sources_spirv, shader_notify](ShaderContext::Context*) mutable {
if (!device.UseAssemblyShaders()) {
program.handle = glCreateProgram();
}
for (size_t stage = 0; stage < 5; ++stage) {
switch (device.GetShaderBackend()) {
case Settings::ShaderBackend::GLSL: {
const auto code{sources[stage]};
if (code.empty()) {
continue;
}
AttachShader(Stage(stage), program.handle, code);
} break;
case Settings::ShaderBackend::GLASM: {
const auto code{sources[stage]};
if (code.empty()) {
continue;
}
assembly_programs[stage] = CompileProgram(code, AssemblyStage(stage));
} break;
case Settings::ShaderBackend::SPIRV: {
const auto code{sources_spirv[stage]};
if (code.empty()) {
continue;
}
AttachShader(Stage(stage), program.handle, code);
} break;
auto func{[this, device, sources, sources_spirv,
shader_notify](ShaderContext::Context*) mutable {
for (size_t stage = 0; stage < 5; ++stage) {
switch (device.GetShaderBackend()) {
case Settings::ShaderBackend::GLSL:
if (!sources[stage].empty()) {
source_programs[stage] = CreateProgram(sources[stage], Stage(stage));
}
break;
case Settings::ShaderBackend::GLASM:
if (!sources[stage].empty()) {
assembly_programs[stage] = CompileProgram(sources[stage], AssemblyStage(stage));
}
break;
case Settings::ShaderBackend::SPIRV:
if (!sources_spirv[stage].empty()) {
source_programs[stage] = CreateProgram(sources_spirv[stage], Stage(stage));
}
break;
}
if (!device.UseAssemblyShaders()) {
LinkProgram(program.handle);
}
if (shader_notify) {
shader_notify->MarkShaderComplete();
}
is_built.store(true, std::memory_order_relaxed);
}};
}
if (shader_notify) {
shader_notify->MarkShaderComplete();
}
is_built = true;
}};
if (thread_worker) {
thread_worker->QueueWork(std::move(func));
} else {
@ -449,7 +437,7 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
if (assembly_programs[0].handle != 0) {
program_manager.BindAssemblyPrograms(assembly_programs, enabled_stages_mask);
} else {
program_manager.BindProgram(program.handle);
program_manager.BindSourcePrograms(source_programs);
}
const ImageId* views_it{image_view_ids.data()};
GLsizei texture_binding = 0;

View file

@ -129,7 +129,7 @@ private:
void (*configure_func)(GraphicsPipeline*, bool){};
OGLProgram program;
std::array<OGLProgram, 5> source_programs;
std::array<OGLAssemblyProgram, 5> assembly_programs;
u32 enabled_stages_mask{};

View file

@ -24,34 +24,68 @@ class ProgramManager {
public:
explicit ProgramManager(const Device& device) {
glCreateProgramPipelines(1, &pipeline.handle);
if (device.UseAssemblyShaders()) {
glEnable(GL_COMPUTE_PROGRAM_NV);
}
}
void BindProgram(GLuint program) {
if (current_source_program == program) {
return;
}
current_source_program = program;
/// Makes `program` the current program through glUseProgram.
/// The call is always issued (no redundancy check); the bound flag is recorded
/// so that UnbindCompute can later reset the current program to zero.
void BindComputeProgram(GLuint program) {
    is_compute_bound = true;
    glUseProgram(program);
}
void BindComputeAssemblyProgram(GLuint program) {
if (current_compute_assembly_program != program) {
current_compute_assembly_program = program;
if (current_assembly_compute_program != program) {
current_assembly_compute_program = program;
glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program);
}
if (current_source_program != 0) {
current_source_program = 0;
glUseProgram(0);
UnbindPipeline();
}
/// Attaches one separable program per graphics stage to the program pipeline
/// and binds the pipeline.
///
/// A stage is only updated through glUseProgramStages when its handle differs
/// from the handle cached in current_programs.
void BindSourcePrograms(std::span<const OGLProgram, NUM_STAGES> programs) {
    // Pipeline stage bit for each stage index, in the order used by `programs`
    static constexpr std::array<GLenum, 5> STAGE_BITS{
        GL_VERTEX_SHADER_BIT,   GL_TESS_CONTROL_SHADER_BIT, GL_TESS_EVALUATION_SHADER_BIT,
        GL_GEOMETRY_SHADER_BIT, GL_FRAGMENT_SHADER_BIT,
    };
    for (size_t index = 0; index < NUM_STAGES; ++index) {
        const GLuint handle = programs[index].handle;
        GLuint& cached_handle = current_programs[index];
        if (cached_handle == handle) {
            continue;
        }
        cached_handle = handle;
        glUseProgramStages(pipeline.handle, STAGE_BITS[index], handle);
    }
    BindPipeline();
}
/// Binds the presentation programs: `vertex` and `fragment` are attached to the
/// pipeline's vertex and fragment stages, the tessellation and geometry stages
/// are cleared, and the pipeline is bound.
void BindPresentPrograms(GLuint vertex, GLuint fragment) {
    // Updates a single pipeline stage, skipping the GL call when the cached handle matches
    const auto use_stage = [this](size_t index, GLenum stage_bit, GLuint handle) {
        if (current_programs[index] == handle) {
            return;
        }
        current_programs[index] = handle;
        glUseProgramStages(pipeline.handle, stage_bit, handle);
    };
    use_stage(0, GL_VERTEX_SHADER_BIT, vertex);
    use_stage(4, GL_FRAGMENT_SHADER_BIT, fragment);

    // The remaining stages are always detached, regardless of the cached handles
    constexpr GLenum UNUSED_STAGE_BITS =
        GL_TESS_CONTROL_SHADER_BIT | GL_TESS_EVALUATION_SHADER_BIT | GL_GEOMETRY_SHADER_BIT;
    glUseProgramStages(pipeline.handle, UNUSED_STAGE_BITS, 0);
    for (size_t index = 1; index <= 3; ++index) {
        current_programs[index] = 0;
    }

    // Disable every assembly program target when the cached stage mask is non-zero
    if (current_stage_mask != 0) {
        current_stage_mask = 0;
        for (const GLenum assembly_target : ASSEMBLY_PROGRAM_ENUMS) {
            glDisable(assembly_target);
        }
    }
    BindPipeline();
}
void BindAssemblyPrograms(std::span<const OGLAssemblyProgram, NUM_STAGES> programs,
u32 stage_mask) {
const u32 changed_mask = current_assembly_mask ^ stage_mask;
current_assembly_mask = stage_mask;
const u32 changed_mask = current_stage_mask ^ stage_mask;
current_stage_mask = stage_mask;
if (changed_mask != 0) {
for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
@ -65,25 +99,47 @@ public:
}
}
for (size_t stage = 0; stage < NUM_STAGES; ++stage) {
if (current_assembly_programs[stage] != programs[stage].handle) {
current_assembly_programs[stage] = programs[stage].handle;
if (current_programs[stage] != programs[stage].handle) {
current_programs[stage] = programs[stage].handle;
glBindProgramARB(ASSEMBLY_PROGRAM_ENUMS[stage], programs[stage].handle);
}
}
if (current_source_program != 0) {
current_source_program = 0;
glUseProgram(0);
}
UnbindPipeline();
}
/// Currently a no-op: the body is empty and no state is touched.
void RestoreGuestCompute() {}
private:
GLuint current_source_program = 0;
/// Binds the program pipeline object if it is not already bound, then clears
/// any program made current through BindComputeProgram.
void BindPipeline() {
    const bool was_bound = is_pipeline_bound;
    is_pipeline_bound = true;
    if (!was_bound) {
        glBindProgramPipeline(pipeline.handle);
    }
    // Always runs, even when the pipeline was already bound
    UnbindCompute();
}
u32 current_assembly_mask = 0;
std::array<GLuint, NUM_STAGES> current_assembly_programs{};
GLuint current_compute_assembly_program = 0;
/// Unbinds the program pipeline object if it is currently bound, then clears
/// any program made current through BindComputeProgram.
void UnbindPipeline() {
    const bool was_bound = is_pipeline_bound;
    is_pipeline_bound = false;
    if (was_bound) {
        glBindProgramPipeline(0);
    }
    // Always runs, even when no pipeline was bound
    UnbindCompute();
}
/// Resets the current program to zero when a compute program was bound through
/// BindComputeProgram; does nothing otherwise.
void UnbindCompute() {
    if (!is_compute_bound) {
        return;
    }
    is_compute_bound = false;
    glUseProgram(0);
}
// Program pipeline object created in the constructor; stages are attached with glUseProgramStages
OGLPipeline pipeline;
// True while `pipeline` is bound through glBindProgramPipeline
bool is_pipeline_bound{};
// True after BindComputeProgram until UnbindCompute resets the current program
bool is_compute_bound{};
// Stage mask last passed to BindAssemblyPrograms; reset to zero by BindPresentPrograms
u32 current_stage_mask = 0;
// Per-stage handle cache, written by the source, present and assembly bind paths alike
std::array<GLuint, NUM_STAGES> current_programs{};
// Handle last bound to GL_COMPUTE_PROGRAM_NV by BindComputeAssemblyProgram
GLuint current_assembly_compute_program = 0;
};
} // namespace OpenGL

View file

@ -13,6 +13,33 @@
namespace OpenGL {
/// Creates a program object marked GL_PROGRAM_SEPARABLE, attaches `shader` to it
/// and links it.
///
/// When the renderer_debug setting is enabled, the program info log (if it has
/// any text) is reported: as an error when linking failed, as a warning
/// otherwise. The link status is not inspected when the setting is disabled.
///
/// @param shader Handle of the shader object to attach.
/// @return The program object, whether or not linking succeeded.
static OGLProgram LinkSeparableProgram(GLuint shader) {
    OGLProgram program;
    program.handle = glCreateProgram();
    glProgramParameteri(program.handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
    glAttachShader(program.handle, shader);
    glLinkProgram(program.handle);
    if (!Settings::values.renderer_debug) {
        return program;
    }
    GLint link_status{};
    glGetProgramiv(program.handle, GL_LINK_STATUS, &link_status);
    GLint log_length{};
    glGetProgramiv(program.handle, GL_INFO_LOG_LENGTH, &log_length);
    if (log_length <= 0) {
        return program;
    }
    // GL_INFO_LOG_LENGTH includes the NUL terminator, so size the buffer with it
    // and then shrink to the number of characters actually written
    std::string log(static_cast<std::size_t>(log_length), '\0');
    GLsizei written{};
    glGetProgramInfoLog(program.handle, log_length, &written, log.data());
    log.resize(written > 0 ? static_cast<std::size_t>(written) : 0);
    if (log.empty()) {
        // Only the terminator was reported, there is nothing to print
        return program;
    }
    if (link_status == GL_FALSE) {
        LOG_ERROR(Render_OpenGL, "{}", log);
    } else {
        LOG_WARNING(Render_OpenGL, "{}", log);
    }
    return program;
}
static void LogShader(GLuint shader, std::string_view code = {}) {
GLint shader_status{};
glGetShaderiv(shader, GL_COMPILE_STATUS, &shader_status);
@ -36,7 +63,7 @@ static void LogShader(GLuint shader, std::string_view code = {}) {
}
}
void AttachShader(GLenum stage, GLuint program, std::string_view code) {
OGLProgram CreateProgram(std::string_view code, GLenum stage) {
OGLShader shader;
shader.handle = glCreateShader(stage);
@ -44,45 +71,23 @@ void AttachShader(GLenum stage, GLuint program, std::string_view code) {
const GLchar* const code_ptr = code.data();
glShaderSource(shader.handle, 1, &code_ptr, &length);
glCompileShader(shader.handle);
glAttachShader(program, shader.handle);
if (Settings::values.renderer_debug) {
LogShader(shader.handle, code);
}
return LinkSeparableProgram(shader.handle);
}
void AttachShader(GLenum stage, GLuint program, std::span<const u32> code) {
OGLProgram CreateProgram(std::span<const u32> code, GLenum stage) {
OGLShader shader;
shader.handle = glCreateShader(stage);
glShaderBinary(1, &shader.handle, GL_SHADER_BINARY_FORMAT_SPIR_V_ARB, code.data(),
static_cast<GLsizei>(code.size_bytes()));
glSpecializeShader(shader.handle, "main", 0, nullptr, nullptr);
glAttachShader(program, shader.handle);
if (Settings::values.renderer_debug) {
LogShader(shader.handle);
}
}
void LinkProgram(GLuint program) {
glLinkProgram(program);
if (!Settings::values.renderer_debug) {
return;
}
GLint link_status{};
glGetProgramiv(program, GL_LINK_STATUS, &link_status);
GLint log_length{};
glGetProgramiv(program, GL_INFO_LOG_LENGTH, &log_length);
if (log_length == 0) {
return;
}
std::string log(log_length, 0);
glGetProgramInfoLog(program, log_length, nullptr, log.data());
if (link_status == GL_FALSE) {
LOG_ERROR(Render_OpenGL, "{}", log);
} else {
LOG_WARNING(Render_OpenGL, "{}", log);
}
return LinkSeparableProgram(shader.handle);
}
OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target) {

View file

@ -17,11 +17,9 @@
namespace OpenGL {
void AttachShader(GLenum stage, GLuint program, std::string_view code);
OGLProgram CreateProgram(std::string_view code, GLenum stage);
void AttachShader(GLenum stage, GLuint program, std::span<const u32> code);
void LinkProgram(GLuint program);
OGLProgram CreateProgram(std::span<const u32> code, GLenum stage);
OGLAssemblyProgram CompileProgram(std::string_view code, GLenum target);

View file

@ -254,10 +254,8 @@ void RendererOpenGL::InitOpenGLObjects() {
Settings::values.bg_blue.GetValue(), 0.0f);
// Create shader programs
present_program.handle = glCreateProgram();
AttachShader(GL_VERTEX_SHADER, present_program.handle, HostShaders::OPENGL_PRESENT_VERT);
AttachShader(GL_FRAGMENT_SHADER, present_program.handle, HostShaders::OPENGL_PRESENT_FRAG);
LinkProgram(present_program.handle);
present_vertex = CreateProgram(HostShaders::OPENGL_PRESENT_VERT, GL_VERTEX_SHADER);
present_fragment = CreateProgram(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER);
// Generate presentation sampler
present_sampler.Create();
@ -341,8 +339,9 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
// Set projection matrix
const std::array ortho_matrix =
MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height));
program_manager.BindProgram(present_program.handle);
glUniformMatrix3x2fv(ModelViewMatrixLocation, 1, GL_FALSE, ortho_matrix.data());
program_manager.BindPresentPrograms(present_vertex.handle, present_fragment.handle);
glProgramUniformMatrix3x2fv(present_vertex.handle, ModelViewMatrixLocation, 1, GL_FALSE,
ortho_matrix.data());
const auto& texcoords = screen_info.display_texcoords;
auto left = texcoords.left;

View file

@ -110,7 +110,8 @@ private:
// OpenGL object IDs
OGLSampler present_sampler;
OGLBuffer vertex_buffer;
OGLProgram present_program;
OGLProgram present_vertex;
OGLProgram present_fragment;
OGLFramebuffer screenshot_framebuffer;
// GPU address of the vertex buffer

View file

@ -42,12 +42,7 @@ using VideoCore::Surface::BytesPerBlock;
namespace {
OGLProgram MakeProgram(std::string_view source) {
OGLProgram program;
OGLShader shader;
program.handle = glCreateProgram();
AttachShader(GL_COMPUTE_SHADER, program.handle, source);
LinkProgram(program.handle);
return program;
return CreateProgram(source, GL_COMPUTE_SHADER);
}
size_t NumPixelsInCopy(const VideoCommon::ImageCopy& copy) {
@ -84,7 +79,7 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
.width = VideoCore::Surface::DefaultBlockWidth(image.info.format),
.height = VideoCore::Surface::DefaultBlockHeight(image.info.format),
};
program_manager.BindProgram(astc_decoder_program.handle);
program_manager.BindComputeProgram(astc_decoder_program.handle);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle);
@ -132,7 +127,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
static constexpr GLuint BINDING_INPUT_BUFFER = 1;
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
program_manager.BindProgram(block_linear_unswizzle_2d_program.handle);
program_manager.BindComputeProgram(block_linear_unswizzle_2d_program.handle);
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
@ -171,7 +166,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
program_manager.BindProgram(block_linear_unswizzle_3d_program.handle);
program_manager.BindComputeProgram(block_linear_unswizzle_3d_program.handle);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
@ -220,7 +215,7 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map,
UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block),
"Non-power of two images are not implemented");
program_manager.BindProgram(pitch_unswizzle_program.handle);
program_manager.BindComputeProgram(pitch_unswizzle_program.handle);
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
glUniform2ui(LOC_ORIGIN, 0, 0);
glUniform2i(LOC_DESTINATION, 0, 0);
@ -248,7 +243,7 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const Im
static constexpr GLuint LOC_SRC_OFFSET = 0;
static constexpr GLuint LOC_DST_OFFSET = 1;
program_manager.BindProgram(copy_bc4_program.handle);
program_manager.BindComputeProgram(copy_bc4_program.handle);
for (const ImageCopy& copy : copies) {
ASSERT(copy.src_subresource.base_layer == 0);
@ -284,7 +279,7 @@ void UtilShaders::CopyBGR(Image& dst_image, Image& src_image,
break;
case 4: {
// BGRA8 copy
program_manager.BindProgram(copy_bgra_program.handle);
program_manager.BindComputeProgram(copy_bgra_program.handle);
constexpr GLenum FORMAT = GL_RGBA8;
for (const ImageCopy& copy : copies) {
ASSERT(copy.src_offset == zero_offset);