Merge pull request #2358 from ReinUsesLisp/parallel-shader

gl_shader_cache: Use shared contexts to build shaders in parallel at boot
This commit is contained in:
bunnei 2019-05-24 22:42:08 -04:00 committed by GitHub
commit 68c9c9222d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 124 additions and 64 deletions

View file

@ -98,9 +98,11 @@ struct FramebufferCacheKey {
} }
}; };
RasterizerOpenGL::RasterizerOpenGL(Core::System& system, ScreenInfo& info) RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
: res_cache{*this}, shader_cache{*this, system, device}, global_cache{*this}, system{system}, ScreenInfo& info)
screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) { : res_cache{*this}, shader_cache{*this, system, emu_window, device},
global_cache{*this}, system{system}, screen_info{info},
buffer_cache(*this, STREAM_BUFFER_SIZE) {
OpenGLState::ApplyDefaultState(); OpenGLState::ApplyDefaultState();
shader_program_manager = std::make_unique<GLShader::ProgramManager>(); shader_program_manager = std::make_unique<GLShader::ProgramManager>();

View file

@ -48,7 +48,8 @@ struct FramebufferCacheKey;
class RasterizerOpenGL : public VideoCore::RasterizerInterface { class RasterizerOpenGL : public VideoCore::RasterizerInterface {
public: public:
explicit RasterizerOpenGL(Core::System& system, ScreenInfo& info); explicit RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
ScreenInfo& info);
~RasterizerOpenGL() override; ~RasterizerOpenGL() override;
void DrawArrays() override; void DrawArrays() override;

View file

@ -2,10 +2,14 @@
// Licensed under GPLv2 or any later version // Licensed under GPLv2 or any later version
// Refer to the license.txt file included. // Refer to the license.txt file included.
#include <mutex>
#include <thread>
#include <boost/functional/hash.hpp> #include <boost/functional/hash.hpp>
#include "common/assert.h" #include "common/assert.h"
#include "common/hash.h" #include "common/hash.h"
#include "common/scope_exit.h"
#include "core/core.h" #include "core/core.h"
#include "core/frontend/emu_window.h"
#include "video_core/engines/maxwell_3d.h" #include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h" #include "video_core/memory_manager.h"
#include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_rasterizer.h"
@ -344,8 +348,8 @@ ShaderDiskCacheUsage CachedShader::GetUsage(GLenum primitive_mode,
} }
ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
const Device& device) Core::Frontend::EmuWindow& emu_window, const Device& device)
: RasterizerCache{rasterizer}, device{device}, disk_cache{system} {} : RasterizerCache{rasterizer}, emu_window{emu_window}, device{device}, disk_cache{system} {}
void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) { const VideoCore::DiskResourceLoadCallback& callback) {
@ -353,62 +357,107 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
if (!transferable) { if (!transferable) {
return; return;
} }
const auto [raws, usages] = *transferable; const auto [raws, shader_usages] = *transferable;
auto [decompiled, dumps] = disk_cache.LoadPrecompiled(); auto [decompiled, dumps] = disk_cache.LoadPrecompiled();
const auto supported_formats{GetSupportedFormats()}; const auto supported_formats{GetSupportedFormats()};
const auto unspecialized{ const auto unspecialized_shaders{
GenerateUnspecializedShaders(stop_loading, callback, raws, decompiled)}; GenerateUnspecializedShaders(stop_loading, callback, raws, decompiled)};
if (stop_loading) if (stop_loading) {
return; return;
}
// Track if precompiled cache was altered during loading to know if we have to serialize the // Track if precompiled cache was altered during loading to know if we have to serialize the
// virtual precompiled cache file back to the hard drive // virtual precompiled cache file back to the hard drive
bool precompiled_cache_altered = false; bool precompiled_cache_altered = false;
// Build shaders // Inform the frontend about shader build initialization
if (callback) if (callback) {
callback(VideoCore::LoadCallbackStage::Build, 0, usages.size()); callback(VideoCore::LoadCallbackStage::Build, 0, shader_usages.size());
for (std::size_t i = 0; i < usages.size(); ++i) { }
if (stop_loading)
return;
const auto& usage{usages[i]}; std::mutex mutex;
LOG_INFO(Render_OpenGL, "Building shader {:016x} ({} of {})", usage.unique_identifier, std::size_t built_shaders = 0; // It doesn't have to be atomic since it's used behind a mutex
i + 1, usages.size()); std::atomic_bool compilation_failed = false;
const auto& unspec{unspecialized.at(usage.unique_identifier)}; const auto Worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin,
const auto dump_it = dumps.find(usage); std::size_t end, const std::vector<ShaderDiskCacheUsage>& shader_usages,
const ShaderDumpsMap& dumps) {
context->MakeCurrent();
SCOPE_EXIT({ return context->DoneCurrent(); });
CachedProgram shader; for (std::size_t i = begin; i < end; ++i) {
if (dump_it != dumps.end()) { if (stop_loading || compilation_failed) {
// If the shader is dumped, attempt to load it with return;
shader = GeneratePrecompiledProgram(dump_it->second, supported_formats);
if (!shader) {
// Invalidate the precompiled cache if a dumped shader was rejected
disk_cache.InvalidatePrecompiled();
precompiled_cache_altered = true;
dumps.clear();
} }
} const auto& usage{shader_usages[i]};
if (!shader) { LOG_INFO(Render_OpenGL, "Building shader {:016x} (index {} of {})",
shader = SpecializeShader(unspec.code, unspec.entries, unspec.program_type, usage.unique_identifier, i, shader_usages.size());
usage.bindings, usage.primitive, true);
}
precompiled_programs.insert({usage, std::move(shader)});
if (callback) const auto& unspecialized{unspecialized_shaders.at(usage.unique_identifier)};
callback(VideoCore::LoadCallbackStage::Build, i + 1, usages.size()); const auto dump{dumps.find(usage)};
CachedProgram shader;
if (dump != dumps.end()) {
// If the shader is dumped, attempt to load it from the precompiled dump
shader = GeneratePrecompiledProgram(dump->second, supported_formats);
if (!shader) {
compilation_failed = true;
return;
}
}
if (!shader) {
shader = SpecializeShader(unspecialized.code, unspecialized.entries,
unspecialized.program_type, usage.bindings,
usage.primitive, true);
}
std::scoped_lock lock(mutex);
if (callback) {
callback(VideoCore::LoadCallbackStage::Build, ++built_shaders,
shader_usages.size());
}
precompiled_programs.emplace(usage, std::move(shader));
}
};
const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1)};
const std::size_t bucket_size{shader_usages.size() / num_workers};
std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers);
std::vector<std::thread> threads(num_workers);
for (std::size_t i = 0; i < num_workers; ++i) {
const bool is_last_worker = i + 1 == num_workers;
const std::size_t start{bucket_size * i};
const std::size_t end{is_last_worker ? shader_usages.size() : start + bucket_size};
// On some platforms the shared context has to be created from the GUI thread
contexts[i] = emu_window.CreateSharedContext();
threads[i] = std::thread(Worker, contexts[i].get(), start, end, shader_usages, dumps);
}
for (auto& thread : threads) {
thread.join();
}
if (compilation_failed) {
// Invalidate the precompiled cache if a dumped shader was rejected
disk_cache.InvalidatePrecompiled();
dumps.clear();
precompiled_cache_altered = true;
return;
}
if (stop_loading) {
return;
} }
// TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw before // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw before
// precompiling them // precompiling them
for (std::size_t i = 0; i < usages.size(); ++i) { for (std::size_t i = 0; i < shader_usages.size(); ++i) {
const auto& usage{usages[i]}; const auto& usage{shader_usages[i]};
if (dumps.find(usage) == dumps.end()) { if (dumps.find(usage) == dumps.end()) {
const auto& program = precompiled_programs.at(usage); const auto& program{precompiled_programs.at(usage)};
disk_cache.SaveDump(usage, program->handle); disk_cache.SaveDump(usage, program->handle);
precompiled_cache_altered = true; precompiled_cache_altered = true;
} }

View file

@ -22,7 +22,11 @@
namespace Core { namespace Core {
class System; class System;
} // namespace Core }
namespace Core::Frontend {
class EmuWindow;
}
namespace OpenGL { namespace OpenGL {
@ -111,7 +115,7 @@ private:
class ShaderCacheOpenGL final : public RasterizerCache<Shader> { class ShaderCacheOpenGL final : public RasterizerCache<Shader> {
public: public:
explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
const Device& device); Core::Frontend::EmuWindow& emu_window, const Device& device);
/// Loads disk cache for the current game /// Loads disk cache for the current game
void LoadDiskCache(const std::atomic_bool& stop_loading, void LoadDiskCache(const std::atomic_bool& stop_loading,
@ -133,13 +137,13 @@ private:
CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump, CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump,
const std::set<GLenum>& supported_formats); const std::set<GLenum>& supported_formats);
Core::Frontend::EmuWindow& emu_window;
const Device& device; const Device& device;
std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
ShaderDiskCacheOpenGL disk_cache; ShaderDiskCacheOpenGL disk_cache;
PrecompiledShaders precompiled_shaders; PrecompiledShaders precompiled_shaders;
PrecompiledPrograms precompiled_programs; PrecompiledPrograms precompiled_programs;
std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
}; };
} // namespace OpenGL } // namespace OpenGL

View file

@ -183,8 +183,7 @@ ShaderDiskCacheOpenGL::LoadTransferable() {
return {{raws, usages}}; return {{raws, usages}};
} }
std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, ShaderDumpsMap>
std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>
ShaderDiskCacheOpenGL::LoadPrecompiled() { ShaderDiskCacheOpenGL::LoadPrecompiled() {
if (!IsUsable()) if (!IsUsable())
return {}; return {};
@ -208,8 +207,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiled() {
return *result; return *result;
} }
std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, ShaderDumpsMap>>
std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>>
ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
// Read compressed file from disk and decompress to virtual precompiled cache file // Read compressed file from disk and decompress to virtual precompiled cache file
std::vector<u8> compressed(file.GetSize()); std::vector<u8> compressed(file.GetSize());
@ -230,7 +228,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
} }
std::unordered_map<u64, ShaderDiskCacheDecompiled> decompiled; std::unordered_map<u64, ShaderDiskCacheDecompiled> decompiled;
std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> dumps; ShaderDumpsMap dumps;
while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) { while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) {
PrecompiledEntryKind kind{}; PrecompiledEntryKind kind{};
if (!LoadObjectFromPrecompiled(kind)) { if (!LoadObjectFromPrecompiled(kind)) {

View file

@ -33,6 +33,11 @@ namespace OpenGL {
using ProgramCode = std::vector<u64>; using ProgramCode = std::vector<u64>;
using Maxwell = Tegra::Engines::Maxwell3D::Regs; using Maxwell = Tegra::Engines::Maxwell3D::Regs;
struct ShaderDiskCacheUsage;
struct ShaderDiskCacheDump;
using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>;
/// Allocated bindings used by an OpenGL shader program /// Allocated bindings used by an OpenGL shader program
struct BaseBindings { struct BaseBindings {
u32 cbuf{}; u32 cbuf{};

View file

@ -97,8 +97,8 @@ static std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(const float width, cons
return matrix; return matrix;
} }
RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& window, Core::System& system) RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system)
: VideoCore::RendererBase{window}, system{system} {} : VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system} {}
RendererOpenGL::~RendererOpenGL() = default; RendererOpenGL::~RendererOpenGL() = default;
@ -265,7 +265,7 @@ void RendererOpenGL::CreateRasterizer() {
} }
// Initialize sRGB Usage // Initialize sRGB Usage
OpenGLState::ClearsRGBUsed(); OpenGLState::ClearsRGBUsed();
rasterizer = std::make_unique<RasterizerOpenGL>(system, screen_info); rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info);
} }
void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,

View file

@ -45,7 +45,7 @@ struct ScreenInfo {
class RendererOpenGL : public VideoCore::RendererBase { class RendererOpenGL : public VideoCore::RendererBase {
public: public:
explicit RendererOpenGL(Core::Frontend::EmuWindow& window, Core::System& system); explicit RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system);
~RendererOpenGL() override; ~RendererOpenGL() override;
/// Swap buffers (render frame) /// Swap buffers (render frame)
@ -77,6 +77,7 @@ private:
void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a, void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a,
const TextureInfo& texture); const TextureInfo& texture);
Core::Frontend::EmuWindow& emu_window;
Core::System& system; Core::System& system;
OpenGLState state; OpenGLState state;

View file

@ -91,25 +91,25 @@ void EmuThread::run() {
class GGLContext : public Core::Frontend::GraphicsContext { class GGLContext : public Core::Frontend::GraphicsContext {
public: public:
explicit GGLContext(QOpenGLContext* shared_context) explicit GGLContext(QOpenGLContext* shared_context) : shared_context{shared_context} {
: context{std::make_unique<QOpenGLContext>(shared_context)} { context.setFormat(shared_context->format());
surface.setFormat(shared_context->format()); context.setShareContext(shared_context);
surface.create(); context.create();
} }
void MakeCurrent() override { void MakeCurrent() override {
context->makeCurrent(&surface); context.makeCurrent(shared_context->surface());
} }
void DoneCurrent() override { void DoneCurrent() override {
context->doneCurrent(); context.doneCurrent();
} }
void SwapBuffers() override {} void SwapBuffers() override {}
private: private:
std::unique_ptr<QOpenGLContext> context; QOpenGLContext* shared_context;
QOffscreenSurface surface; QOpenGLContext context;
}; };
// This class overrides paintEvent and resizeEvent to prevent the GUI thread from stealing GL // This class overrides paintEvent and resizeEvent to prevent the GUI thread from stealing GL
@ -358,7 +358,7 @@ void GRenderWindow::OnClientAreaResized(unsigned width, unsigned height) {
} }
std::unique_ptr<Core::Frontend::GraphicsContext> GRenderWindow::CreateSharedContext() const { std::unique_ptr<Core::Frontend::GraphicsContext> GRenderWindow::CreateSharedContext() const {
return std::make_unique<GGLContext>(shared_context.get()); return std::make_unique<GGLContext>(context.get());
} }
void GRenderWindow::InitRenderTarget() { void GRenderWindow::InitRenderTarget() {