Merge pull request #3986 from ReinUsesLisp/shader-cache
shader_cache: Implement a generic runtime shader cache
This commit is contained in:
commit
5633887569
14 changed files with 364 additions and 417 deletions
|
@ -49,8 +49,6 @@ add_library(video_core STATIC
|
|||
query_cache.h
|
||||
rasterizer_accelerated.cpp
|
||||
rasterizer_accelerated.h
|
||||
rasterizer_cache.cpp
|
||||
rasterizer_cache.h
|
||||
rasterizer_interface.h
|
||||
renderer_base.cpp
|
||||
renderer_base.h
|
||||
|
@ -93,6 +91,7 @@ add_library(video_core STATIC
|
|||
renderer_opengl/utils.h
|
||||
sampler_cache.cpp
|
||||
sampler_cache.h
|
||||
shader_cache.h
|
||||
shader/decode/arithmetic.cpp
|
||||
shader/decode/arithmetic_immediate.cpp
|
||||
shader/decode/bfe.cpp
|
||||
|
|
|
@ -1,7 +0,0 @@
|
|||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "video_core/rasterizer_cache.h"
|
||||
|
||||
// Out-of-line, defaulted definition of the virtual destructor that is declared in
// "video_core/rasterizer_cache.h" (the header included above).
RasterizerCacheObject::~RasterizerCacheObject() = default;
|
|
@ -1,253 +0,0 @@
|
|||
// Copyright 2018 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <mutex>
|
||||
#include <set>
|
||||
#include <unordered_map>
|
||||
|
||||
#include <boost/icl/interval_map.hpp>
|
||||
#include <boost/range/iterator_range_core.hpp>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "core/settings.h"
|
||||
#include "video_core/gpu.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
|
||||
/// Base class for objects stored in a RasterizerCache.
/// Holds the CPU address the object was created for plus the bookkeeping flags the cache reads
/// and writes (registered, dirty, memory-marked, sync-pending) and a last-modified tick.
/// Derived classes must report their size through GetSizeInBytes().
class RasterizerCacheObject {
|
||||
public:
|
||||
/// Creates a cache object associated with the given CPU address.
explicit RasterizerCacheObject(const VAddr cpu_addr) : cpu_addr{cpu_addr} {}
|
||||
|
||||
// Virtual so derived cache objects can be destroyed through a base pointer.
virtual ~RasterizerCacheObject();
|
||||
|
||||
/// Returns the CPU address passed to the constructor
VAddr GetCpuAddr() const {
|
||||
return cpu_addr;
|
||||
}
|
||||
|
||||
/// Gets the size of the cached object in guest memory, required for cache management
|
||||
virtual std::size_t GetSizeInBytes() const = 0;
|
||||
|
||||
/// Sets whether the cached object should be considered registered
|
||||
void SetIsRegistered(bool registered) {
|
||||
is_registered = registered;
|
||||
}
|
||||
|
||||
/// Returns true if the cached object is registered
|
||||
bool IsRegistered() const {
|
||||
return is_registered;
|
||||
}
|
||||
|
||||
/// Returns true if the cached object is dirty
|
||||
bool IsDirty() const {
|
||||
return is_dirty;
|
||||
}
|
||||
|
||||
/// Returns ticks from when this cached object was last modified
|
||||
u64 GetLastModifiedTicks() const {
|
||||
return last_modified_ticks;
|
||||
}
|
||||
|
||||
/// Marks an object as recently modified, used to specify whether it is clean or dirty
|
||||
/// @param dirty New value of the dirty flag.
/// @param cache Object providing GetModifiedTicks(); its return value becomes the new
///              last-modified tick of this object.
template <class T>
|
||||
void MarkAsModified(bool dirty, T& cache) {
|
||||
is_dirty = dirty;
|
||||
last_modified_ticks = cache.GetModifiedTicks();
|
||||
}
|
||||
|
||||
/// Sets whether the object is currently flagged as marking rasterizer memory
void SetMemoryMarked(bool is_memory_marked_) {
|
||||
is_memory_marked = is_memory_marked_;
|
||||
}
|
||||
|
||||
/// Returns true if the object is currently flagged as marking rasterizer memory
bool IsMemoryMarked() const {
|
||||
return is_memory_marked;
|
||||
}
|
||||
|
||||
/// Sets whether the object is queued for a deferred unregister
void SetSyncPending(bool is_sync_pending_) {
|
||||
is_sync_pending = is_sync_pending_;
|
||||
}
|
||||
|
||||
/// Returns true if the object is queued for a deferred unregister
bool IsSyncPending() const {
|
||||
return is_sync_pending;
|
||||
}
|
||||
|
||||
private:
|
||||
bool is_registered{}; ///< Whether the object is currently registered with the cache
|
||||
bool is_dirty{}; ///< Whether the object is dirty (out of sync with guest memory)
|
||||
bool is_memory_marked{}; ///< Whether the object is marking rasterizer memory.
|
||||
bool is_sync_pending{}; ///< Whether the object is pending deletion.
|
||||
u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing
|
||||
VAddr cpu_addr{}; ///< CPU address this object was constructed with
|
||||
};
|
||||
|
||||
/// Generic cache of objects keyed by CPU address.
/// T is a pointer-like handle: it is dereferenced with `->`, tested with `!`, and TryGet returns
/// `nullptr` as a T. Objects are tracked twice: in `map_cache` by their exact CPU address and in
/// `interval_cache` by the address range [GetCpuAddr(), GetCpuAddr() + GetSizeInBytes()).
/// Methods take `mutex`, a recursive mutex, so methods that already hold the lock can call other
/// locking methods (e.g. InvalidateRegion -> Unregister).
/// NOTE(review): this header uses std::vector, std::list and std::sort, but <vector>, <list> and
/// <algorithm> are not among the includes visible here; presumably they arrive transitively.
template <class T>
|
||||
class RasterizerCache : NonCopyable {
|
||||
// Lets RasterizerCacheObject::MarkAsModified call the protected GetModifiedTicks().
friend class RasterizerCacheObject;
|
||||
|
||||
public:
|
||||
/// Stores a reference to the rasterizer that is notified of cached page count changes
explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {}
|
||||
|
||||
/// Write any cached resources overlapping the specified region back to memory
|
||||
/// Objects are flushed in ascending order of their last-modified ticks.
void FlushRegion(VAddr addr, std::size_t size) {
|
||||
std::lock_guard lock{mutex};
|
||||
|
||||
const auto& objects{GetSortedObjectsFromRegion(addr, size)};
|
||||
for (auto& object : objects) {
|
||||
FlushObject(object);
|
||||
}
|
||||
}
|
||||
|
||||
/// Mark the specified region as being invalidated
|
||||
/// Every registered object overlapping the region is unregistered.
void InvalidateRegion(VAddr addr, u64 size) {
|
||||
std::lock_guard lock{mutex};
|
||||
|
||||
const auto& objects{GetSortedObjectsFromRegion(addr, size)};
|
||||
for (auto& object : objects) {
|
||||
if (!object->IsRegistered()) {
|
||||
// Skip duplicates: the list holds one entry per interval segment an object appears
// in, so an object already unregistered by this loop can show up again.
|
||||
continue;
|
||||
}
|
||||
Unregister(object);
|
||||
}
|
||||
}
|
||||
|
||||
/// Handles a CPU write to the specified region without unregistering immediately.
/// Each registered overlapping object has its memory unmarked, is flagged as sync pending and
/// is queued in `marked_for_unregister`; the unregister itself happens in SyncGuestHost().
void OnCPUWrite(VAddr addr, std::size_t size) {
|
||||
std::lock_guard lock{mutex};
|
||||
|
||||
for (const auto& object : GetSortedObjectsFromRegion(addr, size)) {
|
||||
if (object->IsRegistered()) {
|
||||
UnmarkMemory(object);
|
||||
object->SetSyncPending(true);
|
||||
marked_for_unregister.emplace_back(object);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Unregisters every object queued by OnCPUWrite() that is still registered, then empties
/// the queue.
void SyncGuestHost() {
|
||||
std::lock_guard lock{mutex};
|
||||
|
||||
for (const auto& object : marked_for_unregister) {
|
||||
if (object->IsRegistered()) {
|
||||
// Clear the flag first: Unregister() only calls marked_for_unregister.remove() for
// sync-pending objects, and that list is being iterated here.
object->SetSyncPending(false);
|
||||
Unregister(object);
|
||||
}
|
||||
}
|
||||
marked_for_unregister.clear();
|
||||
}
|
||||
|
||||
/// Invalidates everything in the cache
|
||||
void InvalidateAll() {
|
||||
std::lock_guard lock{mutex};
|
||||
|
||||
// Unregister() subtracts the object from interval_cache, so this loop ends once the
// interval cache is empty.
while (interval_cache.begin() != interval_cache.end()) {
|
||||
Unregister(*interval_cache.begin()->second.begin());
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
/// Tries to get an object from the cache with the specified cache address
|
||||
/// Looks up `map_cache` by exact address; returns nullptr when there is no entry.
/// NOTE(review): unlike the other accessors this one does not take `mutex`.
T TryGet(VAddr addr) const {
|
||||
const auto iter = map_cache.find(addr);
|
||||
if (iter != map_cache.end())
|
||||
return iter->second;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
/// Register an object into the cache
|
||||
/// Flags the object as registered, adds it to both lookup structures, asks the rasterizer to
/// raise the cached page count for its range by one and flags its memory as marked.
virtual void Register(const T& object) {
|
||||
std::lock_guard lock{mutex};
|
||||
|
||||
object->SetIsRegistered(true);
|
||||
interval_cache.add({GetInterval(object), ObjectSet{object}});
|
||||
map_cache.insert({object->GetCpuAddr(), object});
|
||||
rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1);
|
||||
object->SetMemoryMarked(true);
|
||||
}
|
||||
|
||||
/// Unregisters an object from the cache
|
||||
/// Unmarks its memory, clears the registered flag, drops it from the deferred-unregister queue
/// if it was sync pending, and removes it from both lookup structures.
virtual void Unregister(const T& object) {
|
||||
std::lock_guard lock{mutex};
|
||||
|
||||
UnmarkMemory(object);
|
||||
object->SetIsRegistered(false);
|
||||
if (object->IsSyncPending()) {
|
||||
marked_for_unregister.remove(object);
|
||||
object->SetSyncPending(false);
|
||||
}
|
||||
// The address is read before the containers are modified; presumably because `object` may
// refer to an element stored inside them (InvalidateAll passes such a reference).
const VAddr addr = object->GetCpuAddr();
|
||||
interval_cache.subtract({GetInterval(object), ObjectSet{object}});
|
||||
map_cache.erase(addr);
|
||||
}
|
||||
|
||||
/// Lowers the rasterizer's cached page count for the object's range by one and clears the
/// memory-marked flag. Does nothing if the object's memory is not currently marked.
void UnmarkMemory(const T& object) {
|
||||
if (!object->IsMemoryMarked()) {
|
||||
return;
|
||||
}
|
||||
rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1);
|
||||
object->SetMemoryMarked(false);
|
||||
}
|
||||
|
||||
/// Returns a ticks counter used for tracking when cached objects were last modified
|
||||
/// The counter is incremented on every call and the incremented value is returned.
u64 GetModifiedTicks() {
|
||||
std::lock_guard lock{mutex};
|
||||
|
||||
return ++modified_ticks;
|
||||
}
|
||||
|
||||
/// Implemented by derived caches to perform the actual flush of a dirty object
virtual void FlushObjectInner(const T& object) = 0;
|
||||
|
||||
/// Flushes the specified object, updating appropriate cache state as needed
|
||||
/// Objects that are not dirty are left untouched. After flushing, the object is marked clean,
/// which also assigns it a new last-modified tick.
void FlushObject(const T& object) {
|
||||
std::lock_guard lock{mutex};
|
||||
|
||||
if (!object->IsDirty()) {
|
||||
return;
|
||||
}
|
||||
FlushObjectInner(object);
|
||||
object->MarkAsModified(false, *this);
|
||||
}
|
||||
|
||||
// Recursive because locking methods call other locking methods of this class.
std::recursive_mutex mutex;
|
||||
|
||||
private:
|
||||
/// Returns a list of cached objects from the specified memory region, ordered by their
/// last-modified ticks (ascending). Returns an empty list when size is zero.
/// An object is appended once per interval segment it is found in, so the list may contain
/// the same object more than once.
|
||||
std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) {
|
||||
if (size == 0) {
|
||||
return {};
|
||||
}
|
||||
|
||||
std::vector<T> objects;
|
||||
const ObjectInterval interval{addr, addr + size};
|
||||
for (auto& pair : boost::make_iterator_range(interval_cache.equal_range(interval))) {
|
||||
for (auto& cached_object : pair.second) {
|
||||
if (!cached_object) {
|
||||
continue;
|
||||
}
|
||||
objects.push_back(cached_object);
|
||||
}
|
||||
}
|
||||
|
||||
std::sort(objects.begin(), objects.end(), [](const T& a, const T& b) -> bool {
|
||||
return a->GetLastModifiedTicks() < b->GetLastModifiedTicks();
|
||||
});
|
||||
|
||||
return objects;
|
||||
}
|
||||
|
||||
using ObjectSet = std::set<T>;
|
||||
using ObjectCache = std::unordered_map<VAddr, T>;
|
||||
using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>;
|
||||
using ObjectInterval = typename IntervalCache::interval_type;
|
||||
|
||||
/// Returns the right-open address interval [cpu_addr, cpu_addr + size) covered by the object
static auto GetInterval(const T& object) {
|
||||
return ObjectInterval::right_open(object->GetCpuAddr(),
|
||||
object->GetCpuAddr() + object->GetSizeInBytes());
|
||||
}
|
||||
|
||||
ObjectCache map_cache; ///< Objects indexed by their exact CPU address
|
||||
IntervalCache interval_cache; ///< Cache of objects
|
||||
u64 modified_ticks{}; ///< Counter of cache state ticks, used for in-order flushing
|
||||
VideoCore::RasterizerInterface& rasterizer; ///< Receives cached page count updates
|
||||
std::list<T> marked_for_unregister; ///< Objects queued by OnCPUWrite for SyncGuestHost
|
||||
};
|
|
@ -10,7 +10,6 @@
|
|||
#include "common/common_types.h"
|
||||
#include "video_core/buffer_cache/buffer_cache.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/rasterizer_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_stream_buffer.h"
|
||||
|
||||
|
|
|
@ -30,6 +30,7 @@
|
|||
#include "video_core/renderer_opengl/gl_shader_cache.h"
|
||||
#include "video_core/renderer_opengl/maxwell_to_gl.h"
|
||||
#include "video_core/renderer_opengl/renderer_opengl.h"
|
||||
#include "video_core/shader_cache.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
|
@ -310,7 +311,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
|
|||
continue;
|
||||
}
|
||||
|
||||
Shader shader{shader_cache.GetStageProgram(program)};
|
||||
Shader* const shader = shader_cache.GetStageProgram(program);
|
||||
|
||||
if (device.UseAssemblyShaders()) {
|
||||
// Check for ARB limitation. We only have 16 SSBOs per context state. To workaround this
|
||||
|
@ -870,7 +871,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
|
|||
return true;
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader) {
|
||||
void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, Shader* shader) {
|
||||
static constexpr std::array PARAMETER_LUT = {
|
||||
GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
|
||||
GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
|
||||
|
@ -900,7 +901,7 @@ void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shad
|
|||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
|
||||
void RasterizerOpenGL::SetupComputeConstBuffers(Shader* kernel) {
|
||||
MICROPROFILE_SCOPE(OpenGL_UBO);
|
||||
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
|
||||
const auto& entries = kernel->GetEntries();
|
||||
|
@ -969,7 +970,7 @@ void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
|
|||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) {
|
||||
void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader) {
|
||||
auto& gpu{system.GPU()};
|
||||
auto& memory_manager{gpu.MemoryManager()};
|
||||
const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]};
|
||||
|
@ -984,7 +985,7 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shad
|
|||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) {
|
||||
void RasterizerOpenGL::SetupComputeGlobalMemory(Shader* kernel) {
|
||||
auto& gpu{system.GPU()};
|
||||
auto& memory_manager{gpu.MemoryManager()};
|
||||
const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config};
|
||||
|
@ -1007,7 +1008,7 @@ void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& e
|
|||
static_cast<GLsizeiptr>(size));
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& shader) {
|
||||
void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, Shader* shader) {
|
||||
MICROPROFILE_SCOPE(OpenGL_Texture);
|
||||
const auto& maxwell3d = system.GPU().Maxwell3D();
|
||||
u32 binding = device.GetBaseBindings(stage_index).sampler;
|
||||
|
@ -1020,7 +1021,7 @@ void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader&
|
|||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) {
|
||||
void RasterizerOpenGL::SetupComputeTextures(Shader* kernel) {
|
||||
MICROPROFILE_SCOPE(OpenGL_Texture);
|
||||
const auto& compute = system.GPU().KeplerCompute();
|
||||
u32 binding = 0;
|
||||
|
@ -1049,7 +1050,7 @@ void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextu
|
|||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) {
|
||||
void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, Shader* shader) {
|
||||
const auto& maxwell3d = system.GPU().Maxwell3D();
|
||||
u32 binding = device.GetBaseBindings(stage_index).image;
|
||||
for (const auto& entry : shader->GetEntries().images) {
|
||||
|
@ -1059,7 +1060,7 @@ void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& sh
|
|||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SetupComputeImages(const Shader& shader) {
|
||||
void RasterizerOpenGL::SetupComputeImages(Shader* shader) {
|
||||
const auto& compute = system.GPU().KeplerCompute();
|
||||
u32 binding = 0;
|
||||
for (const auto& entry : shader->GetEntries().images) {
|
||||
|
|
|
@ -19,7 +19,6 @@
|
|||
#include "video_core/engines/const_buffer_info.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/rasterizer_accelerated.h"
|
||||
#include "video_core/rasterizer_cache.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/renderer_opengl/gl_buffer_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_device.h"
|
||||
|
@ -100,10 +99,10 @@ private:
|
|||
void ConfigureClearFramebuffer(bool using_color, bool using_depth_stencil);
|
||||
|
||||
/// Configures the current constbuffers to use for the draw command.
|
||||
void SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader);
|
||||
void SetupDrawConstBuffers(std::size_t stage_index, Shader* shader);
|
||||
|
||||
/// Configures the current constbuffers to use for the kernel invocation.
|
||||
void SetupComputeConstBuffers(const Shader& kernel);
|
||||
void SetupComputeConstBuffers(Shader* kernel);
|
||||
|
||||
/// Configures a constant buffer.
|
||||
void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
|
||||
|
@ -111,30 +110,30 @@ private:
|
|||
std::size_t unified_offset);
|
||||
|
||||
/// Configures the current global memory entries to use for the draw command.
|
||||
void SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader);
|
||||
void SetupDrawGlobalMemory(std::size_t stage_index, Shader* shader);
|
||||
|
||||
/// Configures the current global memory entries to use for the kernel invocation.
|
||||
void SetupComputeGlobalMemory(const Shader& kernel);
|
||||
void SetupComputeGlobalMemory(Shader* kernel);
|
||||
|
||||
/// Configures a global memory buffer.
|
||||
void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
|
||||
std::size_t size);
|
||||
|
||||
/// Configures the current textures to use for the draw command.
|
||||
void SetupDrawTextures(std::size_t stage_index, const Shader& shader);
|
||||
void SetupDrawTextures(std::size_t stage_index, Shader* shader);
|
||||
|
||||
/// Configures the textures used in a compute shader.
|
||||
void SetupComputeTextures(const Shader& kernel);
|
||||
void SetupComputeTextures(Shader* kernel);
|
||||
|
||||
/// Configures a texture.
|
||||
void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
|
||||
const SamplerEntry& entry);
|
||||
|
||||
/// Configures images in a graphics shader.
|
||||
void SetupDrawImages(std::size_t stage_index, const Shader& shader);
|
||||
void SetupDrawImages(std::size_t stage_index, Shader* shader);
|
||||
|
||||
/// Configures images in a compute shader.
|
||||
void SetupComputeImages(const Shader& shader);
|
||||
void SetupComputeImages(Shader* shader);
|
||||
|
||||
/// Configures an image.
|
||||
void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
#include "video_core/shader/memory_util.h"
|
||||
#include "video_core/shader/registry.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
#include "video_core/shader_cache.h"
|
||||
|
||||
namespace OpenGL {
|
||||
|
||||
|
@ -194,12 +195,9 @@ std::unordered_set<GLenum> GetSupportedFormats() {
|
|||
|
||||
} // Anonymous namespace
|
||||
|
||||
CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
|
||||
std::shared_ptr<VideoCommon::Shader::Registry> registry,
|
||||
ShaderEntries entries, ProgramSharedPtr program_)
|
||||
: RasterizerCacheObject{cpu_addr}, registry{std::move(registry)}, entries{std::move(entries)},
|
||||
size_in_bytes{size_in_bytes}, program{std::move(program_)} {
|
||||
// Assign either the assembly program or source program. We can't have both.
|
||||
Shader::Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry_, ShaderEntries entries_,
|
||||
ProgramSharedPtr program_)
|
||||
: registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)} {
|
||||
handle = program->assembly_program.handle;
|
||||
if (handle == 0) {
|
||||
handle = program->source_program.handle;
|
||||
|
@ -207,16 +205,16 @@ CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
|
|||
ASSERT(handle != 0);
|
||||
}
|
||||
|
||||
CachedShader::~CachedShader() = default;
|
||||
Shader::~Shader() = default;
|
||||
|
||||
GLuint CachedShader::GetHandle() const {
|
||||
GLuint Shader::GetHandle() const {
|
||||
DEBUG_ASSERT(registry->IsConsistent());
|
||||
return handle;
|
||||
}
|
||||
|
||||
Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
|
||||
Maxwell::ShaderProgram program_type, ProgramCode code,
|
||||
ProgramCode code_b) {
|
||||
std::unique_ptr<Shader> Shader::CreateStageFromMemory(const ShaderParameters& params,
|
||||
Maxwell::ShaderProgram program_type,
|
||||
ProgramCode code, ProgramCode code_b) {
|
||||
const auto shader_type = GetShaderType(program_type);
|
||||
const std::size_t size_in_bytes = code.size() * sizeof(u64);
|
||||
|
||||
|
@ -241,12 +239,12 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
|
|||
entry.bindless_samplers = registry->GetBindlessSamplers();
|
||||
params.disk_cache.SaveEntry(std::move(entry));
|
||||
|
||||
return std::shared_ptr<CachedShader>(
|
||||
new CachedShader(params.cpu_addr, size_in_bytes, std::move(registry),
|
||||
MakeEntries(params.device, ir, shader_type), std::move(program)));
|
||||
return std::unique_ptr<Shader>(new Shader(
|
||||
std::move(registry), MakeEntries(params.device, ir, shader_type), std::move(program)));
|
||||
}
|
||||
|
||||
Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) {
|
||||
std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& params,
|
||||
ProgramCode code) {
|
||||
const std::size_t size_in_bytes = code.size() * sizeof(u64);
|
||||
|
||||
auto& engine = params.system.GPU().KeplerCompute();
|
||||
|
@ -266,23 +264,23 @@ Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, Prog
|
|||
entry.bindless_samplers = registry->GetBindlessSamplers();
|
||||
params.disk_cache.SaveEntry(std::move(entry));
|
||||
|
||||
return std::shared_ptr<CachedShader>(
|
||||
new CachedShader(params.cpu_addr, size_in_bytes, std::move(registry),
|
||||
MakeEntries(params.device, ir, ShaderType::Compute), std::move(program)));
|
||||
return std::unique_ptr<Shader>(new Shader(std::move(registry),
|
||||
MakeEntries(params.device, ir, ShaderType::Compute),
|
||||
std::move(program)));
|
||||
}
|
||||
|
||||
Shader CachedShader::CreateFromCache(const ShaderParameters& params,
|
||||
const PrecompiledShader& precompiled_shader,
|
||||
std::size_t size_in_bytes) {
|
||||
return std::shared_ptr<CachedShader>(
|
||||
new CachedShader(params.cpu_addr, size_in_bytes, precompiled_shader.registry,
|
||||
precompiled_shader.entries, precompiled_shader.program));
|
||||
std::unique_ptr<Shader> Shader::CreateFromCache(const ShaderParameters& params,
|
||||
const PrecompiledShader& precompiled_shader) {
|
||||
return std::unique_ptr<Shader>(new Shader(
|
||||
precompiled_shader.registry, precompiled_shader.entries, precompiled_shader.program));
|
||||
}
|
||||
|
||||
ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
|
||||
Core::Frontend::EmuWindow& emu_window, const Device& device)
|
||||
: RasterizerCache{rasterizer}, system{system}, emu_window{emu_window}, device{device},
|
||||
disk_cache{system} {}
|
||||
: VideoCommon::ShaderCache<Shader>{rasterizer}, system{system},
|
||||
emu_window{emu_window}, device{device}, disk_cache{system} {}
|
||||
|
||||
ShaderCacheOpenGL::~ShaderCacheOpenGL() = default;
|
||||
|
||||
void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
|
||||
const VideoCore::DiskResourceLoadCallback& callback) {
|
||||
|
@ -436,7 +434,7 @@ ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram(
|
|||
return program;
|
||||
}
|
||||
|
||||
Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
|
||||
Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
|
||||
if (!system.GPU().Maxwell3D().dirty.flags[Dirty::Shaders]) {
|
||||
return last_shaders[static_cast<std::size_t>(program)];
|
||||
}
|
||||
|
@ -446,8 +444,7 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
|
|||
|
||||
// Look up shader in the cache based on address
|
||||
const auto cpu_addr{memory_manager.GpuToCpuAddress(address)};
|
||||
Shader shader{cpu_addr ? TryGet(*cpu_addr) : null_shader};
|
||||
if (shader) {
|
||||
if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) {
|
||||
return last_shaders[static_cast<std::size_t>(program)] = shader;
|
||||
}
|
||||
|
||||
|
@ -468,30 +465,29 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
|
|||
const ShaderParameters params{system, disk_cache, device,
|
||||
*cpu_addr, host_ptr, unique_identifier};
|
||||
|
||||
std::unique_ptr<Shader> shader;
|
||||
const auto found = runtime_cache.find(unique_identifier);
|
||||
if (found == runtime_cache.end()) {
|
||||
shader = CachedShader::CreateStageFromMemory(params, program, std::move(code),
|
||||
std::move(code_b));
|
||||
shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b));
|
||||
} else {
|
||||
const std::size_t size_in_bytes = code.size() * sizeof(u64);
|
||||
shader = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
|
||||
shader = Shader::CreateFromCache(params, found->second);
|
||||
}
|
||||
|
||||
Shader* const result = shader.get();
|
||||
if (cpu_addr) {
|
||||
Register(shader);
|
||||
Register(std::move(shader), *cpu_addr, code.size() * sizeof(u64));
|
||||
} else {
|
||||
null_shader = shader;
|
||||
null_shader = std::move(shader);
|
||||
}
|
||||
|
||||
return last_shaders[static_cast<std::size_t>(program)] = shader;
|
||||
return last_shaders[static_cast<std::size_t>(program)] = result;
|
||||
}
|
||||
|
||||
Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
|
||||
Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
|
||||
auto& memory_manager{system.GPU().MemoryManager()};
|
||||
const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)};
|
||||
|
||||
auto kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel;
|
||||
if (kernel) {
|
||||
if (Shader* const kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get()) {
|
||||
return kernel;
|
||||
}
|
||||
|
||||
|
@ -503,20 +499,21 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
|
|||
const ShaderParameters params{system, disk_cache, device,
|
||||
*cpu_addr, host_ptr, unique_identifier};
|
||||
|
||||
std::unique_ptr<Shader> kernel;
|
||||
const auto found = runtime_cache.find(unique_identifier);
|
||||
if (found == runtime_cache.end()) {
|
||||
kernel = CachedShader::CreateKernelFromMemory(params, std::move(code));
|
||||
kernel = Shader::CreateKernelFromMemory(params, std::move(code));
|
||||
} else {
|
||||
const std::size_t size_in_bytes = code.size() * sizeof(u64);
|
||||
kernel = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
|
||||
kernel = Shader::CreateFromCache(params, found->second);
|
||||
}
|
||||
|
||||
Shader* const result = kernel.get();
|
||||
if (cpu_addr) {
|
||||
Register(kernel);
|
||||
Register(std::move(kernel), *cpu_addr, code.size() * sizeof(u64));
|
||||
} else {
|
||||
null_kernel = kernel;
|
||||
null_kernel = std::move(kernel);
|
||||
}
|
||||
return kernel;
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
||||
|
|
|
@ -18,12 +18,12 @@
|
|||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/shader_type.h"
|
||||
#include "video_core/rasterizer_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
|
||||
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
|
||||
#include "video_core/shader/registry.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
#include "video_core/shader_cache.h"
|
||||
|
||||
namespace Core {
|
||||
class System;
|
||||
|
@ -35,12 +35,10 @@ class EmuWindow;
|
|||
|
||||
namespace OpenGL {
|
||||
|
||||
class CachedShader;
|
||||
class Device;
|
||||
class RasterizerOpenGL;
|
||||
struct UnspecializedShader;
|
||||
|
||||
using Shader = std::shared_ptr<CachedShader>;
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
|
||||
struct ProgramHandle {
|
||||
|
@ -64,62 +62,53 @@ struct ShaderParameters {
|
|||
u64 unique_identifier;
|
||||
};
|
||||
|
||||
class CachedShader final : public RasterizerCacheObject {
|
||||
class Shader final {
|
||||
public:
|
||||
~CachedShader();
|
||||
~Shader();
|
||||
|
||||
/// Gets the GL program handle for the shader
|
||||
GLuint GetHandle() const;
|
||||
|
||||
/// Returns the size in bytes of the shader
|
||||
std::size_t GetSizeInBytes() const override {
|
||||
return size_in_bytes;
|
||||
}
|
||||
|
||||
/// Gets the shader entries for the shader
|
||||
const ShaderEntries& GetEntries() const {
|
||||
return entries;
|
||||
}
|
||||
|
||||
static Shader CreateStageFromMemory(const ShaderParameters& params,
|
||||
Maxwell::ShaderProgram program_type,
|
||||
ProgramCode program_code, ProgramCode program_code_b);
|
||||
static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code);
|
||||
static std::unique_ptr<Shader> CreateStageFromMemory(const ShaderParameters& params,
|
||||
Maxwell::ShaderProgram program_type,
|
||||
ProgramCode program_code,
|
||||
ProgramCode program_code_b);
|
||||
static std::unique_ptr<Shader> CreateKernelFromMemory(const ShaderParameters& params,
|
||||
ProgramCode code);
|
||||
|
||||
static Shader CreateFromCache(const ShaderParameters& params,
|
||||
const PrecompiledShader& precompiled_shader,
|
||||
std::size_t size_in_bytes);
|
||||
static std::unique_ptr<Shader> CreateFromCache(const ShaderParameters& params,
|
||||
const PrecompiledShader& precompiled_shader);
|
||||
|
||||
private:
|
||||
explicit CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
|
||||
std::shared_ptr<VideoCommon::Shader::Registry> registry,
|
||||
ShaderEntries entries, ProgramSharedPtr program);
|
||||
explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries,
|
||||
ProgramSharedPtr program);
|
||||
|
||||
std::shared_ptr<VideoCommon::Shader::Registry> registry;
|
||||
ShaderEntries entries;
|
||||
std::size_t size_in_bytes = 0;
|
||||
ProgramSharedPtr program;
|
||||
GLuint handle = 0;
|
||||
};
|
||||
|
||||
class ShaderCacheOpenGL final : public RasterizerCache<Shader> {
|
||||
class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> {
|
||||
public:
|
||||
explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
|
||||
Core::Frontend::EmuWindow& emu_window, const Device& device);
|
||||
~ShaderCacheOpenGL() override;
|
||||
|
||||
/// Loads disk cache for the current game
|
||||
void LoadDiskCache(const std::atomic_bool& stop_loading,
|
||||
const VideoCore::DiskResourceLoadCallback& callback);
|
||||
|
||||
/// Gets the current specified shader stage program
|
||||
Shader GetStageProgram(Maxwell::ShaderProgram program);
|
||||
Shader* GetStageProgram(Maxwell::ShaderProgram program);
|
||||
|
||||
/// Gets a compute kernel in the passed address
|
||||
Shader GetComputeKernel(GPUVAddr code_addr);
|
||||
|
||||
protected:
|
||||
// We do not have to flush this cache as things in it are never modified by us.
|
||||
void FlushObjectInner(const Shader& object) override {}
|
||||
Shader* GetComputeKernel(GPUVAddr code_addr);
|
||||
|
||||
private:
|
||||
ProgramSharedPtr GeneratePrecompiledProgram(
|
||||
|
@ -132,10 +121,10 @@ private:
|
|||
ShaderDiskCacheOpenGL disk_cache;
|
||||
std::unordered_map<u64, PrecompiledShader> runtime_cache;
|
||||
|
||||
Shader null_shader{};
|
||||
Shader null_kernel{};
|
||||
std::unique_ptr<Shader> null_shader;
|
||||
std::unique_ptr<Shader> null_kernel;
|
||||
|
||||
std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
|
||||
std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{};
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
||||
|
|
|
@ -8,7 +8,6 @@
|
|||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/buffer_cache/buffer_cache.h"
|
||||
#include "video_core/rasterizer_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_memory_manager.h"
|
||||
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_stream_buffer.h"
|
||||
|
|
|
@ -27,6 +27,7 @@
|
|||
#include "video_core/renderer_vulkan/wrapper.h"
|
||||
#include "video_core/shader/compiler_settings.h"
|
||||
#include "video_core/shader/memory_util.h"
|
||||
#include "video_core/shader_cache.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
|
@ -132,19 +133,18 @@ bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) con
|
|||
return std::memcmp(&rhs, this, sizeof *this) == 0;
|
||||
}
|
||||
|
||||
CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage,
|
||||
GPUVAddr gpu_addr, VAddr cpu_addr, ProgramCode program_code,
|
||||
u32 main_offset)
|
||||
: RasterizerCacheObject{cpu_addr}, gpu_addr{gpu_addr}, program_code{std::move(program_code)},
|
||||
Shader::Shader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
|
||||
VideoCommon::Shader::ProgramCode program_code, u32 main_offset)
|
||||
: gpu_addr{gpu_addr}, program_code{std::move(program_code)},
|
||||
registry{stage, GetEngine(system, stage)}, shader_ir{this->program_code, main_offset,
|
||||
compiler_settings, registry},
|
||||
entries{GenerateShaderEntries(shader_ir)} {}
|
||||
|
||||
CachedShader::~CachedShader() = default;
|
||||
Shader::~Shader() = default;
|
||||
|
||||
Tegra::Engines::ConstBufferEngineInterface& CachedShader::GetEngine(
|
||||
Core::System& system, Tegra::Engines::ShaderType stage) {
|
||||
if (stage == Tegra::Engines::ShaderType::Compute) {
|
||||
Tegra::Engines::ConstBufferEngineInterface& Shader::GetEngine(Core::System& system,
|
||||
Tegra::Engines::ShaderType stage) {
|
||||
if (stage == ShaderType::Compute) {
|
||||
return system.GPU().KeplerCompute();
|
||||
} else {
|
||||
return system.GPU().Maxwell3D();
|
||||
|
@ -156,16 +156,16 @@ VKPipelineCache::VKPipelineCache(Core::System& system, RasterizerVulkan& rasteri
|
|||
VKDescriptorPool& descriptor_pool,
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue,
|
||||
VKRenderPassCache& renderpass_cache)
|
||||
: RasterizerCache{rasterizer}, system{system}, device{device}, scheduler{scheduler},
|
||||
descriptor_pool{descriptor_pool}, update_descriptor_queue{update_descriptor_queue},
|
||||
renderpass_cache{renderpass_cache} {}
|
||||
: VideoCommon::ShaderCache<Shader>{rasterizer}, system{system}, device{device},
|
||||
scheduler{scheduler}, descriptor_pool{descriptor_pool},
|
||||
update_descriptor_queue{update_descriptor_queue}, renderpass_cache{renderpass_cache} {}
|
||||
|
||||
VKPipelineCache::~VKPipelineCache() = default;
|
||||
|
||||
std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
|
||||
std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
|
||||
const auto& gpu = system.GPU().Maxwell3D();
|
||||
|
||||
std::array<Shader, Maxwell::MaxShaderProgram> shaders;
|
||||
std::array<Shader*, Maxwell::MaxShaderProgram> shaders{};
|
||||
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
|
||||
const auto program{static_cast<Maxwell::ShaderProgram>(index)};
|
||||
|
||||
|
@ -178,24 +178,28 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
|
|||
const GPUVAddr program_addr{GetShaderAddress(system, program)};
|
||||
const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
|
||||
ASSERT(cpu_addr);
|
||||
auto shader = cpu_addr ? TryGet(*cpu_addr) : null_shader;
|
||||
if (!shader) {
|
||||
|
||||
Shader* result = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
|
||||
if (!result) {
|
||||
const auto host_ptr{memory_manager.GetPointer(program_addr)};
|
||||
|
||||
// No shader found - create a new one
|
||||
constexpr u32 stage_offset = STAGE_MAIN_OFFSET;
|
||||
const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1);
|
||||
const auto stage = static_cast<ShaderType>(index == 0 ? 0 : index - 1);
|
||||
ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, false);
|
||||
const std::size_t size_in_bytes = code.size() * sizeof(u64);
|
||||
|
||||
auto shader = std::make_unique<Shader>(system, stage, program_addr, std::move(code),
|
||||
stage_offset);
|
||||
result = shader.get();
|
||||
|
||||
shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr,
|
||||
std::move(code), stage_offset);
|
||||
if (cpu_addr) {
|
||||
Register(shader);
|
||||
Register(std::move(shader), *cpu_addr, size_in_bytes);
|
||||
} else {
|
||||
null_shader = shader;
|
||||
null_shader = std::move(shader);
|
||||
}
|
||||
}
|
||||
shaders[index] = std::move(shader);
|
||||
shaders[index] = result;
|
||||
}
|
||||
return last_shaders = shaders;
|
||||
}
|
||||
|
@ -236,19 +240,22 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
|
|||
const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
|
||||
ASSERT(cpu_addr);
|
||||
|
||||
auto shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel;
|
||||
Shader* shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get();
|
||||
if (!shader) {
|
||||
// No shader found - create a new one
|
||||
const auto host_ptr = memory_manager.GetPointer(program_addr);
|
||||
|
||||
ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, true);
|
||||
shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute,
|
||||
program_addr, *cpu_addr, std::move(code),
|
||||
KERNEL_MAIN_OFFSET);
|
||||
const std::size_t size_in_bytes = code.size() * sizeof(u64);
|
||||
|
||||
auto shader_info = std::make_unique<Shader>(system, ShaderType::Compute, program_addr,
|
||||
std::move(code), KERNEL_MAIN_OFFSET);
|
||||
shader = shader_info.get();
|
||||
|
||||
if (cpu_addr) {
|
||||
Register(shader);
|
||||
Register(std::move(shader_info), *cpu_addr, size_in_bytes);
|
||||
} else {
|
||||
null_kernel = shader;
|
||||
null_kernel = std::move(shader_info);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -264,7 +271,7 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
|
|||
return *entry;
|
||||
}
|
||||
|
||||
void VKPipelineCache::Unregister(const Shader& shader) {
|
||||
void VKPipelineCache::OnShaderRemoval(Shader* shader) {
|
||||
bool finished = false;
|
||||
const auto Finish = [&] {
|
||||
// TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and
|
||||
|
@ -296,8 +303,6 @@ void VKPipelineCache::Unregister(const Shader& shader) {
|
|||
Finish();
|
||||
it = compute_cache.erase(it);
|
||||
}
|
||||
|
||||
RasterizerCache::Unregister(shader);
|
||||
}
|
||||
|
||||
std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>>
|
||||
|
@ -332,12 +337,11 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
|
|||
}
|
||||
|
||||
const GPUVAddr gpu_addr = GetShaderAddress(system, program_enum);
|
||||
const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
|
||||
const auto shader = cpu_addr ? TryGet(*cpu_addr) : null_shader;
|
||||
ASSERT(shader);
|
||||
const std::optional<VAddr> cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
|
||||
Shader* const shader = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
|
||||
|
||||
const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
|
||||
const auto program_type = GetShaderType(program_enum);
|
||||
const ShaderType program_type = GetShaderType(program_enum);
|
||||
const auto& entries = shader->GetEntries();
|
||||
program[stage] = {
|
||||
Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization),
|
||||
|
|
|
@ -17,7 +17,6 @@
|
|||
#include "common/common_types.h"
|
||||
#include "video_core/engines/const_buffer_engine_interface.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/rasterizer_cache.h"
|
||||
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
|
||||
#include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
|
||||
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
|
||||
|
@ -26,6 +25,7 @@
|
|||
#include "video_core/shader/memory_util.h"
|
||||
#include "video_core/shader/registry.h"
|
||||
#include "video_core/shader/shader_ir.h"
|
||||
#include "video_core/shader_cache.h"
|
||||
|
||||
namespace Core {
|
||||
class System;
|
||||
|
@ -41,8 +41,6 @@ class VKFence;
|
|||
class VKScheduler;
|
||||
class VKUpdateDescriptorQueue;
|
||||
|
||||
class CachedShader;
|
||||
using Shader = std::shared_ptr<CachedShader>;
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
|
||||
struct GraphicsPipelineCacheKey {
|
||||
|
@ -102,21 +100,16 @@ struct hash<Vulkan::ComputePipelineCacheKey> {
|
|||
|
||||
namespace Vulkan {
|
||||
|
||||
class CachedShader final : public RasterizerCacheObject {
|
||||
class Shader {
|
||||
public:
|
||||
explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
|
||||
VAddr cpu_addr, VideoCommon::Shader::ProgramCode program_code,
|
||||
u32 main_offset);
|
||||
~CachedShader();
|
||||
explicit Shader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
|
||||
VideoCommon::Shader::ProgramCode program_code, u32 main_offset);
|
||||
~Shader();
|
||||
|
||||
GPUVAddr GetGpuAddr() const {
|
||||
return gpu_addr;
|
||||
}
|
||||
|
||||
std::size_t GetSizeInBytes() const override {
|
||||
return program_code.size() * sizeof(u64);
|
||||
}
|
||||
|
||||
VideoCommon::Shader::ShaderIR& GetIR() {
|
||||
return shader_ir;
|
||||
}
|
||||
|
@ -144,25 +137,23 @@ private:
|
|||
ShaderEntries entries;
|
||||
};
|
||||
|
||||
class VKPipelineCache final : public RasterizerCache<Shader> {
|
||||
class VKPipelineCache final : public VideoCommon::ShaderCache<Shader> {
|
||||
public:
|
||||
explicit VKPipelineCache(Core::System& system, RasterizerVulkan& rasterizer,
|
||||
const VKDevice& device, VKScheduler& scheduler,
|
||||
VKDescriptorPool& descriptor_pool,
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue,
|
||||
VKRenderPassCache& renderpass_cache);
|
||||
~VKPipelineCache();
|
||||
~VKPipelineCache() override;
|
||||
|
||||
std::array<Shader, Maxwell::MaxShaderProgram> GetShaders();
|
||||
std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders();
|
||||
|
||||
VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key);
|
||||
|
||||
VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);
|
||||
|
||||
protected:
|
||||
void Unregister(const Shader& shader) override;
|
||||
|
||||
void FlushObjectInner(const Shader& object) override {}
|
||||
void OnShaderRemoval(Shader* shader) final;
|
||||
|
||||
private:
|
||||
std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders(
|
||||
|
@ -175,10 +166,10 @@ private:
|
|||
VKUpdateDescriptorQueue& update_descriptor_queue;
|
||||
VKRenderPassCache& renderpass_cache;
|
||||
|
||||
Shader null_shader{};
|
||||
Shader null_kernel{};
|
||||
std::unique_ptr<Shader> null_shader;
|
||||
std::unique_ptr<Shader> null_kernel;
|
||||
|
||||
std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
|
||||
std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{};
|
||||
|
||||
GraphicsPipelineCacheKey last_graphics_key;
|
||||
VKGraphicsPipeline* last_graphics_pipeline = nullptr;
|
||||
|
|
|
@ -38,6 +38,7 @@
|
|||
#include "video_core/renderer_vulkan/vk_texture_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
|
||||
#include "video_core/renderer_vulkan/wrapper.h"
|
||||
#include "video_core/shader_cache.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
|
@ -98,7 +99,7 @@ VkRect2D GetScissorState(const Maxwell& regs, std::size_t index) {
|
|||
}
|
||||
|
||||
std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses(
|
||||
const std::array<Shader, Maxwell::MaxShaderProgram>& shaders) {
|
||||
const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) {
|
||||
std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses;
|
||||
for (std::size_t i = 0; i < std::size(addresses); ++i) {
|
||||
addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0;
|
||||
|
@ -776,12 +777,12 @@ RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineSt
|
|||
}
|
||||
|
||||
void RasterizerVulkan::SetupShaderDescriptors(
|
||||
const std::array<Shader, Maxwell::MaxShaderProgram>& shaders) {
|
||||
const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) {
|
||||
texture_cache.GuardSamplers(true);
|
||||
|
||||
for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
|
||||
// Skip VertexA stage
|
||||
const auto& shader = shaders[stage + 1];
|
||||
Shader* const shader = shaders[stage + 1];
|
||||
if (!shader) {
|
||||
continue;
|
||||
}
|
||||
|
|
|
@ -168,7 +168,7 @@ private:
|
|||
bool is_indexed, bool is_instanced);
|
||||
|
||||
/// Setup descriptors in the graphics pipeline.
|
||||
void SetupShaderDescriptors(const std::array<Shader, Maxwell::MaxShaderProgram>& shaders);
|
||||
void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders);
|
||||
|
||||
void SetupImageTransitions(Texceptions texceptions,
|
||||
const std::array<View, Maxwell::NumRenderTargets>& color_attachments,
|
||||
|
|
228
src/video_core/shader_cache.h
Normal file
228
src/video_core/shader_cache.h
Normal file
|
@ -0,0 +1,228 @@
|
|||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
template <class T>
|
||||
class ShaderCache {
|
||||
static constexpr u64 PAGE_SHIFT = 14;
|
||||
|
||||
struct Entry {
|
||||
VAddr addr_start;
|
||||
VAddr addr_end;
|
||||
T* data;
|
||||
|
||||
bool is_memory_marked = true;
|
||||
|
||||
constexpr bool Overlaps(VAddr start, VAddr end) const noexcept {
|
||||
return start < addr_end && addr_start < end;
|
||||
}
|
||||
};
|
||||
|
||||
public:
|
||||
virtual ~ShaderCache() = default;
|
||||
|
||||
/// @brief Removes shaders inside a given region
|
||||
/// @note Checks for ranges
|
||||
/// @param addr Start address of the invalidation
|
||||
/// @param size Number of bytes of the invalidation
|
||||
void InvalidateRegion(VAddr addr, std::size_t size) {
|
||||
std::scoped_lock lock{invalidation_mutex};
|
||||
InvalidatePagesInRegion(addr, size);
|
||||
RemovePendingShaders();
|
||||
}
|
||||
|
||||
/// @brief Unmarks a memory region as cached and marks it for removal
|
||||
/// @param addr Start address of the CPU write operation
|
||||
/// @param size Number of bytes of the CPU write operation
|
||||
void OnCPUWrite(VAddr addr, std::size_t size) {
|
||||
std::lock_guard lock{invalidation_mutex};
|
||||
InvalidatePagesInRegion(addr, size);
|
||||
}
|
||||
|
||||
/// @brief Flushes delayed removal operations
|
||||
void SyncGuestHost() {
|
||||
std::scoped_lock lock{invalidation_mutex};
|
||||
RemovePendingShaders();
|
||||
}
|
||||
|
||||
/// @brief Tries to obtain a cached shader starting in a given address
|
||||
/// @note Doesn't check for ranges, the given address has to be the start of the shader
|
||||
/// @param addr Start address of the shader, this doesn't cache for region
|
||||
/// @return Pointer to a valid shader, nullptr when nothing is found
|
||||
T* TryGet(VAddr addr) const {
|
||||
std::scoped_lock lock{lookup_mutex};
|
||||
|
||||
const auto it = lookup_cache.find(addr);
|
||||
if (it == lookup_cache.end()) {
|
||||
return nullptr;
|
||||
}
|
||||
return it->second->data;
|
||||
}
|
||||
|
||||
protected:
|
||||
explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_) : rasterizer{rasterizer_} {}
|
||||
|
||||
/// @brief Register in the cache a given entry
|
||||
/// @param data Shader to store in the cache
|
||||
/// @param addr Start address of the shader that will be registered
|
||||
/// @param size Size in bytes of the shader
|
||||
void Register(std::unique_ptr<T> data, VAddr addr, std::size_t size) {
|
||||
std::scoped_lock lock{invalidation_mutex, lookup_mutex};
|
||||
|
||||
const VAddr addr_end = addr + size;
|
||||
Entry* const entry = NewEntry(addr, addr_end, data.get());
|
||||
|
||||
const u64 page_end = addr_end >> PAGE_SHIFT;
|
||||
for (u64 page = addr >> PAGE_SHIFT; page <= page_end; ++page) {
|
||||
invalidation_cache[page].push_back(entry);
|
||||
}
|
||||
|
||||
storage.push_back(std::move(data));
|
||||
|
||||
rasterizer.UpdatePagesCachedCount(addr, size, 1);
|
||||
}
|
||||
|
||||
/// @brief Called when a shader is going to be removed
|
||||
/// @param shader Shader that will be removed
|
||||
/// @pre invalidation_cache is locked
|
||||
/// @pre lookup_mutex is locked
|
||||
virtual void OnShaderRemoval([[maybe_unused]] T* shader) {}
|
||||
|
||||
private:
|
||||
/// @brief Invalidate pages in a given region
|
||||
/// @pre invalidation_mutex is locked
|
||||
void InvalidatePagesInRegion(VAddr addr, std::size_t size) {
|
||||
const VAddr addr_end = addr + size;
|
||||
const u64 page_end = addr_end >> PAGE_SHIFT;
|
||||
for (u64 page = addr >> PAGE_SHIFT; page <= page_end; ++page) {
|
||||
const auto it = invalidation_cache.find(page);
|
||||
if (it == invalidation_cache.end()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
std::vector<Entry*>& entries = it->second;
|
||||
InvalidatePageEntries(entries, addr, addr_end);
|
||||
|
||||
// If there's nothing else in this page, remove it to avoid overpopulating the hash map.
|
||||
if (entries.empty()) {
|
||||
invalidation_cache.erase(it);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// @brief Remove shaders marked for deletion
|
||||
/// @pre invalidation_mutex is locked
|
||||
void RemovePendingShaders() {
|
||||
if (marked_for_removal.empty()) {
|
||||
return;
|
||||
}
|
||||
std::scoped_lock lock{lookup_mutex};
|
||||
|
||||
std::vector<T*> removed_shaders;
|
||||
removed_shaders.reserve(marked_for_removal.size());
|
||||
|
||||
for (Entry* const entry : marked_for_removal) {
|
||||
if (lookup_cache.erase(entry->addr_start) > 0) {
|
||||
removed_shaders.push_back(entry->data);
|
||||
}
|
||||
}
|
||||
marked_for_removal.clear();
|
||||
|
||||
if (!removed_shaders.empty()) {
|
||||
RemoveShadersFromStorage(std::move(removed_shaders));
|
||||
}
|
||||
}
|
||||
|
||||
/// @brief Invalidates entries in a given range for the passed page
|
||||
/// @param entries Vector of entries in the page, it will be modified on overlaps
|
||||
/// @param addr Start address of the invalidation
|
||||
/// @param addr_end Non-inclusive end address of the invalidation
|
||||
/// @pre invalidation_mutex is locked
|
||||
void InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end) {
|
||||
auto it = entries.begin();
|
||||
while (it != entries.end()) {
|
||||
Entry* const entry = *it;
|
||||
if (!entry->Overlaps(addr, addr_end)) {
|
||||
++it;
|
||||
continue;
|
||||
}
|
||||
UnmarkMemory(entry);
|
||||
marked_for_removal.push_back(entry);
|
||||
|
||||
it = entries.erase(it);
|
||||
}
|
||||
}
|
||||
|
||||
/// @brief Unmarks an entry from the rasterizer cache
|
||||
/// @param entry Entry to unmark from memory
|
||||
void UnmarkMemory(Entry* entry) {
|
||||
if (!entry->is_memory_marked) {
|
||||
return;
|
||||
}
|
||||
entry->is_memory_marked = false;
|
||||
|
||||
const VAddr addr = entry->addr_start;
|
||||
const std::size_t size = entry->addr_end - addr;
|
||||
rasterizer.UpdatePagesCachedCount(addr, size, -1);
|
||||
}
|
||||
|
||||
/// @brief Removes a vector of shaders from a list
|
||||
/// @param removed_shaders Shaders to be removed from the storage, it can contain duplicates
|
||||
/// @pre invalidation_mutex is locked
|
||||
/// @pre lookup_mutex is locked
|
||||
void RemoveShadersFromStorage(std::vector<T*> removed_shaders) {
|
||||
// Remove duplicates
|
||||
std::sort(removed_shaders.begin(), removed_shaders.end());
|
||||
removed_shaders.erase(std::unique(removed_shaders.begin(), removed_shaders.end()),
|
||||
removed_shaders.end());
|
||||
|
||||
// Now that there are no duplicates, we can notify removals
|
||||
for (T* const shader : removed_shaders) {
|
||||
OnShaderRemoval(shader);
|
||||
}
|
||||
|
||||
// Remove them from the cache
|
||||
const auto is_removed = [&removed_shaders](std::unique_ptr<T>& shader) {
|
||||
return std::find(removed_shaders.begin(), removed_shaders.end(), shader.get()) !=
|
||||
removed_shaders.end();
|
||||
};
|
||||
storage.erase(std::remove_if(storage.begin(), storage.end(), is_removed), storage.end());
|
||||
}
|
||||
|
||||
/// @brief Creates a new entry in the lookup cache and returns its pointer
|
||||
/// @pre lookup_mutex is locked
|
||||
Entry* NewEntry(VAddr addr, VAddr addr_end, T* data) {
|
||||
auto entry = std::make_unique<Entry>(Entry{addr, addr_end, data});
|
||||
Entry* const entry_pointer = entry.get();
|
||||
|
||||
lookup_cache.emplace(addr, std::move(entry));
|
||||
return entry_pointer;
|
||||
}
|
||||
|
||||
VideoCore::RasterizerInterface& rasterizer;
|
||||
|
||||
mutable std::mutex lookup_mutex;
|
||||
std::mutex invalidation_mutex;
|
||||
|
||||
std::unordered_map<u64, std::unique_ptr<Entry>> lookup_cache;
|
||||
std::unordered_map<u64, std::vector<Entry*>> invalidation_cache;
|
||||
std::vector<std::unique_ptr<T>> storage;
|
||||
std::vector<Entry*> marked_for_removal;
|
||||
};
|
||||
|
||||
} // namespace VideoCommon
|
Loading…
Reference in a new issue