pineapple-src/src/video_core/texture_cache/texture_cache_base.h

509 lines
20 KiB
C++
Executable File

// SPDX-FileCopyrightText: 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
#include <atomic>
#include <deque>
#include <limits>
#include <mutex>
#include <span>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include <boost/container/small_vector.hpp>
#include <queue>
#include "common/common_types.h"
#include "common/hash.h"
#include "common/literals.h"
#include "common/lru_cache.h"
#include "common/polyfill_ranges.h"
#include "common/scratch_buffer.h"
#include "common/thread_worker.h"
#include "video_core/compatible_formats.h"
#include "video_core/control/channel_state_cache.h"
#include "video_core/delayed_destruction_ring.h"
#include "video_core/engines/fermi_2d.h"
#include "video_core/surface.h"
#include "video_core/texture_cache/descriptor_table.h"
#include "video_core/texture_cache/image_base.h"
#include "video_core/texture_cache/image_info.h"
#include "video_core/texture_cache/image_view_base.h"
#include "video_core/texture_cache/render_targets.h"
#include "video_core/texture_cache/slot_vector.h"
#include "video_core/texture_cache/types.h"
#include "video_core/textures/texture.h"
namespace Tegra::Control {
struct ChannelState;
}
namespace VideoCommon {
using Tegra::Texture::TICEntry;
using Tegra::Texture::TSCEntry;
using VideoCore::Surface::PixelFormat;
using namespace Common::Literals;
struct ImageViewInOut {
u32 index{};
bool blacklist{};
ImageViewId id{};
};
struct AsyncDecodeContext {
ImageId image_id;
Common::ScratchBuffer<u8> decoded_data;
boost::container::small_vector<BufferImageCopy, 16> copies;
std::mutex mutex;
std::atomic_bool complete;
};
using TextureCacheGPUMap = std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>>;
class TextureCacheChannelInfo : public ChannelInfo {
public:
TextureCacheChannelInfo() = delete;
TextureCacheChannelInfo(Tegra::Control::ChannelState& state) noexcept;
TextureCacheChannelInfo(const TextureCacheChannelInfo& state) = delete;
TextureCacheChannelInfo& operator=(const TextureCacheChannelInfo&) = delete;
DescriptorTable<TICEntry> graphics_image_table{gpu_memory};
DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory};
std::vector<SamplerId> graphics_sampler_ids;
std::vector<ImageViewId> graphics_image_view_ids;
DescriptorTable<TICEntry> compute_image_table{gpu_memory};
DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory};
std::vector<SamplerId> compute_sampler_ids;
std::vector<ImageViewId> compute_image_view_ids;
std::unordered_map<TICEntry, ImageViewId> image_views;
std::unordered_map<TSCEntry, SamplerId> samplers;
TextureCacheGPUMap* gpu_page_table;
};
template <class P>
class TextureCache : public VideoCommon::ChannelSetupCaches<TextureCacheChannelInfo> {
/// Address shift for caching images into a hash table
static constexpr u64 YUZU_PAGEBITS = 20;
/// Enables debugging features to the texture cache
static constexpr bool ENABLE_VALIDATION = P::ENABLE_VALIDATION;
/// Implement blits as copies between framebuffers
static constexpr bool FRAMEBUFFER_BLITS = P::FRAMEBUFFER_BLITS;
/// True when some copies have to be emulated
static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES;
/// True when the API can provide info about the memory of the device.
static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO;
/// True when the API can do asynchronous texture downloads.
static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = P::IMPLEMENTS_ASYNC_DOWNLOADS;
static constexpr size_t UNSET_CHANNEL{std::numeric_limits<size_t>::max()};
static constexpr s64 TARGET_THRESHOLD = 4_GiB;
static constexpr s64 DEFAULT_EXPECTED_MEMORY = 1_GiB + 125_MiB;
static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB + 625_MiB;
static constexpr size_t GC_EMERGENCY_COUNTS = 2;
using Runtime = typename P::Runtime;
using Image = typename P::Image;
using ImageAlloc = typename P::ImageAlloc;
using ImageView = typename P::ImageView;
using Sampler = typename P::Sampler;
using Framebuffer = typename P::Framebuffer;
using AsyncBuffer = typename P::AsyncBuffer;
using BufferType = typename P::BufferType;
struct BlitImages {
ImageId dst_id;
ImageId src_id;
PixelFormat dst_format;
PixelFormat src_format;
};
public:
explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&);
/// Notify the cache that a new frame has been queued
void TickFrame();
/// Return a constant reference to the given image view id
[[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept;
/// Return a reference to the given image view id
[[nodiscard]] ImageView& GetImageView(ImageViewId id) noexcept;
/// Get the imageview from the graphics descriptor table in the specified index
[[nodiscard]] ImageView& GetImageView(u32 index) noexcept;
/// Mark an image as modified from the GPU
void MarkModification(ImageId id) noexcept;
/// Fill image_view_ids with the graphics images in indices
template <bool has_blacklists>
void FillGraphicsImageViews(std::span<ImageViewInOut> views);
/// Fill image_view_ids with the compute images in indices
void FillComputeImageViews(std::span<ImageViewInOut> views);
/// Handle feedback loops during draws.
void CheckFeedbackLoop(std::span<const ImageViewInOut> views);
/// Get the sampler from the graphics descriptor table in the specified index
Sampler* GetGraphicsSampler(u32 index);
/// Get the sampler from the compute descriptor table in the specified index
Sampler* GetComputeSampler(u32 index);
/// Get the sampler id from the graphics descriptor table in the specified index
SamplerId GetGraphicsSamplerId(u32 index);
/// Get the sampler id from the compute descriptor table in the specified index
SamplerId GetComputeSamplerId(u32 index);
/// Return a constant reference to the given sampler id
[[nodiscard]] const Sampler& GetSampler(SamplerId id) const noexcept;
/// Return a reference to the given sampler id
[[nodiscard]] Sampler& GetSampler(SamplerId id) noexcept;
/// Refresh the state for graphics image view and sampler descriptors
void SynchronizeGraphicsDescriptors();
/// Refresh the state for compute image view and sampler descriptors
void SynchronizeComputeDescriptors();
/// Updates the Render Targets if they can be rescaled
/// @retval True if the Render Targets have been rescaled.
bool RescaleRenderTargets();
/// Update bound render targets and upload memory if necessary
/// @param is_clear True when the render targets are being used for clears
void UpdateRenderTargets(bool is_clear);
/// Find a framebuffer with the currently bound render targets
/// UpdateRenderTargets should be called before this
Framebuffer* GetFramebuffer();
/// Mark images in a range as modified from the CPU
void WriteMemory(VAddr cpu_addr, size_t size);
/// Download contents of host images to guest memory in a region
void DownloadMemory(VAddr cpu_addr, size_t size);
std::optional<VideoCore::RasterizerDownloadArea> GetFlushArea(VAddr cpu_addr, u64 size);
/// Remove images in a region
void UnmapMemory(VAddr cpu_addr, size_t size);
/// Remove images in a region
void UnmapGPUMemory(size_t as_id, GPUVAddr gpu_addr, size_t size);
/// Blit an image with the given parameters
bool BlitImage(const Tegra::Engines::Fermi2D::Surface& dst,
const Tegra::Engines::Fermi2D::Surface& src,
const Tegra::Engines::Fermi2D::Config& copy);
/// Try to find a cached image view in the given CPU address
[[nodiscard]] ImageView* TryFindFramebufferImageView(const Tegra::FramebufferConfig& config,
VAddr cpu_addr);
/// Return true when there are uncommitted images to be downloaded
[[nodiscard]] bool HasUncommittedFlushes() const noexcept;
/// Return true when the caller should wait for async downloads
[[nodiscard]] bool ShouldWaitAsyncFlushes() const noexcept;
/// Commit asynchronous downloads
void CommitAsyncFlushes();
/// Pop asynchronous downloads
void PopAsyncFlushes();
[[nodiscard]] ImageId DmaImageId(const Tegra::DMA::ImageOperand& operand, bool is_upload);
[[nodiscard]] std::pair<Image*, BufferImageCopy> DmaBufferImageCopy(
const Tegra::DMA::ImageCopy& copy_info, const Tegra::DMA::BufferOperand& buffer_operand,
const Tegra::DMA::ImageOperand& image_operand, ImageId image_id, bool modifies_image);
void DownloadImageIntoBuffer(Image* image, BufferType buffer, size_t buffer_offset,
std::span<const VideoCommon::BufferImageCopy> copies,
GPUVAddr address = 0, size_t size = 0);
/// Return true when a CPU region is modified from the GPU
[[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size);
[[nodiscard]] bool IsRescaling() const noexcept;
[[nodiscard]] bool IsRescaling(const ImageViewBase& image_view) const noexcept;
/// Create channel state.
void CreateChannel(Tegra::Control::ChannelState& channel) final override;
/// Prepare an image to be used
void PrepareImage(ImageId image_id, bool is_modification, bool invalidate);
std::recursive_mutex mutex;
private:
/// Iterate over all page indices in a range
template <typename Func>
static void ForEachCPUPage(VAddr addr, size_t size, Func&& func) {
static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>;
const u64 page_end = (addr + size - 1) >> YUZU_PAGEBITS;
for (u64 page = addr >> YUZU_PAGEBITS; page <= page_end; ++page) {
if constexpr (RETURNS_BOOL) {
if (func(page)) {
break;
}
} else {
func(page);
}
}
}
template <typename Func>
static void ForEachGPUPage(GPUVAddr addr, size_t size, Func&& func) {
static constexpr bool RETURNS_BOOL = std::is_same_v<std::invoke_result<Func, u64>, bool>;
const u64 page_end = (addr + size - 1) >> YUZU_PAGEBITS;
for (u64 page = addr >> YUZU_PAGEBITS; page <= page_end; ++page) {
if constexpr (RETURNS_BOOL) {
if (func(page)) {
break;
}
} else {
func(page);
}
}
}
void OnGPUASRegister(size_t map_id) final override;
/// Runs the Garbage Collector.
void RunGarbageCollector();
/// Fills image_view_ids in the image views in indices
template <bool has_blacklists>
void FillImageViews(DescriptorTable<TICEntry>& table,
std::span<ImageViewId> cached_image_view_ids,
std::span<ImageViewInOut> views);
/// Find or create an image view in the guest descriptor table
ImageViewId VisitImageView(DescriptorTable<TICEntry>& table,
std::span<ImageViewId> cached_image_view_ids, u32 index);
/// Find or create a framebuffer with the given render target parameters
FramebufferId GetFramebufferId(const RenderTargets& key);
/// Refresh the contents (pixel data) of an image
void RefreshContents(Image& image, ImageId image_id);
/// Upload data from guest to an image
template <typename StagingBuffer>
void UploadImageContents(Image& image, StagingBuffer& staging_buffer);
/// Find or create an image view from a guest descriptor
[[nodiscard]] ImageViewId FindImageView(const TICEntry& config);
/// Create a new image view from a guest descriptor
[[nodiscard]] ImageViewId CreateImageView(const TICEntry& config);
/// Find or create an image from the given parameters
[[nodiscard]] ImageId FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
RelaxedOptions options = RelaxedOptions{});
/// Find an image from the given parameters
[[nodiscard]] ImageId FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
RelaxedOptions options);
/// Create an image from the given parameters
[[nodiscard]] ImageId InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
RelaxedOptions options);
/// Create a new image and join perfectly matching existing images
/// Remove joined images from the cache
[[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr);
[[nodiscard]] ImageId FindDMAImage(const ImageInfo& info, GPUVAddr gpu_addr);
/// Return a blit image pair from the given guest blit parameters
[[nodiscard]] std::optional<BlitImages> GetBlitImages(
const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src,
const Tegra::Engines::Fermi2D::Config& copy);
/// Find or create a sampler from a guest descriptor sampler
[[nodiscard]] SamplerId FindSampler(const TSCEntry& config);
/// Find or create an image view for the given color buffer index
[[nodiscard]] ImageViewId FindColorBuffer(size_t index);
/// Find or create an image view for the depth buffer
[[nodiscard]] ImageViewId FindDepthBuffer();
/// Find or create a view for a render target with the given image parameters
[[nodiscard]] ImageViewId FindRenderTargetView(const ImageInfo& info, GPUVAddr gpu_addr);
/// Iterates over all the images in a region calling func
template <typename Func>
void ForEachImageInRegion(VAddr cpu_addr, size_t size, Func&& func);
template <typename Func>
void ForEachImageInRegionGPU(size_t as_id, GPUVAddr gpu_addr, size_t size, Func&& func);
template <typename Func>
void ForEachSparseImageInRegion(GPUVAddr gpu_addr, size_t size, Func&& func);
/// Iterates over all the images in a region calling func
template <typename Func>
void ForEachSparseSegment(ImageBase& image, Func&& func);
/// Find or create an image view in the given image with the passed parameters
[[nodiscard]] ImageViewId FindOrEmplaceImageView(ImageId image_id, const ImageViewInfo& info);
/// Register image in the page table
void RegisterImage(ImageId image);
/// Unregister image from the page table
void UnregisterImage(ImageId image);
/// Track CPU reads and writes for image
void TrackImage(ImageBase& image, ImageId image_id);
/// Stop tracking CPU reads and writes for image
void UntrackImage(ImageBase& image, ImageId image_id);
/// Delete image from the cache
void DeleteImage(ImageId image, bool immediate_delete = false);
/// Remove image views references from the cache
void RemoveImageViewReferences(std::span<const ImageViewId> removed_views);
/// Remove framebuffers using the given image views from the cache
void RemoveFramebuffers(std::span<const ImageViewId> removed_views);
/// Mark an image as modified from the GPU
void MarkModification(ImageBase& image) noexcept;
/// Synchronize image aliases, copying data if needed
void SynchronizeAliases(ImageId image_id);
/// Prepare an image view to be used
void PrepareImageView(ImageViewId image_view_id, bool is_modification, bool invalidate);
/// Execute copies from one image to the other, even if they are incompatible
void CopyImage(ImageId dst_id, ImageId src_id, std::vector<ImageCopy> copies);
/// Bind an image view as render target, downloading resources preemtively if needed
void BindRenderTarget(ImageViewId* old_id, ImageViewId new_id);
/// Create a render target from a given image and image view parameters
[[nodiscard]] std::pair<FramebufferId, ImageViewId> RenderTargetFromImage(
ImageId, const ImageViewInfo& view_info);
/// Returns true if the current clear parameters clear the whole image of a given image view
[[nodiscard]] bool IsFullClear(ImageViewId id);
[[nodiscard]] std::pair<u32, u32> PrepareDmaImage(ImageId dst_id, GPUVAddr base_addr,
bool mark_as_modified);
bool ImageCanRescale(ImageBase& image);
void InvalidateScale(Image& image);
bool ScaleUp(Image& image);
bool ScaleDown(Image& image);
u64 GetScaledImageSizeBytes(const ImageBase& image);
void QueueAsyncDecode(Image& image, ImageId image_id);
void TickAsyncDecode();
Runtime& runtime;
VideoCore::RasterizerInterface& rasterizer;
std::deque<TextureCacheGPUMap> gpu_page_table_storage;
RenderTargets render_targets;
std::unordered_map<RenderTargets, FramebufferId> framebuffers;
std::unordered_map<u64, std::vector<ImageMapId>, Common::IdentityHash<u64>> page_table;
std::unordered_map<u64, std::vector<ImageId>, Common::IdentityHash<u64>> sparse_page_table;
std::unordered_map<ImageId, boost::container::small_vector<ImageViewId, 16>> sparse_views;
VAddr virtual_invalid_space{};
bool has_deleted_images = false;
bool is_rescaling = false;
u64 total_used_memory = 0;
u64 minimum_memory;
u64 expected_memory;
u64 critical_memory;
struct BufferDownload {
GPUVAddr address;
size_t size;
};
struct PendingDownload {
bool is_swizzle;
size_t async_buffer_id;
SlotId object_id;
};
SlotVector<Image> slot_images;
SlotVector<ImageMapView> slot_map_views;
SlotVector<ImageView> slot_image_views;
SlotVector<ImageAlloc> slot_image_allocs;
SlotVector<Sampler> slot_samplers;
SlotVector<Framebuffer> slot_framebuffers;
SlotVector<BufferDownload> slot_buffer_downloads;
// TODO: This data structure is not optimal and it should be reworked
std::vector<PendingDownload> uncommitted_downloads;
std::deque<std::vector<PendingDownload>> committed_downloads;
std::vector<AsyncBuffer> uncommitted_async_buffers;
std::deque<std::vector<AsyncBuffer>> async_buffers;
std::deque<AsyncBuffer> async_buffers_death_ring;
struct LRUItemParams {
using ObjectType = ImageId;
using TickType = u64;
};
Common::LeastRecentlyUsedCache<LRUItemParams> lru_cache;
static constexpr size_t TICKS_TO_DESTROY = 8;
DelayedDestructionRing<Image, TICKS_TO_DESTROY> sentenced_images;
DelayedDestructionRing<ImageView, TICKS_TO_DESTROY> sentenced_image_view;
DelayedDestructionRing<Framebuffer, TICKS_TO_DESTROY> sentenced_framebuffers;
std::unordered_map<GPUVAddr, ImageAllocId> image_allocs_table;
Common::ScratchBuffer<u8> swizzle_data_buffer;
Common::ScratchBuffer<u8> unswizzle_data_buffer;
u64 modification_tick = 0;
u64 frame_tick = 0;
Common::ThreadWorker texture_decode_worker{1, "TextureDecoder"};
std::vector<std::unique_ptr<AsyncDecodeContext>> async_decodes;
// Join caching
boost::container::small_vector<ImageId, 4> join_overlap_ids;
std::unordered_set<ImageId> join_overlaps_found;
boost::container::small_vector<ImageId, 4> join_left_aliased_ids;
boost::container::small_vector<ImageId, 4> join_right_aliased_ids;
std::unordered_set<ImageId> join_ignore_textures;
boost::container::small_vector<ImageId, 4> join_bad_overlap_ids;
struct JoinCopy {
bool is_alias;
ImageId id;
};
boost::container::small_vector<JoinCopy, 4> join_copies_to_do;
std::unordered_map<ImageId, size_t> join_alias_indices;
};
} // namespace VideoCommon