texture_cache: process aliases and overlaps in the correct order
This commit is contained in:
parent
e8a025b4f8
commit
72c1ee1bf9
4 changed files with 105 additions and 61 deletions
|
@ -155,7 +155,7 @@ void ImageBase::CheckAliasState() {
|
||||||
flags &= ~ImageFlagBits::Alias;
|
flags &= ~ImageFlagBits::Alias;
|
||||||
}
|
}
|
||||||
|
|
||||||
void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id) {
|
bool AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id) {
|
||||||
static constexpr auto OPTIONS = RelaxedOptions::Size | RelaxedOptions::Format;
|
static constexpr auto OPTIONS = RelaxedOptions::Size | RelaxedOptions::Format;
|
||||||
ASSERT(lhs.info.type == rhs.info.type);
|
ASSERT(lhs.info.type == rhs.info.type);
|
||||||
std::optional<SubresourceBase> base;
|
std::optional<SubresourceBase> base;
|
||||||
|
@ -169,7 +169,7 @@ void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_i
|
||||||
}
|
}
|
||||||
if (!base) {
|
if (!base) {
|
||||||
LOG_ERROR(HW_GPU, "Image alias should have been flipped");
|
LOG_ERROR(HW_GPU, "Image alias should have been flipped");
|
||||||
return;
|
return false;
|
||||||
}
|
}
|
||||||
const PixelFormat lhs_format = lhs.info.format;
|
const PixelFormat lhs_format = lhs.info.format;
|
||||||
const PixelFormat rhs_format = rhs.info.format;
|
const PixelFormat rhs_format = rhs.info.format;
|
||||||
|
@ -248,12 +248,13 @@ void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_i
|
||||||
}
|
}
|
||||||
ASSERT(lhs_alias.copies.empty() == rhs_alias.copies.empty());
|
ASSERT(lhs_alias.copies.empty() == rhs_alias.copies.empty());
|
||||||
if (lhs_alias.copies.empty()) {
|
if (lhs_alias.copies.empty()) {
|
||||||
return;
|
return false;
|
||||||
}
|
}
|
||||||
lhs.aliased_images.push_back(std::move(lhs_alias));
|
lhs.aliased_images.push_back(std::move(lhs_alias));
|
||||||
rhs.aliased_images.push_back(std::move(rhs_alias));
|
rhs.aliased_images.push_back(std::move(rhs_alias));
|
||||||
lhs.flags &= ~ImageFlagBits::IsRescalable;
|
lhs.flags &= ~ImageFlagBits::IsRescalable;
|
||||||
rhs.flags &= ~ImageFlagBits::IsRescalable;
|
rhs.flags &= ~ImageFlagBits::IsRescalable;
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace VideoCommon
|
} // namespace VideoCommon
|
||||||
|
|
|
@ -142,6 +142,6 @@ struct ImageAllocBase {
|
||||||
std::vector<ImageId> images;
|
std::vector<ImageId> images;
|
||||||
};
|
};
|
||||||
|
|
||||||
void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id);
|
bool AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id);
|
||||||
|
|
||||||
} // namespace VideoCommon
|
} // namespace VideoCommon
|
||||||
|
|
|
@ -1274,17 +1274,18 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
|
||||||
const size_t size_bytes = CalculateGuestSizeInBytes(new_info);
|
const size_t size_bytes = CalculateGuestSizeInBytes(new_info);
|
||||||
const bool broken_views = runtime.HasBrokenTextureViewFormats();
|
const bool broken_views = runtime.HasBrokenTextureViewFormats();
|
||||||
const bool native_bgr = runtime.HasNativeBgr();
|
const bool native_bgr = runtime.HasNativeBgr();
|
||||||
boost::container::small_vector<ImageId, 4> overlap_ids;
|
join_overlap_ids.clear();
|
||||||
std::unordered_set<ImageId> overlaps_found;
|
join_overlaps_found.clear();
|
||||||
boost::container::small_vector<ImageId, 4> left_aliased_ids;
|
join_left_aliased_ids.clear();
|
||||||
boost::container::small_vector<ImageId, 4> right_aliased_ids;
|
join_right_aliased_ids.clear();
|
||||||
std::unordered_set<ImageId> ignore_textures;
|
join_ignore_textures.clear();
|
||||||
boost::container::small_vector<ImageId, 4> bad_overlap_ids;
|
join_bad_overlap_ids.clear();
|
||||||
boost::container::small_vector<ImageId, 4> all_siblings;
|
join_copies_to_do.clear();
|
||||||
|
join_alias_indices.clear();
|
||||||
const bool this_is_linear = info.type == ImageType::Linear;
|
const bool this_is_linear = info.type == ImageType::Linear;
|
||||||
const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) {
|
const auto region_check = [&](ImageId overlap_id, ImageBase& overlap) {
|
||||||
if (True(overlap.flags & ImageFlagBits::Remapped)) {
|
if (True(overlap.flags & ImageFlagBits::Remapped)) {
|
||||||
ignore_textures.insert(overlap_id);
|
join_ignore_textures.insert(overlap_id);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const bool overlap_is_linear = overlap.info.type == ImageType::Linear;
|
const bool overlap_is_linear = overlap.info.type == ImageType::Linear;
|
||||||
|
@ -1294,11 +1295,11 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
|
||||||
if (this_is_linear && overlap_is_linear) {
|
if (this_is_linear && overlap_is_linear) {
|
||||||
if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) {
|
if (info.pitch == overlap.info.pitch && gpu_addr == overlap.gpu_addr) {
|
||||||
// Alias linear images with the same pitch
|
// Alias linear images with the same pitch
|
||||||
left_aliased_ids.push_back(overlap_id);
|
join_left_aliased_ids.push_back(overlap_id);
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
overlaps_found.insert(overlap_id);
|
join_overlaps_found.insert(overlap_id);
|
||||||
static constexpr bool strict_size = true;
|
static constexpr bool strict_size = true;
|
||||||
const std::optional<OverlapResult> solution = ResolveOverlap(
|
const std::optional<OverlapResult> solution = ResolveOverlap(
|
||||||
new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr);
|
new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr);
|
||||||
|
@ -1306,33 +1307,33 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
|
||||||
gpu_addr = solution->gpu_addr;
|
gpu_addr = solution->gpu_addr;
|
||||||
cpu_addr = solution->cpu_addr;
|
cpu_addr = solution->cpu_addr;
|
||||||
new_info.resources = solution->resources;
|
new_info.resources = solution->resources;
|
||||||
overlap_ids.push_back(overlap_id);
|
join_overlap_ids.push_back(overlap_id);
|
||||||
all_siblings.push_back(overlap_id);
|
join_copies_to_do.emplace_back(JoinCopy{false, overlap_id});
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format;
|
static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format;
|
||||||
const ImageBase new_image_base(new_info, gpu_addr, cpu_addr);
|
const ImageBase new_image_base(new_info, gpu_addr, cpu_addr);
|
||||||
if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) {
|
if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) {
|
||||||
left_aliased_ids.push_back(overlap_id);
|
join_left_aliased_ids.push_back(overlap_id);
|
||||||
overlap.flags |= ImageFlagBits::Alias;
|
overlap.flags |= ImageFlagBits::Alias;
|
||||||
all_siblings.push_back(overlap_id);
|
join_copies_to_do.emplace_back(JoinCopy{true, overlap_id});
|
||||||
} else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options,
|
} else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options,
|
||||||
broken_views, native_bgr)) {
|
broken_views, native_bgr)) {
|
||||||
right_aliased_ids.push_back(overlap_id);
|
join_right_aliased_ids.push_back(overlap_id);
|
||||||
overlap.flags |= ImageFlagBits::Alias;
|
overlap.flags |= ImageFlagBits::Alias;
|
||||||
all_siblings.push_back(overlap_id);
|
join_copies_to_do.emplace_back(JoinCopy{true, overlap_id});
|
||||||
} else {
|
} else {
|
||||||
bad_overlap_ids.push_back(overlap_id);
|
join_bad_overlap_ids.push_back(overlap_id);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
ForEachImageInRegion(cpu_addr, size_bytes, region_check);
|
ForEachImageInRegion(cpu_addr, size_bytes, region_check);
|
||||||
const auto region_check_gpu = [&](ImageId overlap_id, ImageBase& overlap) {
|
const auto region_check_gpu = [&](ImageId overlap_id, ImageBase& overlap) {
|
||||||
if (!overlaps_found.contains(overlap_id)) {
|
if (!join_overlaps_found.contains(overlap_id)) {
|
||||||
if (True(overlap.flags & ImageFlagBits::Remapped)) {
|
if (True(overlap.flags & ImageFlagBits::Remapped)) {
|
||||||
ignore_textures.insert(overlap_id);
|
join_ignore_textures.insert(overlap_id);
|
||||||
}
|
}
|
||||||
if (overlap.gpu_addr == gpu_addr && overlap.guest_size_bytes == size_bytes) {
|
if (overlap.gpu_addr == gpu_addr && overlap.guest_size_bytes == size_bytes) {
|
||||||
ignore_textures.insert(overlap_id);
|
join_ignore_textures.insert(overlap_id);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -1340,11 +1341,11 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
|
||||||
|
|
||||||
bool can_rescale = info.rescaleable;
|
bool can_rescale = info.rescaleable;
|
||||||
bool any_rescaled = false;
|
bool any_rescaled = false;
|
||||||
for (const ImageId sibling_id : all_siblings) {
|
for (const auto& copy : join_copies_to_do) {
|
||||||
if (!can_rescale) {
|
if (!can_rescale) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
Image& sibling = slot_images[sibling_id];
|
Image& sibling = slot_images[copy.id];
|
||||||
can_rescale &= ImageCanRescale(sibling);
|
can_rescale &= ImageCanRescale(sibling);
|
||||||
any_rescaled |= True(sibling.flags & ImageFlagBits::Rescaled);
|
any_rescaled |= True(sibling.flags & ImageFlagBits::Rescaled);
|
||||||
}
|
}
|
||||||
|
@ -1352,13 +1353,13 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
|
||||||
can_rescale &= any_rescaled;
|
can_rescale &= any_rescaled;
|
||||||
|
|
||||||
if (can_rescale) {
|
if (can_rescale) {
|
||||||
for (const ImageId sibling_id : all_siblings) {
|
for (const auto& copy : join_copies_to_do) {
|
||||||
Image& sibling = slot_images[sibling_id];
|
Image& sibling = slot_images[copy.id];
|
||||||
ScaleUp(sibling);
|
ScaleUp(sibling);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for (const ImageId sibling_id : all_siblings) {
|
for (const auto& copy : join_copies_to_do) {
|
||||||
Image& sibling = slot_images[sibling_id];
|
Image& sibling = slot_images[copy.id];
|
||||||
ScaleDown(sibling);
|
ScaleDown(sibling);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1370,7 +1371,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
|
||||||
new_image.flags |= ImageFlagBits::Sparse;
|
new_image.flags |= ImageFlagBits::Sparse;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (const ImageId overlap_id : ignore_textures) {
|
for (const ImageId overlap_id : join_ignore_textures) {
|
||||||
Image& overlap = slot_images[overlap_id];
|
Image& overlap = slot_images[overlap_id];
|
||||||
if (True(overlap.flags & ImageFlagBits::GpuModified)) {
|
if (True(overlap.flags & ImageFlagBits::GpuModified)) {
|
||||||
UNIMPLEMENTED();
|
UNIMPLEMENTED();
|
||||||
|
@ -1391,14 +1392,60 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
|
||||||
ScaleDown(new_image);
|
ScaleDown(new_image);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::ranges::sort(overlap_ids, [this](const ImageId lhs, const ImageId rhs) {
|
std::ranges::sort(join_copies_to_do, [this](const JoinCopy& lhs, const JoinCopy& rhs) {
|
||||||
const ImageBase& lhs_image = slot_images[lhs];
|
const ImageBase& lhs_image = slot_images[lhs.id];
|
||||||
const ImageBase& rhs_image = slot_images[rhs];
|
const ImageBase& rhs_image = slot_images[rhs.id];
|
||||||
return lhs_image.modification_tick < rhs_image.modification_tick;
|
return lhs_image.modification_tick < rhs_image.modification_tick;
|
||||||
});
|
});
|
||||||
|
|
||||||
for (const ImageId overlap_id : overlap_ids) {
|
ImageBase& new_image_base = new_image;
|
||||||
Image& overlap = slot_images[overlap_id];
|
for (const ImageId aliased_id : join_right_aliased_ids) {
|
||||||
|
ImageBase& aliased = slot_images[aliased_id];
|
||||||
|
size_t alias_index = new_image_base.aliased_images.size();
|
||||||
|
if (!AddImageAlias(new_image_base, aliased, new_image_id, aliased_id)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
join_alias_indices.emplace(aliased_id, alias_index);
|
||||||
|
new_image.flags |= ImageFlagBits::Alias;
|
||||||
|
}
|
||||||
|
for (const ImageId aliased_id : join_left_aliased_ids) {
|
||||||
|
ImageBase& aliased = slot_images[aliased_id];
|
||||||
|
size_t alias_index = new_image_base.aliased_images.size();
|
||||||
|
if (!AddImageAlias(aliased, new_image_base, aliased_id, new_image_id)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
join_alias_indices.emplace(aliased_id, alias_index);
|
||||||
|
new_image.flags |= ImageFlagBits::Alias;
|
||||||
|
}
|
||||||
|
for (const ImageId aliased_id : join_bad_overlap_ids) {
|
||||||
|
ImageBase& aliased = slot_images[aliased_id];
|
||||||
|
aliased.overlapping_images.push_back(new_image_id);
|
||||||
|
new_image.overlapping_images.push_back(aliased_id);
|
||||||
|
if (aliased.info.resources.levels == 1 && aliased.info.block.depth == 0 &&
|
||||||
|
aliased.overlapping_images.size() > 1) {
|
||||||
|
aliased.flags |= ImageFlagBits::BadOverlap;
|
||||||
|
}
|
||||||
|
if (new_image.info.resources.levels == 1 && new_image.info.block.depth == 0 &&
|
||||||
|
new_image.overlapping_images.size() > 1) {
|
||||||
|
new_image.flags |= ImageFlagBits::BadOverlap;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const auto& copy_object : join_copies_to_do) {
|
||||||
|
Image& overlap = slot_images[copy_object.id];
|
||||||
|
if (copy_object.is_alias) {
|
||||||
|
if (!overlap.IsSafeDownload()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
const auto alias_pointer = join_alias_indices.find(copy_object.id);
|
||||||
|
if (alias_pointer == join_alias_indices.end()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
const AliasedImage& aliased = new_image.aliased_images[alias_pointer->second];
|
||||||
|
CopyImage(new_image_id, aliased.id, aliased.copies);
|
||||||
|
new_image.modification_tick = overlap.modification_tick;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
if (True(overlap.flags & ImageFlagBits::GpuModified)) {
|
if (True(overlap.flags & ImageFlagBits::GpuModified)) {
|
||||||
new_image.flags |= ImageFlagBits::GpuModified;
|
new_image.flags |= ImageFlagBits::GpuModified;
|
||||||
const auto& resolution = Settings::values.resolution_info;
|
const auto& resolution = Settings::values.resolution_info;
|
||||||
|
@ -1411,35 +1458,15 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
|
||||||
} else {
|
} else {
|
||||||
runtime.CopyImage(new_image, overlap, std::move(copies));
|
runtime.CopyImage(new_image, overlap, std::move(copies));
|
||||||
}
|
}
|
||||||
|
new_image.modification_tick = overlap.modification_tick;
|
||||||
}
|
}
|
||||||
if (True(overlap.flags & ImageFlagBits::Tracked)) {
|
if (True(overlap.flags & ImageFlagBits::Tracked)) {
|
||||||
UntrackImage(overlap, overlap_id);
|
UntrackImage(overlap, copy_object.id);
|
||||||
}
|
|
||||||
UnregisterImage(overlap_id);
|
|
||||||
DeleteImage(overlap_id);
|
|
||||||
}
|
|
||||||
ImageBase& new_image_base = new_image;
|
|
||||||
for (const ImageId aliased_id : right_aliased_ids) {
|
|
||||||
ImageBase& aliased = slot_images[aliased_id];
|
|
||||||
AddImageAlias(new_image_base, aliased, new_image_id, aliased_id);
|
|
||||||
new_image.flags |= ImageFlagBits::Alias;
|
|
||||||
}
|
|
||||||
for (const ImageId aliased_id : left_aliased_ids) {
|
|
||||||
ImageBase& aliased = slot_images[aliased_id];
|
|
||||||
AddImageAlias(aliased, new_image_base, aliased_id, new_image_id);
|
|
||||||
new_image.flags |= ImageFlagBits::Alias;
|
|
||||||
}
|
|
||||||
for (const ImageId aliased_id : bad_overlap_ids) {
|
|
||||||
ImageBase& aliased = slot_images[aliased_id];
|
|
||||||
aliased.overlapping_images.push_back(new_image_id);
|
|
||||||
new_image.overlapping_images.push_back(aliased_id);
|
|
||||||
if (aliased.info.resources.levels == 1 && aliased.overlapping_images.size() > 1) {
|
|
||||||
aliased.flags |= ImageFlagBits::BadOverlap;
|
|
||||||
}
|
|
||||||
if (new_image.info.resources.levels == 1 && new_image.overlapping_images.size() > 1) {
|
|
||||||
new_image.flags |= ImageFlagBits::BadOverlap;
|
|
||||||
}
|
}
|
||||||
|
UnregisterImage(copy_object.id);
|
||||||
|
DeleteImage(copy_object.id);
|
||||||
}
|
}
|
||||||
|
|
||||||
RegisterImage(new_image_id);
|
RegisterImage(new_image_id);
|
||||||
return new_image_id;
|
return new_image_id;
|
||||||
}
|
}
|
||||||
|
|
|
@ -10,7 +10,9 @@
|
||||||
#include <span>
|
#include <span>
|
||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
|
#include <unordered_set>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
#include <boost/container/small_vector.hpp>
|
||||||
#include <queue>
|
#include <queue>
|
||||||
|
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
|
@ -474,6 +476,20 @@ private:
|
||||||
|
|
||||||
Common::ThreadWorker texture_decode_worker{1, "TextureDecoder"};
|
Common::ThreadWorker texture_decode_worker{1, "TextureDecoder"};
|
||||||
std::vector<std::unique_ptr<AsyncDecodeContext>> async_decodes;
|
std::vector<std::unique_ptr<AsyncDecodeContext>> async_decodes;
|
||||||
|
|
||||||
|
// Join caching
|
||||||
|
boost::container::small_vector<ImageId, 4> join_overlap_ids;
|
||||||
|
std::unordered_set<ImageId> join_overlaps_found;
|
||||||
|
boost::container::small_vector<ImageId, 4> join_left_aliased_ids;
|
||||||
|
boost::container::small_vector<ImageId, 4> join_right_aliased_ids;
|
||||||
|
std::unordered_set<ImageId> join_ignore_textures;
|
||||||
|
boost::container::small_vector<ImageId, 4> join_bad_overlap_ids;
|
||||||
|
struct JoinCopy {
|
||||||
|
bool is_alias;
|
||||||
|
ImageId id;
|
||||||
|
};
|
||||||
|
boost::container::small_vector<JoinCopy, 4> join_copies_to_do;
|
||||||
|
std::unordered_map<ImageId, size_t> join_alias_indices;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace VideoCommon
|
} // namespace VideoCommon
|
||||||
|
|
Loading…
Reference in a new issue