early-access version 2642
This commit is contained in:
parent
3236e6f599
commit
ec43dfdade
9 changed files with 64 additions and 58 deletions
|
@ -1,7 +1,7 @@
|
||||||
yuzu emulator early access
|
yuzu emulator early access
|
||||||
=============
|
=============
|
||||||
|
|
||||||
This is the source code for early-access 2641.
|
This is the source code for early-access 2642.
|
||||||
|
|
||||||
## Legal Notice
|
## Legal Notice
|
||||||
|
|
||||||
|
|
|
@ -10,25 +10,49 @@
|
||||||
#include "common/uint128.h"
|
#include "common/uint128.h"
|
||||||
#include "common/x64/native_clock.h"
|
#include "common/x64/native_clock.h"
|
||||||
|
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
#include <intrin.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace Common {
|
namespace Common {
|
||||||
|
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
__forceinline static u64 FencedRDTSC() {
|
||||||
|
_mm_lfence();
|
||||||
|
_ReadWriteBarrier();
|
||||||
|
const u64 result = __rdtsc();
|
||||||
|
_mm_lfence();
|
||||||
|
_ReadWriteBarrier();
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
static u64 FencedRDTSC() {
|
||||||
|
u64 result;
|
||||||
|
asm volatile("lfence\n\t"
|
||||||
|
"rdtsc\n\t"
|
||||||
|
"shl $32, %%rdx\n\t"
|
||||||
|
"or %%rdx, %0\n\t"
|
||||||
|
"lfence"
|
||||||
|
: "=a"(result)
|
||||||
|
:
|
||||||
|
: "rdx", "memory", "cc");
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
u64 EstimateRDTSCFrequency() {
|
u64 EstimateRDTSCFrequency() {
|
||||||
// Discard the first result measuring the rdtsc.
|
// Discard the first result measuring the rdtsc.
|
||||||
_mm_mfence();
|
FencedRDTSC();
|
||||||
__rdtsc();
|
|
||||||
std::this_thread::sleep_for(std::chrono::milliseconds{1});
|
std::this_thread::sleep_for(std::chrono::milliseconds{1});
|
||||||
_mm_mfence();
|
FencedRDTSC();
|
||||||
__rdtsc();
|
|
||||||
|
|
||||||
// Get the current time.
|
// Get the current time.
|
||||||
const auto start_time = std::chrono::steady_clock::now();
|
const auto start_time = std::chrono::steady_clock::now();
|
||||||
_mm_mfence();
|
const u64 tsc_start = FencedRDTSC();
|
||||||
const u64 tsc_start = __rdtsc();
|
|
||||||
// Wait for 200 milliseconds.
|
// Wait for 200 milliseconds.
|
||||||
std::this_thread::sleep_for(std::chrono::milliseconds{200});
|
std::this_thread::sleep_for(std::chrono::milliseconds{200});
|
||||||
const auto end_time = std::chrono::steady_clock::now();
|
const auto end_time = std::chrono::steady_clock::now();
|
||||||
_mm_mfence();
|
const u64 tsc_end = FencedRDTSC();
|
||||||
const u64 tsc_end = __rdtsc();
|
|
||||||
// Calculate differences.
|
// Calculate differences.
|
||||||
const u64 timer_diff = static_cast<u64>(
|
const u64 timer_diff = static_cast<u64>(
|
||||||
std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time).count());
|
std::chrono::duration_cast<std::chrono::nanoseconds>(end_time - start_time).count());
|
||||||
|
@ -42,8 +66,7 @@ NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequen
|
||||||
u64 rtsc_frequency_)
|
u64 rtsc_frequency_)
|
||||||
: WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, true), rtsc_frequency{
|
: WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, true), rtsc_frequency{
|
||||||
rtsc_frequency_} {
|
rtsc_frequency_} {
|
||||||
_mm_mfence();
|
time_point.inner.last_measure = FencedRDTSC();
|
||||||
time_point.inner.last_measure = __rdtsc();
|
|
||||||
time_point.inner.accumulated_ticks = 0U;
|
time_point.inner.accumulated_ticks = 0U;
|
||||||
ns_rtsc_factor = GetFixedPoint64Factor(NS_RATIO, rtsc_frequency);
|
ns_rtsc_factor = GetFixedPoint64Factor(NS_RATIO, rtsc_frequency);
|
||||||
us_rtsc_factor = GetFixedPoint64Factor(US_RATIO, rtsc_frequency);
|
us_rtsc_factor = GetFixedPoint64Factor(US_RATIO, rtsc_frequency);
|
||||||
|
@ -58,8 +81,7 @@ u64 NativeClock::GetRTSC() {
|
||||||
|
|
||||||
current_time_point.pack = Common::AtomicLoad128(time_point.pack.data());
|
current_time_point.pack = Common::AtomicLoad128(time_point.pack.data());
|
||||||
do {
|
do {
|
||||||
_mm_mfence();
|
const u64 current_measure = FencedRDTSC();
|
||||||
const u64 current_measure = __rdtsc();
|
|
||||||
u64 diff = current_measure - current_time_point.inner.last_measure;
|
u64 diff = current_measure - current_time_point.inner.last_measure;
|
||||||
diff = diff & ~static_cast<u64>(static_cast<s64>(diff) >> 63); // max(diff, 0)
|
diff = diff & ~static_cast<u64>(static_cast<s64>(diff) >> 63); // max(diff, 0)
|
||||||
new_time_point.inner.last_measure = current_measure > current_time_point.inner.last_measure
|
new_time_point.inner.last_measure = current_measure > current_time_point.inner.last_measure
|
||||||
|
@ -80,8 +102,7 @@ void NativeClock::Pause(bool is_paused) {
|
||||||
current_time_point.pack = Common::AtomicLoad128(time_point.pack.data());
|
current_time_point.pack = Common::AtomicLoad128(time_point.pack.data());
|
||||||
do {
|
do {
|
||||||
new_time_point.pack = current_time_point.pack;
|
new_time_point.pack = current_time_point.pack;
|
||||||
_mm_mfence();
|
new_time_point.inner.last_measure = FencedRDTSC();
|
||||||
new_time_point.inner.last_measure = __rdtsc();
|
|
||||||
} while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack,
|
} while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack,
|
||||||
current_time_point.pack, current_time_point.pack));
|
current_time_point.pack, current_time_point.pack));
|
||||||
}
|
}
|
||||||
|
|
|
@ -40,9 +40,6 @@ GPUVAddr MemoryManager::UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std
|
||||||
}
|
}
|
||||||
|
|
||||||
GPUVAddr MemoryManager::Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size) {
|
GPUVAddr MemoryManager::Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size) {
|
||||||
// Mark any pre-existing rasterizer memory in this range as remapped
|
|
||||||
rasterizer->ModifyGPUMemory(gpu_addr, size);
|
|
||||||
|
|
||||||
const auto it = std::ranges::lower_bound(map_ranges, gpu_addr, {}, &MapRange::first);
|
const auto it = std::ranges::lower_bound(map_ranges, gpu_addr, {}, &MapRange::first);
|
||||||
if (it != map_ranges.end() && it->first == gpu_addr) {
|
if (it != map_ranges.end() && it->first == gpu_addr) {
|
||||||
it->second = size;
|
it->second = size;
|
||||||
|
|
|
@ -200,7 +200,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
const void* const descriptor_data{update_descriptor_queue.UpdateData()};
|
const void* const descriptor_data{update_descriptor_queue.UpdateData()};
|
||||||
const bool is_rescaling = !info.texture_descriptors.empty() || !info.image_descriptors.empty();
|
const bool is_rescaling = info.uses_rescaling_uniform;
|
||||||
scheduler.Record([this, descriptor_data, is_rescaling,
|
scheduler.Record([this, descriptor_data, is_rescaling,
|
||||||
rescaling_data = rescaling.Data()](vk::CommandBuffer cmdbuf) {
|
rescaling_data = rescaling.Data()](vk::CommandBuffer cmdbuf) {
|
||||||
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
|
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
|
||||||
|
|
|
@ -238,6 +238,7 @@ GraphicsPipeline::GraphicsPipeline(
|
||||||
enabled_uniform_buffer_masks[stage] = info->constant_buffer_mask;
|
enabled_uniform_buffer_masks[stage] = info->constant_buffer_mask;
|
||||||
std::ranges::copy(info->constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin());
|
std::ranges::copy(info->constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin());
|
||||||
num_textures += Shader::NumDescriptors(info->texture_descriptors);
|
num_textures += Shader::NumDescriptors(info->texture_descriptors);
|
||||||
|
uses_rescale_unfiorm |= info->uses_rescaling_uniform;
|
||||||
}
|
}
|
||||||
auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool, pipeline_statistics] {
|
auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool, pipeline_statistics] {
|
||||||
DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)};
|
DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)};
|
||||||
|
@ -471,7 +472,8 @@ void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling) {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
const bool is_rescaling{texture_cache.IsRescaling()};
|
const bool is_rescaling{texture_cache.IsRescaling()};
|
||||||
const bool update_rescaling{scheduler.UpdateRescaling(is_rescaling)};
|
const bool update_rescaling{uses_rescale_unfiorm ? scheduler.UpdateRescaling(is_rescaling)
|
||||||
|
: false};
|
||||||
const bool bind_pipeline{scheduler.UpdateGraphicsPipeline(this)};
|
const bool bind_pipeline{scheduler.UpdateGraphicsPipeline(this)};
|
||||||
const void* const descriptor_data{update_descriptor_queue.UpdateData()};
|
const void* const descriptor_data{update_descriptor_queue.UpdateData()};
|
||||||
scheduler.Record([this, descriptor_data, bind_pipeline, rescaling_data = rescaling.Data(),
|
scheduler.Record([this, descriptor_data, bind_pipeline, rescaling_data = rescaling.Data(),
|
||||||
|
@ -479,10 +481,12 @@ void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling) {
|
||||||
if (bind_pipeline) {
|
if (bind_pipeline) {
|
||||||
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
|
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
|
||||||
}
|
}
|
||||||
|
if (uses_rescale_unfiorm) {
|
||||||
cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS,
|
cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS,
|
||||||
RESCALING_LAYOUT_WORDS_OFFSET, sizeof(rescaling_data),
|
RESCALING_LAYOUT_WORDS_OFFSET, sizeof(rescaling_data),
|
||||||
rescaling_data.data());
|
rescaling_data.data());
|
||||||
if (update_rescaling) {
|
}
|
||||||
|
if (uses_rescale_unfiorm && update_rescaling) {
|
||||||
const f32 config_down_factor{Settings::values.resolution_info.down_factor};
|
const f32 config_down_factor{Settings::values.resolution_info.down_factor};
|
||||||
const f32 scale_down_factor{is_rescaling ? config_down_factor : 1.0f};
|
const f32 scale_down_factor{is_rescaling ? config_down_factor : 1.0f};
|
||||||
cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS,
|
cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS,
|
||||||
|
|
|
@ -151,6 +151,7 @@ private:
|
||||||
std::mutex build_mutex;
|
std::mutex build_mutex;
|
||||||
std::atomic_bool is_built{false};
|
std::atomic_bool is_built{false};
|
||||||
bool uses_push_descriptor{false};
|
bool uses_push_descriptor{false};
|
||||||
|
bool uses_rescale_unfiorm{false};
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace Vulkan
|
} // namespace Vulkan
|
||||||
|
|
|
@ -234,12 +234,9 @@ void RasterizerVulkan::Clear() {
|
||||||
const VkExtent2D render_area = framebuffer->RenderArea();
|
const VkExtent2D render_area = framebuffer->RenderArea();
|
||||||
scheduler.RequestRenderpass(framebuffer);
|
scheduler.RequestRenderpass(framebuffer);
|
||||||
|
|
||||||
u32 up_scale = 1;
|
const bool is_rescaling = texture_cache.IsRescaling();
|
||||||
u32 down_shift = 0;
|
const u32 up_scale = is_rescaling ? Settings::values.resolution_info.up_scale : 1U;
|
||||||
if (texture_cache.IsRescaling()) {
|
const u32 down_shift = is_rescaling ? Settings::values.resolution_info.down_shift : 0U;
|
||||||
up_scale = Settings::values.resolution_info.up_scale;
|
|
||||||
down_shift = Settings::values.resolution_info.down_shift;
|
|
||||||
}
|
|
||||||
UpdateViewportsState(regs);
|
UpdateViewportsState(regs);
|
||||||
|
|
||||||
VkClearRect clear_rect{
|
VkClearRect clear_rect{
|
||||||
|
@ -695,12 +692,9 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs
|
||||||
if (!state_tracker.TouchScissors()) {
|
if (!state_tracker.TouchScissors()) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
u32 up_scale = 1;
|
const bool is_rescaling = texture_cache.IsRescaling();
|
||||||
u32 down_shift = 0;
|
const u32 up_scale = is_rescaling ? Settings::values.resolution_info.up_scale : 1U;
|
||||||
if (texture_cache.IsRescaling()) {
|
const u32 down_shift = is_rescaling ? Settings::values.resolution_info.down_shift : 0U;
|
||||||
up_scale = Settings::values.resolution_info.up_scale;
|
|
||||||
down_shift = Settings::values.resolution_info.down_shift;
|
|
||||||
}
|
|
||||||
const std::array scissors{
|
const std::array scissors{
|
||||||
GetScissorState(regs, 0, up_scale, down_shift),
|
GetScissorState(regs, 0, up_scale, down_shift),
|
||||||
GetScissorState(regs, 1, up_scale, down_shift),
|
GetScissorState(regs, 1, up_scale, down_shift),
|
||||||
|
|
|
@ -328,7 +328,8 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
|
||||||
}
|
}
|
||||||
|
|
||||||
const bool rescaled = RescaleRenderTargets(is_clear);
|
const bool rescaled = RescaleRenderTargets(is_clear);
|
||||||
if (is_rescaling != rescaled) {
|
const auto& resolution_info = Settings::values.resolution_info;
|
||||||
|
if (resolution_info.active && is_rescaling != rescaled) {
|
||||||
flags[Dirty::RescaleViewports] = true;
|
flags[Dirty::RescaleViewports] = true;
|
||||||
flags[Dirty::RescaleScissors] = true;
|
flags[Dirty::RescaleScissors] = true;
|
||||||
is_rescaling = rescaled;
|
is_rescaling = rescaled;
|
||||||
|
@ -345,12 +346,8 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
|
||||||
for (size_t index = 0; index < NUM_RT; ++index) {
|
for (size_t index = 0; index < NUM_RT; ++index) {
|
||||||
render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d.regs.rt_control.Map(index));
|
render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d.regs.rt_control.Map(index));
|
||||||
}
|
}
|
||||||
u32 up_scale = 1;
|
const u32 up_scale = is_rescaling ? resolution_info.up_scale : 1U;
|
||||||
u32 down_shift = 0;
|
const u32 down_shift = is_rescaling ? resolution_info.down_shift : 0U;
|
||||||
if (is_rescaling) {
|
|
||||||
up_scale = Settings::values.resolution_info.up_scale;
|
|
||||||
down_shift = Settings::values.resolution_info.down_shift;
|
|
||||||
}
|
|
||||||
render_targets.size = Extent2D{
|
render_targets.size = Extent2D{
|
||||||
(maxwell3d.regs.render_area.width * up_scale) >> down_shift,
|
(maxwell3d.regs.render_area.width * up_scale) >> down_shift,
|
||||||
(maxwell3d.regs.render_area.height * up_scale) >> down_shift,
|
(maxwell3d.regs.render_area.height * up_scale) >> down_shift,
|
||||||
|
@ -454,18 +451,13 @@ void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
|
||||||
return slot_images[lhs].modification_tick < slot_images[rhs].modification_tick;
|
return slot_images[lhs].modification_tick < slot_images[rhs].modification_tick;
|
||||||
});
|
});
|
||||||
for (const ImageId image_id : images) {
|
for (const ImageId image_id : images) {
|
||||||
DownloadImage(image_id);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <class P>
|
|
||||||
void TextureCache<P>::DownloadImage(ImageId image_id) {
|
|
||||||
Image& image = slot_images[image_id];
|
Image& image = slot_images[image_id];
|
||||||
auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes);
|
auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes);
|
||||||
const auto copies = FullDownloadCopies(image.info);
|
const auto copies = FullDownloadCopies(image.info);
|
||||||
image.DownloadMemory(map, copies);
|
image.DownloadMemory(map, copies);
|
||||||
runtime.Finish();
|
runtime.Finish();
|
||||||
SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
|
SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
|
@ -1063,7 +1055,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
|
||||||
for (const ImageId overlap_id : ignore_textures) {
|
for (const ImageId overlap_id : ignore_textures) {
|
||||||
Image& overlap = slot_images[overlap_id];
|
Image& overlap = slot_images[overlap_id];
|
||||||
if (True(overlap.flags & ImageFlagBits::GpuModified)) {
|
if (True(overlap.flags & ImageFlagBits::GpuModified)) {
|
||||||
DownloadImage(overlap_id);
|
UNIMPLEMENTED();
|
||||||
}
|
}
|
||||||
if (True(overlap.flags & ImageFlagBits::Tracked)) {
|
if (True(overlap.flags & ImageFlagBits::Tracked)) {
|
||||||
UntrackImage(overlap, overlap_id);
|
UntrackImage(overlap, overlap_id);
|
||||||
|
|
|
@ -139,9 +139,6 @@ public:
|
||||||
/// Download contents of host images to guest memory in a region
|
/// Download contents of host images to guest memory in a region
|
||||||
void DownloadMemory(VAddr cpu_addr, size_t size);
|
void DownloadMemory(VAddr cpu_addr, size_t size);
|
||||||
|
|
||||||
/// Download contents of host images to guest memory
|
|
||||||
void DownloadImage(ImageId image_id);
|
|
||||||
|
|
||||||
/// Remove images in a region
|
/// Remove images in a region
|
||||||
void UnmapMemory(VAddr cpu_addr, size_t size);
|
void UnmapMemory(VAddr cpu_addr, size_t size);
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue