From ec43dfdadeeb76c43eff142bc0c4a4e7c88d0c79 Mon Sep 17 00:00:00 2001 From: pineappleEA Date: Mon, 4 Apr 2022 00:41:58 +0200 Subject: [PATCH] early-access version 2642 --- README.md | 2 +- src/common/x64/native_clock.cpp | 49 +++++++++++++------ src/video_core/memory_manager.cpp | 3 -- .../renderer_vulkan/vk_compute_pipeline.cpp | 2 +- .../renderer_vulkan/vk_graphics_pipeline.cpp | 14 ++++-- .../renderer_vulkan/vk_graphics_pipeline.h | 1 + .../renderer_vulkan/vk_rasterizer.cpp | 18 +++---- src/video_core/texture_cache/texture_cache.h | 30 +++++------- .../texture_cache/texture_cache_base.h | 3 -- 9 files changed, 64 insertions(+), 58 deletions(-) diff --git a/README.md b/README.md index a63c717ee..c2703d2ad 100755 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ yuzu emulator early access ============= -This is the source code for early-access 2641. +This is the source code for early-access 2642. ## Legal Notice diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp index 7a3f21dcf..7fd9d22f8 100755 --- a/src/common/x64/native_clock.cpp +++ b/src/common/x64/native_clock.cpp @@ -10,25 +10,49 @@ #include "common/uint128.h" #include "common/x64/native_clock.h" +#ifdef _MSC_VER +#include +#endif + namespace Common { +#ifdef _MSC_VER +__forceinline static u64 FencedRDTSC() { + _mm_lfence(); + _ReadWriteBarrier(); + const u64 result = __rdtsc(); + _mm_lfence(); + _ReadWriteBarrier(); + return result; +} +#else +static u64 FencedRDTSC() { + u64 result; + asm volatile("lfence\n\t" + "rdtsc\n\t" + "shl $32, %%rdx\n\t" + "or %%rdx, %0\n\t" + "lfence" + : "=a"(result) + : + : "rdx", "memory", "cc"); + return result; +} +#endif + u64 EstimateRDTSCFrequency() { // Discard the first result measuring the rdtsc. - _mm_mfence(); - __rdtsc(); + FencedRDTSC(); std::this_thread::sleep_for(std::chrono::milliseconds{1}); - _mm_mfence(); - __rdtsc(); + FencedRDTSC(); // Get the current time. const auto start_time = std::chrono::steady_clock::now(); - _mm_mfence(); - const u64 tsc_start = __rdtsc(); + const u64 tsc_start = FencedRDTSC(); // Wait for 200 milliseconds. std::this_thread::sleep_for(std::chrono::milliseconds{200}); const auto end_time = std::chrono::steady_clock::now(); - _mm_mfence(); - const u64 tsc_end = __rdtsc(); + const u64 tsc_end = FencedRDTSC(); // Calculate differences. const u64 timer_diff = static_cast( std::chrono::duration_cast(end_time - start_time).count()); @@ -42,8 +66,7 @@ NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequen u64 rtsc_frequency_) : WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, true), rtsc_frequency{ rtsc_frequency_} { - _mm_mfence(); - time_point.inner.last_measure = __rdtsc(); + time_point.inner.last_measure = FencedRDTSC(); time_point.inner.accumulated_ticks = 0U; ns_rtsc_factor = GetFixedPoint64Factor(NS_RATIO, rtsc_frequency); us_rtsc_factor = GetFixedPoint64Factor(US_RATIO, rtsc_frequency); @@ -58,8 +81,7 @@ u64 NativeClock::GetRTSC() { current_time_point.pack = Common::AtomicLoad128(time_point.pack.data()); do { - _mm_mfence(); - const u64 current_measure = __rdtsc(); + const u64 current_measure = FencedRDTSC(); u64 diff = current_measure - current_time_point.inner.last_measure; diff = diff & ~static_cast(static_cast(diff) >> 63); // max(diff, 0) new_time_point.inner.last_measure = current_measure > current_time_point.inner.last_measure @@ -80,8 +102,7 @@ void NativeClock::Pause(bool is_paused) { current_time_point.pack = Common::AtomicLoad128(time_point.pack.data()); do { new_time_point.pack = current_time_point.pack; - _mm_mfence(); - new_time_point.inner.last_measure = __rdtsc(); + new_time_point.inner.last_measure = FencedRDTSC(); } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack, current_time_point.pack, current_time_point.pack)); } diff --git a/src/video_core/memory_manager.cpp b/src/video_core/memory_manager.cpp index 65a0d826c..722ebd9ad 100755 --- a/src/video_core/memory_manager.cpp +++ b/src/video_core/memory_manager.cpp @@ -40,9 +40,6 @@ GPUVAddr MemoryManager::UpdateRange(GPUVAddr gpu_addr, PageEntry page_entry, std } GPUVAddr MemoryManager::Map(VAddr cpu_addr, GPUVAddr gpu_addr, std::size_t size) { - // Mark any pre-existing rasterizer memory in this range as remapped - rasterizer->ModifyGPUMemory(gpu_addr, size); - const auto it = std::ranges::lower_bound(map_ranges, gpu_addr, {}, &MapRange::first); if (it != map_ranges.end() && it->first == gpu_addr) { it->second = size; diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index de36bcdb7..902a0319c 100755 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -200,7 +200,7 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, }); } const void* const descriptor_data{update_descriptor_queue.UpdateData()}; - const bool is_rescaling = !info.texture_descriptors.empty() || !info.image_descriptors.empty(); + const bool is_rescaling = info.uses_rescaling_uniform; scheduler.Record([this, descriptor_data, is_rescaling, rescaling_data = rescaling.Data()](vk::CommandBuffer cmdbuf) { cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index d514b71d0..15d3ec480 100755 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -238,6 +238,7 @@ GraphicsPipeline::GraphicsPipeline( enabled_uniform_buffer_masks[stage] = info->constant_buffer_mask; std::ranges::copy(info->constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); num_textures += Shader::NumDescriptors(info->texture_descriptors); + uses_rescale_unfiorm |= info->uses_rescaling_uniform; } auto func{[this, shader_notify, &render_pass_cache, &descriptor_pool, pipeline_statistics] { DescriptorLayoutBuilder builder{MakeBuilder(device, stage_infos)}; @@ -471,7 +472,8 @@ void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling) { }); } const bool is_rescaling{texture_cache.IsRescaling()}; - const bool update_rescaling{scheduler.UpdateRescaling(is_rescaling)}; + const bool update_rescaling{uses_rescale_unfiorm ? scheduler.UpdateRescaling(is_rescaling) + : false}; const bool bind_pipeline{scheduler.UpdateGraphicsPipeline(this)}; const void* const descriptor_data{update_descriptor_queue.UpdateData()}; scheduler.Record([this, descriptor_data, bind_pipeline, rescaling_data = rescaling.Data(), @@ -479,10 +481,12 @@ void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling) { if (bind_pipeline) { cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); } - cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS, - RESCALING_LAYOUT_WORDS_OFFSET, sizeof(rescaling_data), - rescaling_data.data()); - if (update_rescaling) { + if (uses_rescale_unfiorm) { + cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS, + RESCALING_LAYOUT_WORDS_OFFSET, sizeof(rescaling_data), + rescaling_data.data()); + } + if (uses_rescale_unfiorm && update_rescaling) { const f32 config_down_factor{Settings::values.resolution_info.down_factor}; const f32 scale_down_factor{is_rescaling ? config_down_factor : 1.0f}; cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_ALL_GRAPHICS, diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h index a0c1d8f07..934b8da8e 100755 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h @@ -151,6 +151,7 @@ private: std::mutex build_mutex; std::atomic_bool is_built{false}; bool uses_push_descriptor{false}; + bool uses_rescale_unfiorm{false}; }; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index fa87d37f8..4b5ee151f 100755 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -234,12 +234,9 @@ void RasterizerVulkan::Clear() { const VkExtent2D render_area = framebuffer->RenderArea(); scheduler.RequestRenderpass(framebuffer); - u32 up_scale = 1; - u32 down_shift = 0; - if (texture_cache.IsRescaling()) { - up_scale = Settings::values.resolution_info.up_scale; - down_shift = Settings::values.resolution_info.down_shift; - } + const bool is_rescaling = texture_cache.IsRescaling(); + const u32 up_scale = is_rescaling ? Settings::values.resolution_info.up_scale : 1U; + const u32 down_shift = is_rescaling ? Settings::values.resolution_info.down_shift : 0U; UpdateViewportsState(regs); VkClearRect clear_rect{ @@ -695,12 +692,9 @@ void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs if (!state_tracker.TouchScissors()) { return; } - u32 up_scale = 1; - u32 down_shift = 0; - if (texture_cache.IsRescaling()) { - up_scale = Settings::values.resolution_info.up_scale; - down_shift = Settings::values.resolution_info.down_shift; - } + const bool is_rescaling = texture_cache.IsRescaling(); + const u32 up_scale = is_rescaling ? Settings::values.resolution_info.up_scale : 1U; + const u32 down_shift = is_rescaling ? Settings::values.resolution_info.down_shift : 0U; const std::array scissors{ GetScissorState(regs, 0, up_scale, down_shift), GetScissorState(regs, 1, up_scale, down_shift), diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 77450ddfe..959f6f228 100755 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -328,7 +328,8 @@ void TextureCache

::UpdateRenderTargets(bool is_clear) { } const bool rescaled = RescaleRenderTargets(is_clear); - if (is_rescaling != rescaled) { + const auto& resolution_info = Settings::values.resolution_info; + if (resolution_info.active && is_rescaling != rescaled) { flags[Dirty::RescaleViewports] = true; flags[Dirty::RescaleScissors] = true; is_rescaling = rescaled; @@ -345,12 +346,8 @@ void TextureCache

::UpdateRenderTargets(bool is_clear) { for (size_t index = 0; index < NUM_RT; ++index) { render_targets.draw_buffers[index] = static_cast(maxwell3d.regs.rt_control.Map(index)); } - u32 up_scale = 1; - u32 down_shift = 0; - if (is_rescaling) { - up_scale = Settings::values.resolution_info.up_scale; - down_shift = Settings::values.resolution_info.down_shift; - } + const u32 up_scale = is_rescaling ? resolution_info.up_scale : 1U; + const u32 down_shift = is_rescaling ? resolution_info.down_shift : 0U; render_targets.size = Extent2D{ (maxwell3d.regs.render_area.width * up_scale) >> down_shift, (maxwell3d.regs.render_area.height * up_scale) >> down_shift, @@ -454,20 +451,15 @@ void TextureCache

::DownloadMemory(VAddr cpu_addr, size_t size) { return slot_images[lhs].modification_tick < slot_images[rhs].modification_tick; }); for (const ImageId image_id : images) { - DownloadImage(image_id); + Image& image = slot_images[image_id]; + auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes); + const auto copies = FullDownloadCopies(image.info); + image.DownloadMemory(map, copies); + runtime.Finish(); + SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); } } -template -void TextureCache

::DownloadImage(ImageId image_id) { - Image& image = slot_images[image_id]; - auto map = runtime.DownloadStagingBuffer(image.unswizzled_size_bytes); - const auto copies = FullDownloadCopies(image.info); - image.DownloadMemory(map, copies); - runtime.Finish(); - SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span); -} - template void TextureCache

::UnmapMemory(VAddr cpu_addr, size_t size) { std::vector deleted_images; @@ -1063,7 +1055,7 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA for (const ImageId overlap_id : ignore_textures) { Image& overlap = slot_images[overlap_id]; if (True(overlap.flags & ImageFlagBits::GpuModified)) { - DownloadImage(overlap_id); + UNIMPLEMENTED(); } if (True(overlap.flags & ImageFlagBits::Tracked)) { UntrackImage(overlap, overlap_id); diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 7d9b9e5e9..b1324edf3 100755 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -139,9 +139,6 @@ public: /// Download contents of host images to guest memory in a region void DownloadMemory(VAddr cpu_addr, size_t size); - /// Download contents of host images to guest memory - void DownloadImage(ImageId image_id); - /// Remove images in a region void UnmapMemory(VAddr cpu_addr, size_t size);