From dd12dd4c67dd4bc6e7a7d071b925afc38e687f8b Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Sat, 22 Apr 2023 22:55:03 -0400 Subject: [PATCH 1/8] x64: Deduplicate RDTSC usage --- src/common/CMakeLists.txt | 2 ++ src/common/x64/cpu_detect.cpp | 3 +++ src/common/x64/cpu_wait.cpp | 20 +----------------- src/common/x64/rdtsc.cpp | 39 +++++++++++++++++++++++++++++++++++ src/common/x64/rdtsc.h | 37 +++++++++++++++++++++++++++++++++ 5 files changed, 82 insertions(+), 19 deletions(-) create mode 100644 src/common/x64/rdtsc.cpp create mode 100644 src/common/x64/rdtsc.h diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index efc4a9fe9..3adf13a3f 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -172,6 +172,8 @@ if(ARCHITECTURE_x86_64) x64/cpu_wait.h x64/native_clock.cpp x64/native_clock.h + x64/rdtsc.cpp + x64/rdtsc.h x64/xbyak_abi.h x64/xbyak_util.h ) diff --git a/src/common/x64/cpu_detect.cpp b/src/common/x64/cpu_detect.cpp index 72ed6e96c..c998b1197 100644 --- a/src/common/x64/cpu_detect.cpp +++ b/src/common/x64/cpu_detect.cpp @@ -14,6 +14,7 @@ #include "common/common_types.h" #include "common/logging/log.h" #include "common/x64/cpu_detect.h" +#include "common/x64/rdtsc.h" #ifdef _WIN32 #include @@ -187,6 +188,8 @@ static CPUCaps Detect() { caps.tsc_frequency = static_cast(caps.crystal_frequency) * caps.tsc_crystal_ratio_numerator / caps.tsc_crystal_ratio_denominator; + } else { + caps.tsc_frequency = X64::EstimateRDTSCFrequency(); } } diff --git a/src/common/x64/cpu_wait.cpp b/src/common/x64/cpu_wait.cpp index cfeef6a3d..c53dd4945 100644 --- a/src/common/x64/cpu_wait.cpp +++ b/src/common/x64/cpu_wait.cpp @@ -9,19 +9,11 @@ #include "common/x64/cpu_detect.h" #include "common/x64/cpu_wait.h" +#include "common/x64/rdtsc.h" namespace Common::X64 { #ifdef _MSC_VER -__forceinline static u64 FencedRDTSC() { - _mm_lfence(); - _ReadWriteBarrier(); - const u64 result = __rdtsc(); - _mm_lfence(); - _ReadWriteBarrier(); - return result; -} - __forceinline static void TPAUSE() { // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. // For reference: @@ -32,16 +24,6 @@ __forceinline static void TPAUSE() { _tpause(0, FencedRDTSC() + PauseCycles); } #else -static u64 FencedRDTSC() { - u64 eax; - u64 edx; - asm volatile("lfence\n\t" - "rdtsc\n\t" - "lfence\n\t" - : "=a"(eax), "=d"(edx)); - return (edx << 32) | eax; -} - static void TPAUSE() { // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. // For reference: diff --git a/src/common/x64/rdtsc.cpp b/src/common/x64/rdtsc.cpp new file mode 100644 index 000000000..9273274a3 --- /dev/null +++ b/src/common/x64/rdtsc.cpp @@ -0,0 +1,39 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#include + +#include "common/steady_clock.h" +#include "common/uint128.h" +#include "common/x64/rdtsc.h" + +namespace Common::X64 { + +template +static u64 RoundToNearest(u64 value) { + const auto mod = value % Nearest; + return mod >= (Nearest / 2) ? (value - mod + Nearest) : (value - mod); +} + +u64 EstimateRDTSCFrequency() { + // Discard the first result measuring the rdtsc. + FencedRDTSC(); + std::this_thread::sleep_for(std::chrono::milliseconds{1}); + FencedRDTSC(); + + // Get the current time. + const auto start_time = RealTimeClock::Now(); + const u64 tsc_start = FencedRDTSC(); + // Wait for 100 milliseconds. + std::this_thread::sleep_for(std::chrono::milliseconds{100}); + const auto end_time = RealTimeClock::Now(); + const u64 tsc_end = FencedRDTSC(); + // Calculate differences. + const u64 timer_diff = static_cast( + std::chrono::duration_cast(end_time - start_time).count()); + const u64 tsc_diff = tsc_end - tsc_start; + const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff); + return RoundToNearest<100'000>(tsc_freq); +} + +} // namespace Common::X64 diff --git a/src/common/x64/rdtsc.h b/src/common/x64/rdtsc.h new file mode 100644 index 000000000..0ec4f52f9 --- /dev/null +++ b/src/common/x64/rdtsc.h @@ -0,0 +1,37 @@ +// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project +// SPDX-License-Identifier: GPL-2.0-or-later + +#pragma once + +#ifdef _MSC_VER +#include +#endif + +#include "common/common_types.h" + +namespace Common::X64 { + +#ifdef _MSC_VER +__forceinline static u64 FencedRDTSC() { + _mm_lfence(); + _ReadWriteBarrier(); + const u64 result = __rdtsc(); + _mm_lfence(); + _ReadWriteBarrier(); + return result; +} +#else +static inline u64 FencedRDTSC() { + u64 eax; + u64 edx; + asm volatile("lfence\n\t" + "rdtsc\n\t" + "lfence\n\t" + : "=a"(eax), "=d"(edx)); + return (edx << 32) | eax; +} +#endif + +u64 EstimateRDTSCFrequency(); + +} // namespace Common::X64 From 1492a65454d6a03f641b136cc61e68870be00218 Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Sat, 22 Apr 2023 23:08:28 -0400 Subject: [PATCH 2/8] (wall, native)_clock: Rework NativeClock --- src/common/steady_clock.cpp | 5 +- src/common/wall_clock.cpp | 73 ++++++-------- src/common/wall_clock.h | 54 +++++------ src/common/x64/native_clock.cpp | 165 +++++--------------------------- src/common/x64/native_clock.h | 56 ++++------- 5 files changed, 94 insertions(+), 259 deletions(-) diff --git a/src/common/steady_clock.cpp b/src/common/steady_clock.cpp index 782859196..9415eed29 100644 --- a/src/common/steady_clock.cpp +++ b/src/common/steady_clock.cpp @@ -28,13 +28,12 @@ static s64 GetSystemTimeNS() { // GetSystemTimePreciseAsFileTime returns the file time in 100ns units. static constexpr s64 Multiplier = 100; // Convert Windows epoch to Unix epoch. - static constexpr s64 WindowsEpochToUnixEpochNS = 0x19DB1DED53E8000LL; + static constexpr s64 WindowsEpochToUnixEpoch = 0x19DB1DED53E8000LL; FILETIME filetime; GetSystemTimePreciseAsFileTime(&filetime); return Multiplier * ((static_cast(filetime.dwHighDateTime) << 32) + - static_cast(filetime.dwLowDateTime)) - - WindowsEpochToUnixEpochNS; + static_cast(filetime.dwLowDateTime) - WindowsEpochToUnixEpoch); } #endif diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp index 817e71d52..ad8db06b0 100644 --- a/src/common/wall_clock.cpp +++ b/src/common/wall_clock.cpp @@ -2,88 +2,71 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include "common/steady_clock.h" -#include "common/uint128.h" #include "common/wall_clock.h" #ifdef ARCHITECTURE_x86_64 #include "common/x64/cpu_detect.h" #include "common/x64/native_clock.h" +#include "common/x64/rdtsc.h" #endif namespace Common { class StandardWallClock final : public WallClock { public: - explicit StandardWallClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_) - : WallClock{emulated_cpu_frequency_, emulated_clock_frequency_, false}, - start_time{SteadyClock::Now()} {} + explicit StandardWallClock() : start_time{SteadyClock::Now()} {} - std::chrono::nanoseconds GetTimeNS() override { + std::chrono::nanoseconds GetTimeNS() const override { return SteadyClock::Now() - start_time; } - std::chrono::microseconds GetTimeUS() override { - return std::chrono::duration_cast(GetTimeNS()); + std::chrono::microseconds GetTimeUS() const override { + return static_cast(GetHostTicksElapsed() / NsToUsRatio::den); } - std::chrono::milliseconds GetTimeMS() override { - return std::chrono::duration_cast(GetTimeNS()); + std::chrono::milliseconds GetTimeMS() const override { + return static_cast(GetHostTicksElapsed() / NsToMsRatio::den); } - u64 GetClockCycles() override { - const u128 temp = Common::Multiply64Into128(GetTimeNS().count(), emulated_clock_frequency); - return Common::Divide128On32(temp, NS_RATIO).first; + u64 GetCNTPCT() const override { + return GetHostTicksElapsed() * NsToCNTPCTRatio::num / NsToCNTPCTRatio::den; } - u64 GetCPUCycles() override { - const u128 temp = Common::Multiply64Into128(GetTimeNS().count(), emulated_cpu_frequency); - return Common::Divide128On32(temp, NS_RATIO).first; + u64 GetHostTicksNow() const override { + return static_cast(SteadyClock::Now().time_since_epoch().count()); } - void Pause([[maybe_unused]] bool is_paused) override { - // Do nothing in this clock type. + u64 GetHostTicksElapsed() const override { + return static_cast(GetTimeNS().count()); + } + + bool IsNative() const override { + return false; } private: SteadyClock::time_point start_time; }; +std::unique_ptr CreateOptimalClock() { #ifdef ARCHITECTURE_x86_64 - -std::unique_ptr CreateBestMatchingClock(u64 emulated_cpu_frequency, - u64 emulated_clock_frequency) { const auto& caps = GetCPUCaps(); - u64 rtsc_frequency = 0; - if (caps.invariant_tsc) { - rtsc_frequency = caps.tsc_frequency ? caps.tsc_frequency : EstimateRDTSCFrequency(); - } - // Fallback to StandardWallClock if the hardware TSC does not have the precision greater than: - // - A nanosecond - // - The emulated CPU frequency - // - The emulated clock counter frequency (CNTFRQ) - if (rtsc_frequency <= WallClock::NS_RATIO || rtsc_frequency <= emulated_cpu_frequency || - rtsc_frequency <= emulated_clock_frequency) { - return std::make_unique(emulated_cpu_frequency, - emulated_clock_frequency); + if (caps.invariant_tsc && caps.tsc_frequency >= WallClock::CNTFRQ) { + return std::make_unique(caps.tsc_frequency); } else { - return std::make_unique(emulated_cpu_frequency, emulated_clock_frequency, - rtsc_frequency); + // Fallback to StandardWallClock if the hardware TSC + // - Is not invariant + // - Is not more precise than CNTFRQ + return std::make_unique(); } -} - #else - -std::unique_ptr CreateBestMatchingClock(u64 emulated_cpu_frequency, - u64 emulated_clock_frequency) { - return std::make_unique(emulated_cpu_frequency, emulated_clock_frequency); + return std::make_unique(); +#endif } -#endif - -std::unique_ptr CreateStandardWallClock(u64 emulated_cpu_frequency, - u64 emulated_clock_frequency) { - return std::make_unique(emulated_cpu_frequency, emulated_clock_frequency); +std::unique_ptr CreateStandardWallClock() { + return std::make_unique(); } } // namespace Common diff --git a/src/common/wall_clock.h b/src/common/wall_clock.h index 157ec5eae..a73e6e644 100644 --- a/src/common/wall_clock.h +++ b/src/common/wall_clock.h @@ -5,6 +5,7 @@ #include #include +#include #include "common/common_types.h" @@ -12,50 +13,43 @@ namespace Common { class WallClock { public: - static constexpr u64 NS_RATIO = 1'000'000'000; - static constexpr u64 US_RATIO = 1'000'000; - static constexpr u64 MS_RATIO = 1'000; + static constexpr u64 CNTFRQ = 19'200'000; // CNTPCT_EL0 Frequency = 19.2 MHz virtual ~WallClock() = default; - /// Returns current wall time in nanoseconds - [[nodiscard]] virtual std::chrono::nanoseconds GetTimeNS() = 0; + /// @returns The time in nanoseconds since the construction of this clock. + virtual std::chrono::nanoseconds GetTimeNS() const = 0; - /// Returns current wall time in microseconds - [[nodiscard]] virtual std::chrono::microseconds GetTimeUS() = 0; + /// @returns The time in microseconds since the construction of this clock. + virtual std::chrono::microseconds GetTimeUS() const = 0; - /// Returns current wall time in milliseconds - [[nodiscard]] virtual std::chrono::milliseconds GetTimeMS() = 0; + /// @returns The time in milliseconds since the construction of this clock. + virtual std::chrono::milliseconds GetTimeMS() const = 0; - /// Returns current wall time in emulated clock cycles - [[nodiscard]] virtual u64 GetClockCycles() = 0; + /// @returns The guest CNTPCT ticks since the construction of this clock. + virtual u64 GetCNTPCT() const = 0; - /// Returns current wall time in emulated cpu cycles - [[nodiscard]] virtual u64 GetCPUCycles() = 0; + /// @returns The raw host timer ticks since an indeterminate epoch. + virtual u64 GetHostTicksNow() const = 0; - virtual void Pause(bool is_paused) = 0; + /// @returns The raw host timer ticks since the construction of this clock. + virtual u64 GetHostTicksElapsed() const = 0; - /// Tells if the wall clock, uses the host CPU's hardware clock - [[nodiscard]] bool IsNative() const { - return is_native; - } + /// @returns Whether the clock directly uses the host's hardware clock. + virtual bool IsNative() const = 0; protected: - explicit WallClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_, bool is_native_) - : emulated_cpu_frequency{emulated_cpu_frequency_}, - emulated_clock_frequency{emulated_clock_frequency_}, is_native{is_native_} {} + using NsRatio = std::nano; + using UsRatio = std::micro; + using MsRatio = std::milli; - u64 emulated_cpu_frequency; - u64 emulated_clock_frequency; - -private: - bool is_native; + using NsToUsRatio = std::ratio_divide; + using NsToMsRatio = std::ratio_divide; + using NsToCNTPCTRatio = std::ratio; }; -[[nodiscard]] std::unique_ptr CreateBestMatchingClock(u64 emulated_cpu_frequency, - u64 emulated_clock_frequency); +std::unique_ptr CreateOptimalClock(); -[[nodiscard]] std::unique_ptr CreateStandardWallClock(u64 emulated_cpu_frequency, - u64 emulated_clock_frequency); +std::unique_ptr CreateStandardWallClock(); } // namespace Common diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp index 277b00662..5d1eb0590 100644 --- a/src/common/x64/native_clock.cpp +++ b/src/common/x64/native_clock.cpp @@ -1,164 +1,45 @@ // SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later -#include -#include -#include - -#include "common/atomic_ops.h" -#include "common/steady_clock.h" #include "common/uint128.h" #include "common/x64/native_clock.h" +#include "common/x64/rdtsc.h" -#ifdef _MSC_VER -#include -#endif +namespace Common::X64 { -namespace Common { +NativeClock::NativeClock(u64 rdtsc_frequency_) + : start_ticks{FencedRDTSC()}, rdtsc_frequency{rdtsc_frequency_}, + ns_rdtsc_factor{GetFixedPoint64Factor(NsRatio::den, rdtsc_frequency)}, + us_rdtsc_factor{GetFixedPoint64Factor(UsRatio::den, rdtsc_frequency)}, + ms_rdtsc_factor{GetFixedPoint64Factor(MsRatio::den, rdtsc_frequency)}, + cntpct_rdtsc_factor{GetFixedPoint64Factor(CNTFRQ, rdtsc_frequency)} {} -#ifdef _MSC_VER -__forceinline static u64 FencedRDTSC() { - _mm_lfence(); - _ReadWriteBarrier(); - const u64 result = __rdtsc(); - _mm_lfence(); - _ReadWriteBarrier(); - return result; -} -#else -static u64 FencedRDTSC() { - u64 eax; - u64 edx; - asm volatile("lfence\n\t" - "rdtsc\n\t" - "lfence\n\t" - : "=a"(eax), "=d"(edx)); - return (edx << 32) | eax; -} -#endif - -template -static u64 RoundToNearest(u64 value) { - const auto mod = value % Nearest; - return mod >= (Nearest / 2) ? (value - mod + Nearest) : (value - mod); +std::chrono::nanoseconds NativeClock::GetTimeNS() const { + return std::chrono::nanoseconds{MultiplyHigh(GetHostTicksElapsed(), ns_rdtsc_factor)}; } -u64 EstimateRDTSCFrequency() { - // Discard the first result measuring the rdtsc. - FencedRDTSC(); - std::this_thread::sleep_for(std::chrono::milliseconds{1}); - FencedRDTSC(); - - // Get the current time. - const auto start_time = Common::RealTimeClock::Now(); - const u64 tsc_start = FencedRDTSC(); - // Wait for 250 milliseconds. - std::this_thread::sleep_for(std::chrono::milliseconds{250}); - const auto end_time = Common::RealTimeClock::Now(); - const u64 tsc_end = FencedRDTSC(); - // Calculate differences. - const u64 timer_diff = static_cast( - std::chrono::duration_cast(end_time - start_time).count()); - const u64 tsc_diff = tsc_end - tsc_start; - const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff); - return RoundToNearest<1000>(tsc_freq); +std::chrono::microseconds NativeClock::GetTimeUS() const { + return std::chrono::microseconds{MultiplyHigh(GetHostTicksElapsed(), us_rdtsc_factor)}; } -namespace X64 { -NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_, - u64 rtsc_frequency_) - : WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, true), rtsc_frequency{ - rtsc_frequency_} { - // Thread to re-adjust the RDTSC frequency after 10 seconds has elapsed. - time_sync_thread = std::jthread{[this](std::stop_token token) { - // Get the current time. - const auto start_time = Common::RealTimeClock::Now(); - const u64 tsc_start = FencedRDTSC(); - // Wait for 10 seconds. - if (!Common::StoppableTimedWait(token, std::chrono::seconds{10})) { - return; - } - const auto end_time = Common::RealTimeClock::Now(); - const u64 tsc_end = FencedRDTSC(); - // Calculate differences. - const u64 timer_diff = static_cast( - std::chrono::duration_cast(end_time - start_time).count()); - const u64 tsc_diff = tsc_end - tsc_start; - const u64 tsc_freq = MultiplyAndDivide64(tsc_diff, 1000000000ULL, timer_diff); - rtsc_frequency = tsc_freq; - CalculateAndSetFactors(); - }}; - - time_point.inner.last_measure = FencedRDTSC(); - time_point.inner.accumulated_ticks = 0U; - CalculateAndSetFactors(); +std::chrono::milliseconds NativeClock::GetTimeMS() const { + return std::chrono::milliseconds{MultiplyHigh(GetHostTicksElapsed(), ms_rdtsc_factor)}; } -u64 NativeClock::GetRTSC() { - TimePoint new_time_point{}; - TimePoint current_time_point{}; - - current_time_point.pack = Common::AtomicLoad128(time_point.pack.data()); - do { - const u64 current_measure = FencedRDTSC(); - u64 diff = current_measure - current_time_point.inner.last_measure; - diff = diff & ~static_cast(static_cast(diff) >> 63); // max(diff, 0) - new_time_point.inner.last_measure = current_measure > current_time_point.inner.last_measure - ? current_measure - : current_time_point.inner.last_measure; - new_time_point.inner.accumulated_ticks = current_time_point.inner.accumulated_ticks + diff; - } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack, - current_time_point.pack, current_time_point.pack)); - return new_time_point.inner.accumulated_ticks; +u64 NativeClock::GetCNTPCT() const { + return MultiplyHigh(GetHostTicksElapsed(), cntpct_rdtsc_factor); } -void NativeClock::Pause(bool is_paused) { - if (!is_paused) { - TimePoint current_time_point{}; - TimePoint new_time_point{}; - - current_time_point.pack = Common::AtomicLoad128(time_point.pack.data()); - do { - new_time_point.pack = current_time_point.pack; - new_time_point.inner.last_measure = FencedRDTSC(); - } while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack, - current_time_point.pack, current_time_point.pack)); - } +u64 NativeClock::GetHostTicksNow() const { + return FencedRDTSC(); } -std::chrono::nanoseconds NativeClock::GetTimeNS() { - const u64 rtsc_value = GetRTSC(); - return std::chrono::nanoseconds{MultiplyHigh(rtsc_value, ns_rtsc_factor)}; +u64 NativeClock::GetHostTicksElapsed() const { + return FencedRDTSC() - start_ticks; } -std::chrono::microseconds NativeClock::GetTimeUS() { - const u64 rtsc_value = GetRTSC(); - return std::chrono::microseconds{MultiplyHigh(rtsc_value, us_rtsc_factor)}; +bool NativeClock::IsNative() const { + return true; } -std::chrono::milliseconds NativeClock::GetTimeMS() { - const u64 rtsc_value = GetRTSC(); - return std::chrono::milliseconds{MultiplyHigh(rtsc_value, ms_rtsc_factor)}; -} - -u64 NativeClock::GetClockCycles() { - const u64 rtsc_value = GetRTSC(); - return MultiplyHigh(rtsc_value, clock_rtsc_factor); -} - -u64 NativeClock::GetCPUCycles() { - const u64 rtsc_value = GetRTSC(); - return MultiplyHigh(rtsc_value, cpu_rtsc_factor); -} - -void NativeClock::CalculateAndSetFactors() { - ns_rtsc_factor = GetFixedPoint64Factor(NS_RATIO, rtsc_frequency); - us_rtsc_factor = GetFixedPoint64Factor(US_RATIO, rtsc_frequency); - ms_rtsc_factor = GetFixedPoint64Factor(MS_RATIO, rtsc_frequency); - clock_rtsc_factor = GetFixedPoint64Factor(emulated_clock_frequency, rtsc_frequency); - cpu_rtsc_factor = GetFixedPoint64Factor(emulated_cpu_frequency, rtsc_frequency); -} - -} // namespace X64 - -} // namespace Common +} // namespace Common::X64 diff --git a/src/common/x64/native_clock.h b/src/common/x64/native_clock.h index 03ca291d8..d6f8626c1 100644 --- a/src/common/x64/native_clock.h +++ b/src/common/x64/native_clock.h @@ -3,58 +3,36 @@ #pragma once -#include "common/polyfill_thread.h" #include "common/wall_clock.h" -namespace Common { +namespace Common::X64 { -namespace X64 { class NativeClock final : public WallClock { public: - explicit NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequency_, - u64 rtsc_frequency_); + explicit NativeClock(u64 rdtsc_frequency_); - std::chrono::nanoseconds GetTimeNS() override; + std::chrono::nanoseconds GetTimeNS() const override; - std::chrono::microseconds GetTimeUS() override; + std::chrono::microseconds GetTimeUS() const override; - std::chrono::milliseconds GetTimeMS() override; + std::chrono::milliseconds GetTimeMS() const override; - u64 GetClockCycles() override; + u64 GetCNTPCT() const override; - u64 GetCPUCycles() override; + u64 GetHostTicksNow() const override; - void Pause(bool is_paused) override; + u64 GetHostTicksElapsed() const override; + + bool IsNative() const override; private: - u64 GetRTSC(); + u64 start_ticks; + u64 rdtsc_frequency; - void CalculateAndSetFactors(); - - union alignas(16) TimePoint { - TimePoint() : pack{} {} - u128 pack{}; - struct Inner { - u64 last_measure{}; - u64 accumulated_ticks{}; - } inner; - }; - - TimePoint time_point; - - // factors - u64 clock_rtsc_factor{}; - u64 cpu_rtsc_factor{}; - u64 ns_rtsc_factor{}; - u64 us_rtsc_factor{}; - u64 ms_rtsc_factor{}; - - u64 rtsc_frequency; - - std::jthread time_sync_thread; + u64 ns_rdtsc_factor; + u64 us_rdtsc_factor; + u64 ms_rdtsc_factor; + u64 cntpct_rdtsc_factor; }; -} // namespace X64 -u64 EstimateRDTSCFrequency(); - -} // namespace Common +} // namespace Common::X64 From bbd502f67adb17b00b33f1b7158cfff2b2fa8a3e Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Sat, 22 Apr 2023 23:58:27 -0400 Subject: [PATCH 3/8] nvnflinger: Acquire lock prior to signaling the vsync variable --- src/core/hle/service/nvnflinger/nvnflinger.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/core/hle/service/nvnflinger/nvnflinger.cpp b/src/core/hle/service/nvnflinger/nvnflinger.cpp index 4988e6e17..aa3356611 100644 --- a/src/core/hle/service/nvnflinger/nvnflinger.cpp +++ b/src/core/hle/service/nvnflinger/nvnflinger.cpp @@ -70,7 +70,8 @@ Nvnflinger::Nvnflinger(Core::System& system_, HosBinderDriverServer& hos_binder_ [this](std::uintptr_t, s64 time, std::chrono::nanoseconds ns_late) -> std::optional { vsync_signal.store(true); - vsync_signal.notify_all(); + { const auto lock_guard = Lock(); } + vsync_signal.notify_one(); return std::chrono::nanoseconds(GetNextTicks()); }); From 8e56a84566036cfff0aa5c3d80ae1b051d2bd0bf Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Sun, 23 Apr 2023 00:01:08 -0400 Subject: [PATCH 4/8] core_timing: Use CNTPCT as the guest CPU tick Previously, we were mixing the raw CPU frequency and CNTFRQ. The raw CPU frequency (1020 MHz) should've never been used as CNTPCT (whose frequency is CNTFRQ) is the only counter available. --- src/audio_core/renderer/adsp/adsp.cpp | 1 - .../renderer/adsp/audio_renderer.cpp | 5 +- .../renderer/adsp/command_list_processor.cpp | 1 - .../command/performance/performance.cpp | 15 ++--- src/audio_core/sink/sink_stream.cpp | 1 - src/common/wall_clock.h | 17 ++++++ src/core/CMakeLists.txt | 1 - src/core/core_timing.cpp | 35 +++-------- src/core/core_timing.h | 11 +--- src/core/core_timing_util.h | 58 ------------------- src/core/hle/kernel/k_scheduler.cpp | 5 +- src/core/hle/kernel/svc/svc_info.cpp | 4 +- src/core/hle/service/hid/hidbus.cpp | 1 - src/video_core/gpu.cpp | 14 ++--- 14 files changed, 47 insertions(+), 122 deletions(-) delete mode 100644 src/core/core_timing_util.h diff --git a/src/audio_core/renderer/adsp/adsp.cpp b/src/audio_core/renderer/adsp/adsp.cpp index 74772fc50..b1db31e93 100644 --- a/src/audio_core/renderer/adsp/adsp.cpp +++ b/src/audio_core/renderer/adsp/adsp.cpp @@ -7,7 +7,6 @@ #include "common/logging/log.h" #include "core/core.h" #include "core/core_timing.h" -#include "core/core_timing_util.h" #include "core/memory.h" namespace AudioCore::AudioRenderer::ADSP { diff --git a/src/audio_core/renderer/adsp/audio_renderer.cpp b/src/audio_core/renderer/adsp/audio_renderer.cpp index 8bc39f9f9..9ca716b60 100644 --- a/src/audio_core/renderer/adsp/audio_renderer.cpp +++ b/src/audio_core/renderer/adsp/audio_renderer.cpp @@ -13,7 +13,6 @@ #include "common/thread.h" #include "core/core.h" #include "core/core_timing.h" -#include "core/core_timing_util.h" MICROPROFILE_DEFINE(Audio_Renderer, "Audio", "DSP", MP_RGB(60, 19, 97)); @@ -144,6 +143,7 @@ void AudioRenderer::ThreadFunc(std::stop_token stop_token) { mailbox->ADSPSendMessage(RenderMessage::AudioRenderer_InitializeOK); + // 0.12 seconds (2304000 / 19200000) constexpr u64 max_process_time{2'304'000ULL}; while (!stop_token.stop_requested()) { @@ -184,8 +184,7 @@ void AudioRenderer::ThreadFunc(std::stop_token stop_token) { u64 max_time{max_process_time}; if (index == 1 && command_buffer.applet_resource_user_id == mailbox->GetCommandBuffer(0).applet_resource_user_id) { - max_time = max_process_time - - Core::Timing::CyclesToNs(render_times_taken[0]).count(); + max_time = max_process_time - render_times_taken[0]; if (render_times_taken[0] > max_process_time) { max_time = 0; } diff --git a/src/audio_core/renderer/adsp/command_list_processor.cpp b/src/audio_core/renderer/adsp/command_list_processor.cpp index 7a300d216..3a0f1ae38 100644 --- a/src/audio_core/renderer/adsp/command_list_processor.cpp +++ b/src/audio_core/renderer/adsp/command_list_processor.cpp @@ -9,7 +9,6 @@ #include "common/settings.h" #include "core/core.h" #include "core/core_timing.h" -#include "core/core_timing_util.h" #include "core/memory.h" namespace AudioCore::AudioRenderer::ADSP { diff --git a/src/audio_core/renderer/command/performance/performance.cpp b/src/audio_core/renderer/command/performance/performance.cpp index 985958b03..4a881547f 100644 --- a/src/audio_core/renderer/command/performance/performance.cpp +++ b/src/audio_core/renderer/command/performance/performance.cpp @@ -5,7 +5,6 @@ #include "audio_core/renderer/command/performance/performance.h" #include "core/core.h" #include "core/core_timing.h" -#include "core/core_timing_util.h" namespace AudioCore::AudioRenderer { @@ -18,20 +17,18 @@ void PerformanceCommand::Process(const ADSP::CommandListProcessor& processor) { auto base{entry_address.translated_address}; if (state == PerformanceState::Start) { auto start_time_ptr{reinterpret_cast(base + entry_address.entry_start_time_offset)}; - *start_time_ptr = static_cast( - Core::Timing::CyclesToUs(processor.system->CoreTiming().GetClockTicks() - - processor.start_time - processor.current_processing_time) - .count()); + *start_time_ptr = + static_cast(processor.system->CoreTiming().GetClockTicks() - processor.start_time - + processor.current_processing_time); } else if (state == PerformanceState::Stop) { auto processed_time_ptr{ reinterpret_cast(base + entry_address.entry_processed_time_offset)}; auto entry_count_ptr{ reinterpret_cast(base + entry_address.header_entry_count_offset)}; - *processed_time_ptr = static_cast( - Core::Timing::CyclesToUs(processor.system->CoreTiming().GetClockTicks() - - processor.start_time - processor.current_processing_time) - .count()); + *processed_time_ptr = + static_cast(processor.system->CoreTiming().GetClockTicks() - processor.start_time - + processor.current_processing_time); (*entry_count_ptr)++; } } diff --git a/src/audio_core/sink/sink_stream.cpp b/src/audio_core/sink/sink_stream.cpp index f44fedfd5..9a718a9cc 100644 --- a/src/audio_core/sink/sink_stream.cpp +++ b/src/audio_core/sink/sink_stream.cpp @@ -15,7 +15,6 @@ #include "common/settings.h" #include "core/core.h" #include "core/core_timing.h" -#include "core/core_timing_util.h" namespace AudioCore::Sink { diff --git a/src/common/wall_clock.h b/src/common/wall_clock.h index a73e6e644..56c18ca25 100644 --- a/src/common/wall_clock.h +++ b/src/common/wall_clock.h @@ -38,6 +38,22 @@ public: /// @returns Whether the clock directly uses the host's hardware clock. virtual bool IsNative() const = 0; + static inline u64 NSToCNTPCT(u64 ns) { + return ns * NsToCNTPCTRatio::num / NsToCNTPCTRatio::den; + } + + static inline u64 USToCNTPCT(u64 us) { + return us * UsToCNTPCTRatio::num / UsToCNTPCTRatio::den; + } + + static inline u64 CNTPCTToNS(u64 cntpct) { + return cntpct * NsToCNTPCTRatio::den / NsToCNTPCTRatio::num; + } + + static inline u64 CNTPCTToUS(u64 cntpct) { + return cntpct * UsToCNTPCTRatio::den / UsToCNTPCTRatio::num; + } + protected: using NsRatio = std::nano; using UsRatio = std::micro; @@ -46,6 +62,7 @@ protected: using NsToUsRatio = std::ratio_divide; using NsToMsRatio = std::ratio_divide; using NsToCNTPCTRatio = std::ratio; + using UsToCNTPCTRatio = std::ratio; }; std::unique_ptr CreateOptimalClock(); diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 99602699a..3df4094a7 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -16,7 +16,6 @@ add_library(core STATIC core.h core_timing.cpp core_timing.h - core_timing_util.h cpu_manager.cpp cpu_manager.h crypto/aes_util.cpp diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp index 4f2692b05..9a1d5a69a 100644 --- a/src/core/core_timing.cpp +++ b/src/core/core_timing.cpp @@ -16,7 +16,6 @@ #include "common/microprofile.h" #include "core/core_timing.h" -#include "core/core_timing_util.h" #include "core/hardware_properties.h" namespace Core::Timing { @@ -45,9 +44,7 @@ struct CoreTiming::Event { } }; -CoreTiming::CoreTiming() - : cpu_clock{Common::CreateBestMatchingClock(Hardware::BASE_CLOCK_RATE, Hardware::CNTFREQ)}, - event_clock{Common::CreateStandardWallClock(Hardware::BASE_CLOCK_RATE, Hardware::CNTFREQ)} {} +CoreTiming::CoreTiming() : clock{Common::CreateOptimalClock()} {} CoreTiming::~CoreTiming() { Reset(); @@ -180,7 +177,7 @@ void CoreTiming::AddTicks(u64 ticks_to_add) { void CoreTiming::Idle() { if (!event_queue.empty()) { const u64 next_event_time = event_queue.front().time; - const u64 next_ticks = nsToCycles(std::chrono::nanoseconds(next_event_time)) + 10U; + const u64 next_ticks = Common::WallClock::NSToCNTPCT(next_event_time) + 10U; if (next_ticks > ticks) { ticks = next_ticks; } @@ -193,18 +190,11 @@ void CoreTiming::ResetTicks() { downcount = MAX_SLICE_LENGTH; } -u64 CoreTiming::GetCPUTicks() const { - if (is_multicore) [[likely]] { - return cpu_clock->GetCPUCycles(); - } - return ticks; -} - u64 CoreTiming::GetClockTicks() const { if (is_multicore) [[likely]] { - return cpu_clock->GetClockCycles(); + return clock->GetCNTPCT(); } - return CpuCyclesToClockCycles(ticks); + return ticks; } std::optional CoreTiming::Advance() { @@ -297,9 +287,7 @@ void CoreTiming::ThreadLoop() { } paused_set = true; - event_clock->Pause(true); pause_event.Wait(); - event_clock->Pause(false); } } @@ -315,25 +303,18 @@ void CoreTiming::Reset() { has_started = false; } -std::chrono::nanoseconds CoreTiming::GetCPUTimeNs() const { - if (is_multicore) [[likely]] { - return cpu_clock->GetTimeNS(); - } - return CyclesToNs(ticks); -} - std::chrono::nanoseconds CoreTiming::GetGlobalTimeNs() const { if (is_multicore) [[likely]] { - return event_clock->GetTimeNS(); + return clock->GetTimeNS(); } - return CyclesToNs(ticks); + return std::chrono::nanoseconds{Common::WallClock::CNTPCTToNS(ticks)}; } std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const { if (is_multicore) [[likely]] { - return event_clock->GetTimeUS(); + return clock->GetTimeUS(); } - return CyclesToUs(ticks); + return std::chrono::microseconds{Common::WallClock::CNTPCTToUS(ticks)}; } } // namespace Core::Timing diff --git a/src/core/core_timing.h b/src/core/core_timing.h index e7c4a949f..fdacdd94a 100644 --- a/src/core/core_timing.h +++ b/src/core/core_timing.h @@ -116,15 +116,9 @@ public: return downcount; } - /// Returns current time in emulated CPU cycles - u64 GetCPUTicks() const; - - /// Returns current time in emulated in Clock cycles + /// Returns the current CNTPCT tick value. u64 GetClockTicks() const; - /// Returns current time in nanoseconds. - std::chrono::nanoseconds GetCPUTimeNs() const; - /// Returns current time in microseconds. std::chrono::microseconds GetGlobalTimeUs() const; @@ -142,8 +136,7 @@ private: void Reset(); - std::unique_ptr cpu_clock; - std::unique_ptr event_clock; + std::unique_ptr clock; s64 global_timer = 0; diff --git a/src/core/core_timing_util.h b/src/core/core_timing_util.h deleted file mode 100644 index fe5aaefc7..000000000 --- a/src/core/core_timing_util.h +++ /dev/null @@ -1,58 +0,0 @@ -// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project -// SPDX-License-Identifier: GPL-2.0-or-later - -#pragma once - -#include - -#include "common/common_types.h" -#include "core/hardware_properties.h" - -namespace Core::Timing { - -namespace detail { -constexpr u64 CNTFREQ_ADJUSTED = Hardware::CNTFREQ / 1000; -constexpr u64 BASE_CLOCK_RATE_ADJUSTED = Hardware::BASE_CLOCK_RATE / 1000; -} // namespace detail - -[[nodiscard]] constexpr s64 msToCycles(std::chrono::milliseconds ms) { - return ms.count() * detail::BASE_CLOCK_RATE_ADJUSTED; -} - -[[nodiscard]] constexpr s64 usToCycles(std::chrono::microseconds us) { - return us.count() * detail::BASE_CLOCK_RATE_ADJUSTED / 1000; -} - -[[nodiscard]] constexpr s64 nsToCycles(std::chrono::nanoseconds ns) { - return ns.count() * detail::BASE_CLOCK_RATE_ADJUSTED / 1000000; -} - -[[nodiscard]] constexpr u64 msToClockCycles(std::chrono::milliseconds ms) { - return static_cast(ms.count()) * detail::CNTFREQ_ADJUSTED; -} - -[[nodiscard]] constexpr u64 usToClockCycles(std::chrono::microseconds us) { - return us.count() * detail::CNTFREQ_ADJUSTED / 1000; -} - -[[nodiscard]] constexpr u64 nsToClockCycles(std::chrono::nanoseconds ns) { - return ns.count() * detail::CNTFREQ_ADJUSTED / 1000000; -} - -[[nodiscard]] constexpr u64 CpuCyclesToClockCycles(u64 ticks) { - return ticks * detail::CNTFREQ_ADJUSTED / detail::BASE_CLOCK_RATE_ADJUSTED; -} - -[[nodiscard]] constexpr std::chrono::milliseconds CyclesToMs(s64 cycles) { - return std::chrono::milliseconds(cycles / detail::BASE_CLOCK_RATE_ADJUSTED); -} - -[[nodiscard]] constexpr std::chrono::nanoseconds CyclesToNs(s64 cycles) { - return std::chrono::nanoseconds(cycles * 1000000 / detail::BASE_CLOCK_RATE_ADJUSTED); -} - -[[nodiscard]] constexpr std::chrono::microseconds CyclesToUs(s64 cycles) { - return std::chrono::microseconds(cycles * 1000 / detail::BASE_CLOCK_RATE_ADJUSTED); -} - -} // namespace Core::Timing diff --git a/src/core/hle/kernel/k_scheduler.cpp b/src/core/hle/kernel/k_scheduler.cpp index faa12b4f0..75ce5a23c 100644 --- a/src/core/hle/kernel/k_scheduler.cpp +++ b/src/core/hle/kernel/k_scheduler.cpp @@ -184,7 +184,8 @@ u64 KScheduler::UpdateHighestPriorityThread(KThread* highest_thread) { prev_highest_thread != highest_thread) [[likely]] { if (prev_highest_thread != nullptr) [[likely]] { IncrementScheduledCount(prev_highest_thread); - prev_highest_thread->SetLastScheduledTick(m_kernel.System().CoreTiming().GetCPUTicks()); + prev_highest_thread->SetLastScheduledTick( + m_kernel.System().CoreTiming().GetClockTicks()); } if (m_state.should_count_idle) { if (highest_thread != nullptr) [[likely]] { @@ -351,7 +352,7 @@ void KScheduler::SwitchThread(KThread* next_thread) { // Update the CPU time tracking variables. const s64 prev_tick = m_last_context_switch_time; - const s64 cur_tick = m_kernel.System().CoreTiming().GetCPUTicks(); + const s64 cur_tick = m_kernel.System().CoreTiming().GetClockTicks(); const s64 tick_diff = cur_tick - prev_tick; cur_thread->AddCpuTime(m_core_id, tick_diff); if (cur_process != nullptr) { diff --git a/src/core/hle/kernel/svc/svc_info.cpp b/src/core/hle/kernel/svc/svc_info.cpp index 2b2c878b5..445cdd87b 100644 --- a/src/core/hle/kernel/svc/svc_info.cpp +++ b/src/core/hle/kernel/svc/svc_info.cpp @@ -199,9 +199,9 @@ Result GetInfo(Core::System& system, u64* result, InfoType info_id_type, Handle if (same_thread && info_sub_id == 0xFFFFFFFFFFFFFFFF) { const u64 thread_ticks = current_thread->GetCpuTime(); - out_ticks = thread_ticks + (core_timing.GetCPUTicks() - prev_ctx_ticks); + out_ticks = thread_ticks + (core_timing.GetClockTicks() - prev_ctx_ticks); } else if (same_thread && info_sub_id == system.Kernel().CurrentPhysicalCoreIndex()) { - out_ticks = core_timing.GetCPUTicks() - prev_ctx_ticks; + out_ticks = core_timing.GetClockTicks() - prev_ctx_ticks; } *result = out_ticks; diff --git a/src/core/hle/service/hid/hidbus.cpp b/src/core/hle/service/hid/hidbus.cpp index 5604a6fda..80aac221b 100644 --- a/src/core/hle/service/hid/hidbus.cpp +++ b/src/core/hle/service/hid/hidbus.cpp @@ -5,7 +5,6 @@ #include "common/settings.h" #include "core/core.h" #include "core/core_timing.h" -#include "core/core_timing_util.h" #include "core/hid/hid_types.h" #include "core/hle/kernel/k_event.h" #include "core/hle/kernel/k_readable_event.h" diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 456f733cf..70762c51a 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -194,17 +194,17 @@ struct GPU::Impl { [[nodiscard]] u64 GetTicks() const { // This values were reversed engineered by fincs from NVN - // The gpu clock is reported in units of 385/625 nanoseconds - constexpr u64 gpu_ticks_num = 384; - constexpr u64 gpu_ticks_den = 625; + // The GPU clock is 614.4 MHz + using NsToGPUTickRatio = std::ratio<614'400'000, std::nano::den>; + static_assert(NsToGPUTickRatio::num == 384 && NsToGPUTickRatio::den == 625); + + u64 nanoseconds = system.CoreTiming().GetGlobalTimeNs().count(); - u64 nanoseconds = system.CoreTiming().GetCPUTimeNs().count(); if (Settings::values.use_fast_gpu_time.GetValue()) { nanoseconds /= 256; } - const u64 nanoseconds_num = nanoseconds / gpu_ticks_den; - const u64 nanoseconds_rem = nanoseconds % gpu_ticks_den; - return nanoseconds_num * gpu_ticks_num + (nanoseconds_rem * gpu_ticks_num) / gpu_ticks_den; + + return nanoseconds * NsToGPUTickRatio::num / NsToGPUTickRatio::den; } [[nodiscard]] bool IsAsync() const { From 9dcc7bde8bb05dbc62fa196bcbe1484762e66917 Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Sun, 23 Apr 2023 00:09:49 -0400 Subject: [PATCH 5/8] time: Use compile time division for TimeSpanType conversion --- src/core/hle/service/time/clock_types.h | 13 ++++++++----- .../hle/service/time/standard_steady_clock_core.cpp | 2 +- .../service/time/tick_based_steady_clock_core.cpp | 2 +- src/core/hle/service/time/time.cpp | 4 ++-- src/core/hle/service/time/time_sharedmemory.cpp | 5 +++-- 5 files changed, 15 insertions(+), 11 deletions(-) diff --git a/src/core/hle/service/time/clock_types.h b/src/core/hle/service/time/clock_types.h index e6293ffb9..9fc01ea90 100644 --- a/src/core/hle/service/time/clock_types.h +++ b/src/core/hle/service/time/clock_types.h @@ -3,6 +3,8 @@ #pragma once +#include + #include "common/common_funcs.h" #include "common/common_types.h" #include "common/uuid.h" @@ -74,18 +76,19 @@ static_assert(std::is_trivially_copyable_v, /// https://switchbrew.org/wiki/Glue_services#TimeSpanType struct TimeSpanType { s64 nanoseconds{}; - static constexpr s64 ns_per_second{1000000000ULL}; s64 ToSeconds() const { - return nanoseconds / ns_per_second; + return nanoseconds / std::nano::den; } static TimeSpanType FromSeconds(s64 seconds) { - return {seconds * ns_per_second}; + return {seconds * std::nano::den}; } - static TimeSpanType FromTicks(u64 ticks, u64 frequency) { - return FromSeconds(static_cast(ticks) / static_cast(frequency)); + template + static TimeSpanType FromTicks(u64 ticks) { + using TicksToNSRatio = std::ratio; + return {static_cast(ticks * TicksToNSRatio::num / TicksToNSRatio::den)}; } }; static_assert(sizeof(TimeSpanType) == 8, "TimeSpanType is incorrect size"); diff --git a/src/core/hle/service/time/standard_steady_clock_core.cpp b/src/core/hle/service/time/standard_steady_clock_core.cpp index 3dbbb9850..5627b7003 100644 --- a/src/core/hle/service/time/standard_steady_clock_core.cpp +++ b/src/core/hle/service/time/standard_steady_clock_core.cpp @@ -10,7 +10,7 @@ namespace Service::Time::Clock { TimeSpanType StandardSteadyClockCore::GetCurrentRawTimePoint(Core::System& system) { const TimeSpanType ticks_time_span{ - TimeSpanType::FromTicks(system.CoreTiming().GetClockTicks(), Core::Hardware::CNTFREQ)}; + TimeSpanType::FromTicks(system.CoreTiming().GetClockTicks())}; TimeSpanType raw_time_point{setup_value.nanoseconds + ticks_time_span.nanoseconds}; if (raw_time_point.nanoseconds < cached_raw_time_point.nanoseconds) { diff --git a/src/core/hle/service/time/tick_based_steady_clock_core.cpp b/src/core/hle/service/time/tick_based_steady_clock_core.cpp index 27600413e..0d9fb3143 100644 --- a/src/core/hle/service/time/tick_based_steady_clock_core.cpp +++ b/src/core/hle/service/time/tick_based_steady_clock_core.cpp @@ -10,7 +10,7 @@ namespace Service::Time::Clock { SteadyClockTimePoint TickBasedSteadyClockCore::GetTimePoint(Core::System& system) { const TimeSpanType ticks_time_span{ - TimeSpanType::FromTicks(system.CoreTiming().GetClockTicks(), Core::Hardware::CNTFREQ)}; + TimeSpanType::FromTicks(system.CoreTiming().GetClockTicks())}; return {ticks_time_span.ToSeconds(), GetClockSourceId()}; } diff --git a/src/core/hle/service/time/time.cpp b/src/core/hle/service/time/time.cpp index 868be60c5..7197ca30f 100644 --- a/src/core/hle/service/time/time.cpp +++ b/src/core/hle/service/time/time.cpp @@ -240,8 +240,8 @@ void Module::Interface::CalculateMonotonicSystemClockBaseTimePoint(HLERequestCon const auto current_time_point{steady_clock_core.GetCurrentTimePoint(system)}; if (current_time_point.clock_source_id == context.steady_time_point.clock_source_id) { - const auto ticks{Clock::TimeSpanType::FromTicks(system.CoreTiming().GetClockTicks(), - Core::Hardware::CNTFREQ)}; + const auto ticks{Clock::TimeSpanType::FromTicks( + system.CoreTiming().GetClockTicks())}; const s64 base_time_point{context.offset + current_time_point.time_point - ticks.ToSeconds()}; IPC::ResponseBuilder rb{ctx, (sizeof(s64) / 4) + 2}; diff --git a/src/core/hle/service/time/time_sharedmemory.cpp b/src/core/hle/service/time/time_sharedmemory.cpp index ce1c85bcc..a00676669 100644 --- a/src/core/hle/service/time/time_sharedmemory.cpp +++ b/src/core/hle/service/time/time_sharedmemory.cpp @@ -21,8 +21,9 @@ SharedMemory::~SharedMemory() = default; void SharedMemory::SetupStandardSteadyClock(const Common::UUID& clock_source_id, Clock::TimeSpanType current_time_point) { - const Clock::TimeSpanType ticks_time_span{Clock::TimeSpanType::FromTicks( - system.CoreTiming().GetClockTicks(), Core::Hardware::CNTFREQ)}; + const Clock::TimeSpanType ticks_time_span{ + Clock::TimeSpanType::FromTicks( + system.CoreTiming().GetClockTicks())}; const Clock::SteadyClockContext context{ static_cast(current_time_point.nanoseconds - ticks_time_span.nanoseconds), clock_source_id}; From 907507886d755fa56099713c4b8f05bb640a8b7d Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Sun, 28 May 2023 17:45:47 -0400 Subject: [PATCH 6/8] (wall, native)_clock: Add GetGPUTick Allows us to directly calculate the GPU tick without double conversion to and from the host clock tick. --- src/common/wall_clock.cpp | 8 ++++++-- src/common/wall_clock.h | 20 +++++++++++++++++++- src/common/x64/native_clock.cpp | 7 ++++++- src/common/x64/native_clock.h | 3 +++ src/core/core_timing.cpp | 7 +++++++ src/core/core_timing.h | 3 +++ src/video_core/gpu.cpp | 11 +++-------- 7 files changed, 47 insertions(+), 12 deletions(-) diff --git a/src/common/wall_clock.cpp b/src/common/wall_clock.cpp index ad8db06b0..dc0dcbd68 100644 --- a/src/common/wall_clock.cpp +++ b/src/common/wall_clock.cpp @@ -32,6 +32,10 @@ public: return GetHostTicksElapsed() * NsToCNTPCTRatio::num / NsToCNTPCTRatio::den; } + u64 GetGPUTick() const override { + return GetHostTicksElapsed() * NsToGPUTickRatio::num / NsToGPUTickRatio::den; + } + u64 GetHostTicksNow() const override { return static_cast(SteadyClock::Now().time_since_epoch().count()); } @@ -52,12 +56,12 @@ std::unique_ptr CreateOptimalClock() { #ifdef ARCHITECTURE_x86_64 const auto& caps = GetCPUCaps(); - if (caps.invariant_tsc && caps.tsc_frequency >= WallClock::CNTFRQ) { + if (caps.invariant_tsc && caps.tsc_frequency >= WallClock::GPUTickFreq) { return std::make_unique(caps.tsc_frequency); } else { // Fallback to StandardWallClock if the hardware TSC // - Is not invariant - // - Is not more precise than CNTFRQ + // - Is not more precise than GPUTickFreq return std::make_unique(); } #else diff --git a/src/common/wall_clock.h b/src/common/wall_clock.h index 56c18ca25..fcfdd637c 100644 --- a/src/common/wall_clock.h +++ b/src/common/wall_clock.h @@ -13,7 +13,8 @@ namespace Common { class WallClock { public: - static constexpr u64 CNTFRQ = 19'200'000; // CNTPCT_EL0 Frequency = 19.2 MHz + static constexpr u64 CNTFRQ = 19'200'000; // CNTPCT_EL0 Frequency = 19.2 MHz + static constexpr u64 GPUTickFreq = 614'400'000; // GM20B GPU Tick Frequency = 614.4 MHz virtual ~WallClock() = default; @@ -29,6 +30,9 @@ public: /// @returns The guest CNTPCT ticks since the construction of this clock. virtual u64 GetCNTPCT() const = 0; + /// @returns The guest GPU ticks since the construction of this clock. + virtual u64 GetGPUTick() const = 0; + /// @returns The raw host timer ticks since an indeterminate epoch. virtual u64 GetHostTicksNow() const = 0; @@ -46,6 +50,10 @@ public: return us * UsToCNTPCTRatio::num / UsToCNTPCTRatio::den; } + static inline u64 NSToGPUTick(u64 ns) { + return ns * NsToGPUTickRatio::num / NsToGPUTickRatio::den; + } + static inline u64 CNTPCTToNS(u64 cntpct) { return cntpct * NsToCNTPCTRatio::den / NsToCNTPCTRatio::num; } @@ -54,6 +62,14 @@ public: return cntpct * UsToCNTPCTRatio::den / UsToCNTPCTRatio::num; } + static inline u64 GPUTickToNS(u64 gpu_tick) { + return gpu_tick * NsToGPUTickRatio::den / NsToGPUTickRatio::num; + } + + static inline u64 CNTPCTToGPUTick(u64 cntpct) { + return cntpct * CNTPCTToGPUTickRatio::num / CNTPCTToGPUTickRatio::den; + } + protected: using NsRatio = std::nano; using UsRatio = std::micro; @@ -63,6 +79,8 @@ protected: using NsToMsRatio = std::ratio_divide; using NsToCNTPCTRatio = std::ratio; using UsToCNTPCTRatio = std::ratio; + using NsToGPUTickRatio = std::ratio; + using CNTPCTToGPUTickRatio = std::ratio; }; std::unique_ptr CreateOptimalClock(); diff --git a/src/common/x64/native_clock.cpp b/src/common/x64/native_clock.cpp index 5d1eb0590..7d2a26bd9 100644 --- a/src/common/x64/native_clock.cpp +++ b/src/common/x64/native_clock.cpp @@ -12,7 +12,8 @@ NativeClock::NativeClock(u64 rdtsc_frequency_) ns_rdtsc_factor{GetFixedPoint64Factor(NsRatio::den, rdtsc_frequency)}, us_rdtsc_factor{GetFixedPoint64Factor(UsRatio::den, rdtsc_frequency)}, ms_rdtsc_factor{GetFixedPoint64Factor(MsRatio::den, rdtsc_frequency)}, - cntpct_rdtsc_factor{GetFixedPoint64Factor(CNTFRQ, rdtsc_frequency)} {} + cntpct_rdtsc_factor{GetFixedPoint64Factor(CNTFRQ, rdtsc_frequency)}, + gputick_rdtsc_factor{GetFixedPoint64Factor(GPUTickFreq, rdtsc_frequency)} {} std::chrono::nanoseconds NativeClock::GetTimeNS() const { return std::chrono::nanoseconds{MultiplyHigh(GetHostTicksElapsed(), ns_rdtsc_factor)}; @@ -30,6 +31,10 @@ u64 NativeClock::GetCNTPCT() const { return MultiplyHigh(GetHostTicksElapsed(), cntpct_rdtsc_factor); } +u64 NativeClock::GetGPUTick() const { + return MultiplyHigh(GetHostTicksElapsed(), gputick_rdtsc_factor); +} + u64 NativeClock::GetHostTicksNow() const { return FencedRDTSC(); } diff --git a/src/common/x64/native_clock.h b/src/common/x64/native_clock.h index d6f8626c1..334415eff 100644 --- a/src/common/x64/native_clock.h +++ b/src/common/x64/native_clock.h @@ -19,6 +19,8 @@ public: u64 GetCNTPCT() const override; + u64 GetGPUTick() const override; + u64 GetHostTicksNow() const override; u64 GetHostTicksElapsed() const override; @@ -33,6 +35,7 @@ private: u64 us_rdtsc_factor; u64 ms_rdtsc_factor; u64 cntpct_rdtsc_factor; + u64 gputick_rdtsc_factor; }; } // namespace Common::X64 diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp index 9a1d5a69a..e57bca70a 100644 --- a/src/core/core_timing.cpp +++ b/src/core/core_timing.cpp @@ -197,6 +197,13 @@ u64 CoreTiming::GetClockTicks() const { return ticks; } +u64 CoreTiming::GetGPUTicks() const { + if (is_multicore) [[likely]] { + return clock->GetGPUTick(); + } + return Common::WallClock::CNTPCTToGPUTick(ticks); +} + std::optional CoreTiming::Advance() { std::scoped_lock lock{advance_lock, basic_lock}; global_timer = GetGlobalTimeNs().count(); diff --git a/src/core/core_timing.h b/src/core/core_timing.h index fdacdd94a..1873852c4 100644 --- a/src/core/core_timing.h +++ b/src/core/core_timing.h @@ -119,6 +119,9 @@ public: /// Returns the current CNTPCT tick value. u64 GetClockTicks() const; + /// Returns the current GPU tick value. + u64 GetGPUTicks() const; + /// Returns current time in microseconds. std::chrono::microseconds GetGlobalTimeUs() const; diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 70762c51a..db385076d 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -193,18 +193,13 @@ struct GPU::Impl { } [[nodiscard]] u64 GetTicks() const { - // This values were reversed engineered by fincs from NVN - // The GPU clock is 614.4 MHz - using NsToGPUTickRatio = std::ratio<614'400'000, std::nano::den>; - static_assert(NsToGPUTickRatio::num == 384 && NsToGPUTickRatio::den == 625); - - u64 nanoseconds = system.CoreTiming().GetGlobalTimeNs().count(); + u64 gpu_tick = system.CoreTiming().GetGPUTicks(); if (Settings::values.use_fast_gpu_time.GetValue()) { - nanoseconds /= 256; + gpu_tick /= 256; } - return nanoseconds * NsToGPUTickRatio::num / NsToGPUTickRatio::den; + return gpu_tick; } [[nodiscard]] bool IsAsync() const { From 2e1e7254436b032f133372c76d9484aa756d56df Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Wed, 7 Jun 2023 21:38:28 -0400 Subject: [PATCH 7/8] core_timing: Fix SingleCore cycle timer --- src/common/wall_clock.h | 36 +++++++++++++++------------- src/core/core_timing.cpp | 26 +++++++------------- src/core/core_timing.h | 2 +- src/core/hle/kernel/svc/svc_tick.cpp | 10 +------- 4 files changed, 31 insertions(+), 43 deletions(-) diff --git a/src/common/wall_clock.h b/src/common/wall_clock.h index fcfdd637c..f45d3d8c5 100644 --- a/src/common/wall_clock.h +++ b/src/common/wall_clock.h @@ -13,8 +13,9 @@ namespace Common { class WallClock { public: - static constexpr u64 CNTFRQ = 19'200'000; // CNTPCT_EL0 Frequency = 19.2 MHz - static constexpr u64 GPUTickFreq = 614'400'000; // GM20B GPU Tick Frequency = 614.4 MHz + static constexpr u64 CNTFRQ = 19'200'000; // CNTPCT_EL0 Frequency = 19.2 MHz + static constexpr u64 GPUTickFreq = 614'400'000; // GM20B GPU Tick Frequency = 614.4 MHz + static constexpr u64 CPUTickFreq = 1'020'000'000; // T210/4 A57 CPU Tick Frequency = 1020.0 MHz virtual ~WallClock() = default; @@ -46,28 +47,26 @@ public: return ns * NsToCNTPCTRatio::num / NsToCNTPCTRatio::den; } - static inline u64 USToCNTPCT(u64 us) { - return us * UsToCNTPCTRatio::num / UsToCNTPCTRatio::den; - } - static inline u64 NSToGPUTick(u64 ns) { return ns * NsToGPUTickRatio::num / NsToGPUTickRatio::den; } - static inline u64 CNTPCTToNS(u64 cntpct) { - return cntpct * NsToCNTPCTRatio::den / NsToCNTPCTRatio::num; + // Cycle Timing + + static inline u64 CPUTickToNS(u64 cpu_tick) { + return cpu_tick * CPUTickToNsRatio::num / CPUTickToNsRatio::den; } - static inline u64 CNTPCTToUS(u64 cntpct) { - return cntpct * UsToCNTPCTRatio::den / UsToCNTPCTRatio::num; + static inline u64 CPUTickToUS(u64 cpu_tick) { + return cpu_tick * CPUTickToUsRatio::num / CPUTickToUsRatio::den; } - static inline u64 GPUTickToNS(u64 gpu_tick) { - return gpu_tick * NsToGPUTickRatio::den / NsToGPUTickRatio::num; + static inline u64 CPUTickToCNTPCT(u64 cpu_tick) { + return cpu_tick * CPUTickToCNTPCTRatio::num / CPUTickToCNTPCTRatio::den; } - static inline u64 CNTPCTToGPUTick(u64 cntpct) { - return cntpct * CNTPCTToGPUTickRatio::num / CNTPCTToGPUTickRatio::den; + static inline u64 CPUTickToGPUTick(u64 cpu_tick) { + return cpu_tick * CPUTickToGPUTickRatio::num / CPUTickToGPUTickRatio::den; } protected: @@ -78,9 +77,14 @@ protected: using NsToUsRatio = std::ratio_divide; using NsToMsRatio = std::ratio_divide; using NsToCNTPCTRatio = std::ratio; - using UsToCNTPCTRatio = std::ratio; using NsToGPUTickRatio = std::ratio; - using CNTPCTToGPUTickRatio = std::ratio; + + // Cycle Timing + + using CPUTickToNsRatio = std::ratio; + using CPUTickToUsRatio = std::ratio; + using CPUTickToCNTPCTRatio = std::ratio; + using CPUTickToGPUTickRatio = std::ratio; }; std::unique_ptr CreateOptimalClock(); diff --git a/src/core/core_timing.cpp b/src/core/core_timing.cpp index e57bca70a..4f0a3f8ea 100644 --- a/src/core/core_timing.cpp +++ b/src/core/core_timing.cpp @@ -20,7 +20,7 @@ namespace Core::Timing { -constexpr s64 MAX_SLICE_LENGTH = 4000; +constexpr s64 MAX_SLICE_LENGTH = 10000; std::shared_ptr CreateEvent(std::string name, TimedCallback&& callback) { return std::make_shared(std::move(callback), std::move(name)); @@ -65,7 +65,7 @@ void CoreTiming::Initialize(std::function&& on_thread_init_) { on_thread_init = std::move(on_thread_init_); event_fifo_id = 0; shutting_down = false; - ticks = 0; + cpu_ticks = 0; const auto empty_timed_callback = [](std::uintptr_t, u64, std::chrono::nanoseconds) -> std::optional { return std::nullopt; }; ev_lost = CreateEvent("_lost_event", empty_timed_callback); @@ -170,20 +170,12 @@ void CoreTiming::UnscheduleEvent(const std::shared_ptr& event_type, } void CoreTiming::AddTicks(u64 ticks_to_add) { - ticks += ticks_to_add; - downcount -= static_cast(ticks); + cpu_ticks += ticks_to_add; + downcount -= static_cast(cpu_ticks); } void CoreTiming::Idle() { - if (!event_queue.empty()) { - const u64 next_event_time = event_queue.front().time; - const u64 next_ticks = Common::WallClock::NSToCNTPCT(next_event_time) + 10U; - if (next_ticks > ticks) { - ticks = next_ticks; - } - return; - } - ticks += 1000U; + cpu_ticks += 1000U; } void CoreTiming::ResetTicks() { @@ -194,14 +186,14 @@ u64 CoreTiming::GetClockTicks() const { if (is_multicore) [[likely]] { return clock->GetCNTPCT(); } - return ticks; + return Common::WallClock::CPUTickToCNTPCT(cpu_ticks); } u64 CoreTiming::GetGPUTicks() const { if (is_multicore) [[likely]] { return clock->GetGPUTick(); } - return Common::WallClock::CNTPCTToGPUTick(ticks); + return Common::WallClock::CPUTickToGPUTick(cpu_ticks); } std::optional CoreTiming::Advance() { @@ -314,14 +306,14 @@ std::chrono::nanoseconds CoreTiming::GetGlobalTimeNs() const { if (is_multicore) [[likely]] { return clock->GetTimeNS(); } - return std::chrono::nanoseconds{Common::WallClock::CNTPCTToNS(ticks)}; + return std::chrono::nanoseconds{Common::WallClock::CPUTickToNS(cpu_ticks)}; } std::chrono::microseconds CoreTiming::GetGlobalTimeUs() const { if (is_multicore) [[likely]] { return clock->GetTimeUS(); } - return std::chrono::microseconds{Common::WallClock::CNTPCTToUS(ticks)}; + return std::chrono::microseconds{Common::WallClock::CPUTickToUS(cpu_ticks)}; } } // namespace Core::Timing diff --git a/src/core/core_timing.h b/src/core/core_timing.h index 1873852c4..10db1de55 100644 --- a/src/core/core_timing.h +++ b/src/core/core_timing.h @@ -167,7 +167,7 @@ private: s64 pause_end_time{}; /// Cycle timing - u64 ticks{}; + u64 cpu_ticks{}; s64 downcount{}; }; diff --git a/src/core/hle/kernel/svc/svc_tick.cpp b/src/core/hle/kernel/svc/svc_tick.cpp index 561336482..7dd7c6e51 100644 --- a/src/core/hle/kernel/svc/svc_tick.cpp +++ b/src/core/hle/kernel/svc/svc_tick.cpp @@ -12,16 +12,8 @@ namespace Kernel::Svc { int64_t GetSystemTick(Core::System& system) { LOG_TRACE(Kernel_SVC, "called"); - auto& core_timing = system.CoreTiming(); - // Returns the value of cntpct_el0 (https://switchbrew.org/wiki/SVC#svcGetSystemTick) - const u64 result{core_timing.GetClockTicks()}; - - if (!system.Kernel().IsMulticore()) { - core_timing.AddTicks(400U); - } - - return static_cast(result); + return static_cast(system.CoreTiming().GetClockTicks()); } int64_t GetSystemTick64(Core::System& system) { From 3e6d81a00899f7f488bfedd849edb64f927b124d Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Wed, 7 Jun 2023 22:04:02 -0400 Subject: [PATCH 8/8] nvdisp: Fix SingleCore frametime reporting --- src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp index 5a5b2e305..0fe242e9d 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp @@ -51,8 +51,8 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, android::PixelFormat form stride, format, transform, crop_rect}; system.GPU().RequestSwapBuffers(&framebuffer, fences, num_fences); - system.GetPerfStats().EndSystemFrame(); system.SpeedLimiter().DoSpeedLimiting(system.CoreTiming().GetGlobalTimeUs()); + system.GetPerfStats().EndSystemFrame(); system.GetPerfStats().BeginSystemFrame(); }