Merge pull request #9982 from Morph1984/tpause
x64: Make use of waitpkg instructions for power efficient sleeps
This commit is contained in:
commit
40efd2ab56
8 changed files with 97 additions and 8 deletions
|
@ -160,6 +160,8 @@ if(ARCHITECTURE_x86_64)
|
||||||
PRIVATE
|
PRIVATE
|
||||||
x64/cpu_detect.cpp
|
x64/cpu_detect.cpp
|
||||||
x64/cpu_detect.h
|
x64/cpu_detect.h
|
||||||
|
x64/cpu_wait.cpp
|
||||||
|
x64/cpu_wait.h
|
||||||
x64/native_clock.cpp
|
x64/native_clock.cpp
|
||||||
x64/native_clock.h
|
x64/native_clock.h
|
||||||
x64/xbyak_abi.h
|
x64/xbyak_abi.h
|
||||||
|
|
|
@ -97,6 +97,7 @@ void AppendCPUInfo(FieldCollection& fc) {
|
||||||
add_field("CPU_Extension_x64_PCLMULQDQ", caps.pclmulqdq);
|
add_field("CPU_Extension_x64_PCLMULQDQ", caps.pclmulqdq);
|
||||||
add_field("CPU_Extension_x64_POPCNT", caps.popcnt);
|
add_field("CPU_Extension_x64_POPCNT", caps.popcnt);
|
||||||
add_field("CPU_Extension_x64_SHA", caps.sha);
|
add_field("CPU_Extension_x64_SHA", caps.sha);
|
||||||
|
add_field("CPU_Extension_x64_WAITPKG", caps.waitpkg);
|
||||||
#else
|
#else
|
||||||
fc.AddField(FieldType::UserSystem, "CPU_Model", "Other");
|
fc.AddField(FieldType::UserSystem, "CPU_Model", "Other");
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -144,6 +144,7 @@ static CPUCaps Detect() {
|
||||||
caps.bmi2 = Common::Bit<8>(cpu_id[1]);
|
caps.bmi2 = Common::Bit<8>(cpu_id[1]);
|
||||||
caps.sha = Common::Bit<29>(cpu_id[1]);
|
caps.sha = Common::Bit<29>(cpu_id[1]);
|
||||||
|
|
||||||
|
caps.waitpkg = Common::Bit<5>(cpu_id[2]);
|
||||||
caps.gfni = Common::Bit<8>(cpu_id[2]);
|
caps.gfni = Common::Bit<8>(cpu_id[2]);
|
||||||
|
|
||||||
__cpuidex(cpu_id, 0x00000007, 0x00000001);
|
__cpuidex(cpu_id, 0x00000007, 0x00000001);
|
||||||
|
|
|
@ -67,6 +67,7 @@ struct CPUCaps {
|
||||||
bool pclmulqdq : 1;
|
bool pclmulqdq : 1;
|
||||||
bool popcnt : 1;
|
bool popcnt : 1;
|
||||||
bool sha : 1;
|
bool sha : 1;
|
||||||
|
bool waitpkg : 1;
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
69
src/common/x64/cpu_wait.cpp
Normal file
69
src/common/x64/cpu_wait.cpp
Normal file
|
@ -0,0 +1,69 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#include <thread>
|
||||||
|
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
#include <intrin.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "common/x64/cpu_detect.h"
|
||||||
|
#include "common/x64/cpu_wait.h"
|
||||||
|
|
||||||
|
namespace Common::X64 {
|
||||||
|
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
// Reads the CPU time stamp counter (MSVC intrinsics variant).
// The read is bracketed by an LFENCE on each side, and every LFENCE is followed by
// _ReadWriteBarrier(), a compiler-level barrier, so that neither the CPU nor the
// compiler moves the counter read relative to the surrounding code.
// Returns the raw 64-bit counter value.
__forceinline static u64 FencedRDTSC() {
    _mm_lfence();
    _ReadWriteBarrier();
    const u64 timestamp = __rdtsc();
    _mm_lfence();
    _ReadWriteBarrier();
    return timestamp;
}
|
||||||
|
|
||||||
|
// Pauses the calling logical processor with the TPAUSE instruction until the time
// stamp counter reaches "now + WaitCycles" (MSVC intrinsics variant).
__forceinline static void TPAUSE() {
    // A 100,000-cycle wait is short enough to stay responsive while still saving CPU
    // resources. Expressed as wall-clock time this is roughly:
    //   1 GHz -> 100us
    //   2 GHz -> 50us
    //   4 GHz -> 25us
    static constexpr auto WaitCycles = 100'000;

    // Absolute TSC value at which the pause ends.
    const u64 deadline = FencedRDTSC() + WaitCycles;

    // First argument is the TPAUSE control value.
    // NOTE(review): 0 presumably requests the deeper, more power-saving wait state -
    // confirm against the Intel SDM.
    _tpause(0, deadline);
}
|
||||||
|
#else
|
||||||
|
// Reads the CPU time stamp counter (GCC/Clang inline-assembly variant).
// RDTSC is issued between two LFENCE instructions. Its result arrives split across
// the A and D registers; both halves are captured into 64-bit locals and merged
// into one value.
// Returns the raw 64-bit counter value.
static u64 FencedRDTSC() {
    u64 lo;
    u64 hi;
    asm volatile("lfence\n\t"
                 "rdtsc\n\t"
                 "lfence\n\t"
                 : "=a"(lo), "=d"(hi));
    // The D register carries the upper 32 bits of the counter.
    const u64 upper = hi << 32;
    return upper | lo;
}
|
||||||
|
|
||||||
|
static void TPAUSE() {
|
||||||
|
// 100,000 cycles is a reasonable amount of time to wait to save on CPU resources.
|
||||||
|
// For reference:
|
||||||
|
// At 1 GHz, 100K cycles is 100us
|
||||||
|
// At 2 GHz, 100K cycles is 50us
|
||||||
|
// At 4 GHz, 100K cycles is 25us
|
||||||
|
static constexpr auto PauseCycles = 100'000;
|
||||||
|
const auto tsc = FencedRDTSC() + PauseCycles;
|
||||||
|
const auto eax = static_cast<u32>(tsc & 0xFFFFFFFF);
|
||||||
|
const auto edx = static_cast<u32>(tsc >> 32);
|
||||||
|
asm volatile("tpause %0" : : "r"(0), "d"(edx), "a"(eax));
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
void MicroSleep() {
|
||||||
|
static const bool has_waitpkg = GetCPUCaps().waitpkg;
|
||||||
|
|
||||||
|
if (has_waitpkg) {
|
||||||
|
TPAUSE();
|
||||||
|
} else {
|
||||||
|
std::this_thread::yield();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Common::X64
|
10
src/common/x64/cpu_wait.h
Normal file
10
src/common/x64/cpu_wait.h
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
namespace Common::X64 {
|
||||||
|
|
||||||
|
/**
 * Performs a very short sleep on the calling thread.
 *
 * If the CPU capability flags report WAITPKG support, the wait is carried out with
 * the TPAUSE instruction for a fixed budget of 100,000 TSC cycles; otherwise the
 * call falls back to std::this_thread::yield().
 */
void MicroSleep();
|
||||||
|
|
||||||
|
} // namespace Common::X64
|
|
@ -27,16 +27,13 @@ __forceinline static u64 FencedRDTSC() {
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
static u64 FencedRDTSC() {
|
static u64 FencedRDTSC() {
|
||||||
u64 result;
|
u64 eax;
|
||||||
|
u64 edx;
|
||||||
asm volatile("lfence\n\t"
|
asm volatile("lfence\n\t"
|
||||||
"rdtsc\n\t"
|
"rdtsc\n\t"
|
||||||
"shl $32, %%rdx\n\t"
|
"lfence\n\t"
|
||||||
"or %%rdx, %0\n\t"
|
: "=a"(eax), "=d"(edx));
|
||||||
"lfence"
|
return (edx << 32) | eax;
|
||||||
: "=a"(result)
|
|
||||||
:
|
|
||||||
: "rdx", "memory", "cc");
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -10,6 +10,10 @@
|
||||||
#include "common/windows/timer_resolution.h"
|
#include "common/windows/timer_resolution.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef ARCHITECTURE_x86_64
|
||||||
|
#include "common/x64/cpu_wait.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
#include "common/microprofile.h"
|
#include "common/microprofile.h"
|
||||||
#include "core/core_timing.h"
|
#include "core/core_timing.h"
|
||||||
#include "core/core_timing_util.h"
|
#include "core/core_timing_util.h"
|
||||||
|
@ -269,7 +273,11 @@ void CoreTiming::ThreadLoop() {
|
||||||
if (wait_time >= timer_resolution_ns) {
|
if (wait_time >= timer_resolution_ns) {
|
||||||
Common::Windows::SleepForOneTick();
|
Common::Windows::SleepForOneTick();
|
||||||
} else {
|
} else {
|
||||||
|
#ifdef ARCHITECTURE_x86_64
|
||||||
|
Common::X64::MicroSleep();
|
||||||
|
#else
|
||||||
std::this_thread::yield();
|
std::this_thread::yield();
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue