forked from etc/pineapple-src
early-access version 2796
This commit is contained in:
parent
bbb22ae7cd
commit
63427abbf6
90 changed files with 4016 additions and 1215 deletions
55
CMakeModules/MinGWClangCross.cmake
Executable file
55
CMakeModules/MinGWClangCross.cmake
Executable file
|
@ -0,0 +1,55 @@
|
|||
set(MINGW_PREFIX /usr/x86_64-w64-mingw32/)
|
||||
set(CMAKE_SYSTEM_NAME Windows)
|
||||
set(CMAKE_SYSTEM_PROCESSOR x86_64)
|
||||
|
||||
set(CMAKE_FIND_ROOT_PATH ${MINGW_PREFIX})
|
||||
set(SDL2_PATH ${MINGW_PREFIX})
|
||||
set(MINGW_TOOL_PREFIX ${CMAKE_SYSTEM_PROCESSOR}-w64-mingw32-)
|
||||
|
||||
# Specify the cross compiler
|
||||
set(CMAKE_C_COMPILER ${MINGW_TOOL_PREFIX}clang)
|
||||
set(CMAKE_CXX_COMPILER ${MINGW_TOOL_PREFIX}clang++)
|
||||
set(CMAKE_RC_COMPILER ${MINGW_TOOL_PREFIX}windres)
|
||||
set(CMAKE_C_COMPILER_AR ${MINGW_TOOL_PREFIX}ar)
|
||||
set(CMAKE_CXX_COMPILER_AR ${MINGW_TOOL_PREFIX}ar)
|
||||
set(CMAKE_C_COMPILER_RANLIB ${MINGW_TOOL_PREFIX}ranlib)
|
||||
set(CMAKE_CXX_COMPILER_RANLIB ${MINGW_TOOL_PREFIX}ranlib)
|
||||
|
||||
# Mingw tools
|
||||
set(STRIP ${MINGW_TOOL_PREFIX}strip)
|
||||
set(WINDRES ${MINGW_TOOL_PREFIX}windres)
|
||||
set(ENV{PKG_CONFIG} ${MINGW_TOOL_PREFIX}pkg-config)
|
||||
|
||||
# ccache wrapper
|
||||
option(USE_CCACHE "Use ccache for compilation" OFF)
|
||||
if(USE_CCACHE)
|
||||
find_program(CCACHE ccache)
|
||||
if(CCACHE)
|
||||
message(STATUS "Using ccache found in PATH")
|
||||
set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ${CCACHE})
|
||||
set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ${CCACHE})
|
||||
else(CCACHE)
|
||||
message(WARNING "USE_CCACHE enabled, but no ccache found")
|
||||
endif(CCACHE)
|
||||
endif(USE_CCACHE)
|
||||
|
||||
# Search for programs in the build host directories
|
||||
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
|
||||
|
||||
|
||||
# Echo modified cmake vars to screen for debugging purposes
|
||||
if(NOT DEFINED ENV{MINGW_DEBUG_INFO})
|
||||
message("")
|
||||
message("Custom cmake vars: (blank = system default)")
|
||||
message("-----------------------------------------")
|
||||
message("* CMAKE_C_COMPILER : ${CMAKE_C_COMPILER}")
|
||||
message("* CMAKE_CXX_COMPILER : ${CMAKE_CXX_COMPILER}")
|
||||
message("* CMAKE_RC_COMPILER : ${CMAKE_RC_COMPILER}")
|
||||
message("* WINDRES : ${WINDRES}")
|
||||
message("* ENV{PKG_CONFIG} : $ENV{PKG_CONFIG}")
|
||||
message("* STRIP : ${STRIP}")
|
||||
message("* USE_CCACHE : ${USE_CCACHE}")
|
||||
message("")
|
||||
# So that the debug info only appears once
|
||||
set(ENV{MINGW_DEBUG_INFO} SHOWN)
|
||||
endif()
|
|
@ -1,7 +1,7 @@
|
|||
yuzu emulator early access
|
||||
=============
|
||||
|
||||
This is the source code for early-access 2795.
|
||||
This is the source code for early-access 2796.
|
||||
|
||||
## Legal Notice
|
||||
|
||||
|
|
5
externals/dynarmic/externals/CMakeLists.txt
vendored
5
externals/dynarmic/externals/CMakeLists.txt
vendored
|
@ -1,5 +1,8 @@
|
|||
# Always build externals as static libraries, even when dynarmic is built as shared
|
||||
set(BUILD_SHARED_LIBS OFF)
|
||||
if (BUILD_SHARED_LIBS)
|
||||
set(BUILD_SHARED_LIBS OFF)
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
endif()
|
||||
|
||||
# For libraries that already come with a CMakeLists file,
|
||||
# simply add the directory to that file as a subdirectory
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
cmake_minimum_required(VERSION 3.12 FATAL_ERROR)
|
||||
include(GNUInstallDirs)
|
||||
|
||||
project(mcl LANGUAGES CXX VERSION 0.1.5)
|
||||
project(mcl LANGUAGES CXX VERSION 0.1.8)
|
||||
|
||||
# Project options
|
||||
option(MCL_WARNINGS_AS_ERRORS "Warnings as errors" ON)
|
||||
|
|
|
@ -143,11 +143,9 @@ template<size_t bit_count, BitIntegral T>
|
|||
constexpr T sign_extend(T value) {
|
||||
static_assert(bit_count != 0, "cannot sign-extend zero-sized value");
|
||||
|
||||
constexpr T m = ones<bit_count, T>();
|
||||
if (get_bit<bit_count - 1, T>(value)) {
|
||||
return value | ~m;
|
||||
}
|
||||
return value;
|
||||
using S = std::make_signed_t<T>;
|
||||
constexpr size_t shift_amount = bitsizeof<T> - bit_count;
|
||||
return static_cast<T>(static_cast<S>(value << shift_amount) >> shift_amount);
|
||||
}
|
||||
|
||||
/// Sign-extends a value that has bit_count bits to the full bitwidth of type T.
|
||||
|
@ -155,11 +153,9 @@ template<BitIntegral T>
|
|||
constexpr T sign_extend(size_t bit_count, T value) {
|
||||
ASSERT_MSG(bit_count != 0, "cannot sign-extend zero-sized value");
|
||||
|
||||
const T m = ones<T>(bit_count);
|
||||
if (get_bit<T>(bit_count - 1, value)) {
|
||||
return value | ~m;
|
||||
}
|
||||
return value;
|
||||
using S = std::make_signed_t<T>;
|
||||
const size_t shift_amount = bitsizeof<T> - bit_count;
|
||||
return static_cast<T>(static_cast<S>(value << shift_amount) >> shift_amount);
|
||||
}
|
||||
|
||||
/// Replicate an element across a value of type T.
|
||||
|
|
|
@ -63,6 +63,7 @@ target_include_directories(mcl
|
|||
)
|
||||
target_compile_options(mcl PRIVATE ${MCL_CXX_FLAGS})
|
||||
target_link_libraries(mcl PUBLIC $<BUILD_INTERFACE:fmt::fmt>)
|
||||
set_property(TARGET mcl PROPERTY POSITION_INDEPENDENT_CODE ON)
|
||||
add_library(merry::mcl ALIAS mcl)
|
||||
|
||||
include(CreateTargetDirectoryGroups)
|
||||
|
|
|
@ -14,6 +14,7 @@ namespace mcl::detail {
|
|||
[[noreturn]] void assert_terminate_impl(fmt::string_view msg, fmt::format_args args) {
|
||||
fmt::print(stderr, "assertion failed: ");
|
||||
fmt::vprint(stderr, msg, args);
|
||||
std::fflush(stderr);
|
||||
std::terminate();
|
||||
}
|
||||
|
||||
|
|
1
externals/dynarmic/externals/xbyak/.github/FUNDING.yml
vendored
Executable file
1
externals/dynarmic/externals/xbyak/.github/FUNDING.yml
vendored
Executable file
|
@ -0,0 +1 @@
|
|||
github: herumi
|
|
@ -7,5 +7,7 @@ jobs:
|
|||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- run: sudo apt update
|
||||
- run: sudo apt install nasm yasm g++-multilib tcsh
|
||||
- run: make test
|
||||
- run: make -C sample CXXFLAGS="-DXBYAK_NO_EXCEPTION"
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
cmake_minimum_required(VERSION 2.6...3.0.2)
|
||||
|
||||
project(xbyak CXX)
|
||||
project(xbyak LANGUAGES CXX VERSION 6.60.1)
|
||||
|
||||
file(GLOB headers xbyak/*.h)
|
||||
|
||||
|
@ -18,17 +18,26 @@ if (DEFINED CMAKE_VERSION AND CMAKE_VERSION VERSION_GREATER_EQUAL 3.0.2)
|
|||
install(
|
||||
TARGETS ${PROJECT_NAME}
|
||||
EXPORT ${PROJECT_NAME}-targets
|
||||
INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}
|
||||
)
|
||||
|
||||
configure_file(
|
||||
include(CMakePackageConfigHelpers)
|
||||
configure_package_config_file(
|
||||
cmake/config.cmake.in
|
||||
${PROJECT_NAME}Config.cmake
|
||||
@ONLY
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}-config.cmake"
|
||||
INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}
|
||||
)
|
||||
write_basic_package_version_file(
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}-config-version.cmake"
|
||||
COMPATIBILITY SameMajorVersion
|
||||
)
|
||||
|
||||
install(
|
||||
FILES ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}
|
||||
FILES
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}-config.cmake"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}-config-version.cmake"
|
||||
DESTINATION
|
||||
${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}
|
||||
)
|
||||
|
||||
install(
|
||||
|
|
20
externals/dynarmic/externals/xbyak/COPYRIGHT
vendored
20
externals/dynarmic/externals/xbyak/COPYRIGHT
vendored
|
@ -25,23 +25,3 @@ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
THE POSSIBILITY OF SUCH DAMAGE.
|
||||
-----------------------------------------------------------------------------
|
||||
ソースコード形式かバイナリ形式か、変更するかしないかを問わず、以下の条件を満た
|
||||
す場合に限り、再頒布および使用が許可されます。
|
||||
|
||||
ソースコードを再頒布する場合、上記の著作権表示、本条件一覧、および下記免責条項
|
||||
を含めること。
|
||||
バイナリ形式で再頒布する場合、頒布物に付属のドキュメント等の資料に、上記の著作
|
||||
権表示、本条件一覧、および下記免責条項を含めること。
|
||||
書面による特別の許可なしに、本ソフトウェアから派生した製品の宣伝または販売促進
|
||||
に、著作権者の名前またはコントリビューターの名前を使用してはならない。
|
||||
本ソフトウェアは、著作権者およびコントリビューターによって「現状のまま」提供さ
|
||||
れており、明示黙示を問わず、商業的な使用可能性、および特定の目的に対する適合性
|
||||
に関する暗黙の保証も含め、またそれに限定されない、いかなる保証もありません。
|
||||
著作権者もコントリビューターも、事由のいかんを問わず、 損害発生の原因いかんを
|
||||
問わず、かつ責任の根拠が契約であるか厳格責任であるか(過失その他の)不法行為で
|
||||
あるかを問わず、仮にそのような損害が発生する可能性を知らされていたとしても、
|
||||
本ソフトウェアの使用によって発生した(代替品または代用サービスの調達、使用の
|
||||
喪失、データの喪失、利益の喪失、業務の中断も含め、またそれに限定されない)直接
|
||||
損害、間接損害、偶発的な損害、特別損害、懲罰的損害、または結果損害について、
|
||||
一切責任を負わないものとします。
|
||||
|
|
|
@ -1 +1,3 @@
|
|||
@PACKAGE_INIT@
|
||||
|
||||
include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@-targets.cmake")
|
||||
|
|
8
externals/dynarmic/externals/xbyak/cmake/meson-config.cmake.in
vendored
Executable file
8
externals/dynarmic/externals/xbyak/cmake/meson-config.cmake.in
vendored
Executable file
|
@ -0,0 +1,8 @@
|
|||
@PACKAGE_INIT@
|
||||
|
||||
if(NOT TARGET @TARGET_NAME@)
|
||||
add_library(@TARGET_NAME@ INTERFACE IMPORTED)
|
||||
set_target_properties(@TARGET_NAME@ PROPERTIES
|
||||
INTERFACE_INCLUDE_DIRECTORIES "@ABSOLUTE_INCLUDE_DIR@"
|
||||
)
|
||||
endif()
|
182
externals/dynarmic/externals/xbyak/doc/changelog.md
vendored
Executable file
182
externals/dynarmic/externals/xbyak/doc/changelog.md
vendored
Executable file
|
@ -0,0 +1,182 @@
|
|||
# History
|
||||
|
||||
* 2022/Jun/15 ver 6.60.1 fix link error of Xbyak::util::Cpu on Visual Studio with /O0 option
|
||||
* 2022/Jun/06 ver 6.60 change the version format to avoid it going backward
|
||||
* 2022/Jun/01 ver 6.06 refactor Cpu::Type class and improve MmapAllocator when XBYAK_USE_MEMFD is defined.
|
||||
* 2022/Mar/20 ver 6.052 add Cpu::operator==()
|
||||
* 2022/Mar/13 ver 6.051 fix compile error when XBYAK_NO_EXCEPTION is defined
|
||||
* 2022/Mar/12 ver 6.05 add movdiri, movdir64b, clwb, cldemote
|
||||
* 2022/Apr/22 ver 6.041 consider Android and mingw
|
||||
* 2022/Apr/05 ver 6.04 add tpause, umonitor, umwait
|
||||
* 2022/Mar/08 ver 6.03 MmapAllocator supports memfd with user-defined strings.
|
||||
* 2022/Jan/28 ver 6.02 strict check the range of 32-bit dispacement
|
||||
* 2021/Dec/14 ver 6.01 support T_FAR jump/call and retf
|
||||
* 2021/Sep/14 ver 6.00 fully support AVX512-FP16
|
||||
* 2021/Sep/09 ver 5.997 fix vrndscale* to support {sae}
|
||||
* 2021/Sep/03 ver 5.996 fix v{add,sub,mul,div,max,min}{sd,ss} to support T_rd_sae.
|
||||
* 2021/Aug/15 ver 5.995 add a label to /proc/self/maps if XBYAK_USE_MEMFD is defined on Linux
|
||||
* 2021/Jun/17 ver 5.994 add alias of vcmpXX{ps,pd,ss,sd} with mask register
|
||||
* 2021/Jun/06 ver 5.993 strict check of gather/scatter register combination
|
||||
* 2021/May/09 ver 5.992 support endbr32 and endbr64
|
||||
* 2020/Nov/16 ver 5.991 disable constexpr for gcc-5 with -std=c++-14
|
||||
* 2020/Oct/19 ver 5.99 support VNNI instructions(Thanks to akharito)
|
||||
* 2020/Oct/17 ver 5.98 support the form of [scale * reg]
|
||||
* 2020/Sep/08 ver 5.97 replace uint32 with uint32_t etc.
|
||||
* 2020/Aug/28 ver 5.95 some constructors of register classes support constexpr if C++14 or later
|
||||
* 2020/Aug/04 ver 5.941 `CodeGenerator::reset()` calls `ClearError()`.
|
||||
* 2020/Jul/28 ver 5.94 remove #include <winsock2.h> (only windows)
|
||||
* 2020/Jul/21 ver 5.93 support exception-less mode
|
||||
* 2020/Jun/30 ver 5.92 support Intel AMX instruction set (Thanks to nshustrov)
|
||||
* 2020/Jun/22 ver 5.913 fix mov(r64, imm64) on 32-bit env with XBYAK64
|
||||
* 2020/Jun/19 ver 5.912 define MAP_JIT on macOS regardless of Xcode version (Thanks to rsdubtso)
|
||||
* 2020/May/10 ver 5.911 XBYAK_USE_MMAP_ALLOCATOR is defined unless XBYAK_DONT_USE_MMAP_ALLOCATOR is defined.
|
||||
* 2020/Apr/20 ver 5.91 accept mask register k0 (it means no mask)
|
||||
* 2020/Apr/09 ver 5.90 kmov{b,d,w,q} throws exception for an unsupported register
|
||||
* 2020/Feb/26 ver 5.891 fix typo of type
|
||||
* 2020/Jan/03 ver 5.89 fix error of vfpclasspd
|
||||
* 2019/Dec/20 ver 5.88 fix compile error on Windows
|
||||
* 2019/Dec/19 ver 5.87 add setDefaultJmpNEAR(), which deals with `jmp` of an undefined label as T_NEAR if no type is specified.
|
||||
* 2019/Dec/13 ver 5.86 [changed] revert to the behavior before v5.84 if -fno-operator-names is defined (and() is available)
|
||||
* 2019/Dec/07 ver 5.85 append MAP_JIT flag to mmap for macOS mojave or later
|
||||
* 2019/Nov/29 ver 5.84 [changed] XBYAK_NO_OP_NAMES is defined unless XBYAK_USE_OP_NAMES is defined
|
||||
* 2019/Oct/12 ver 5.83 exit(1) was removed
|
||||
* 2019/Sep/23 ver 5.82 support monitorx, mwaitx, clzero (thanks to @MagurosanTeam)
|
||||
* 2019/Sep/14 ver 5.81 support some generic mnemonics.
|
||||
* 2019/Aug/01 ver 5.802 fix detection of AVX512_BF16 (thanks to vpirogov)
|
||||
* 2019/May/27 support vp2intersectd, vp2intersectq (not tested)
|
||||
* 2019/May/26 ver 5.80 support vcvtne2ps2bf16, vcvtneps2bf16, vdpbf16ps
|
||||
* 2019/Apr/27 ver 5.79 vcmppd/vcmpps supports ptr_b(thanks to jkopinsky)
|
||||
* 2019/Apr/15 ver 5.78 rewrite Reg::changeBit() (thanks to MerryMage)
|
||||
* 2019/Mar/06 ver 5.77 fix number of cores that share LLC cache by densamoilov
|
||||
* 2019/Jan/17 ver 5.76 add Cpu::getNumCores() by shelleygoel
|
||||
* 2018/Oct/31 ver 5.751 recover Xbyak::CastTo for compatibility
|
||||
* 2018/Oct/29 ver 5.75 unlink LabelManager from Label when msg is destroyed
|
||||
* 2018/Oct/21 ver 5.74 support RegRip +/- int. Xbyak::CastTo is removed
|
||||
* 2018/Oct/15 util::AddressFrame uses push/pop instead of mov
|
||||
* 2018/Sep/19 ver 5.73 fix evex encoding of vpslld, vpslldq, vpsllw, etc for (reg, mem, imm8)
|
||||
* 2018/Sep/19 ver 5.72 fix the encoding of vinsertps for disp8N(Thanks to petercaday)
|
||||
* 2018/Sep/04 ver 5.71 L() returns a new label instance
|
||||
* 2018/Aug/27 ver 5.70 support setProtectMode() and DontUseProtect for read/exec setting
|
||||
* 2018/Aug/24 ver 5.68 fix wrong VSIB encoding with vector index >= 16(thanks to petercaday)
|
||||
* 2018/Aug/14 ver 5.67 remove mutable in Address ; fix setCacheHierarchy for cloud vm
|
||||
* 2018/Jul/26 ver 5.661 support mingw64
|
||||
* 2018/Jul/24 ver 5.66 add CodeArray::PROTECT_RE to mode of protect()
|
||||
* 2018/Jun/26 ver 5.65 fix push(qword [mem])
|
||||
* 2018/Mar/07 ver 5.64 fix zero division in Cpu() on some cpu
|
||||
* 2018/Feb/14 ver 5.63 fix Cpu::setCacheHierarchy() and fix EvexModifierZero for clang<3.9(thanks to mgouicem)
|
||||
* 2018/Feb/13 ver 5.62 Cpu::setCacheHierarchy() by mgouicem and rsdubtso
|
||||
* 2018/Feb/07 ver 5.61 vmov* supports mem{k}{z}(I forgot it)
|
||||
* 2018/Jan/24 ver 5.601 add xword, yword, etc. into Xbyak::util namespace
|
||||
* 2018/Jan/05 ver 5.60 support AVX-512 for Ice lake(319433-030.pdf)
|
||||
* 2017/Aug/22 ver 5.53 fix mpx encoding, add bnd() prefix
|
||||
* 2017/Aug/18 ver 5.52 fix align (thanks to MerryMage)
|
||||
* 2017/Aug/17 ver 5.51 add multi-byte nop and align() uses it(thanks to inolen)
|
||||
* 2017/Aug/08 ver 5.50 add mpx(thanks to magurosan)
|
||||
* 2017/Aug/08 ver 5.45 add sha(thanks to magurosan)
|
||||
* 2017/Aug/08 ver 5.44 add prefetchw(thanks to rsdubtso)
|
||||
* 2017/Jul/12 ver 5.432 reduce warnings of PVS studio
|
||||
* 2017/Jul/09 ver 5.431 fix hasRex() (no affect) (thanks to drillsar)
|
||||
* 2017/May/14 ver 5.43 fix CodeGenerator::resetSize() (thanks to gibbed)
|
||||
* 2017/May/13 ver 5.42 add movs{b,w,d,q}
|
||||
* 2017/Jan/26 ver 5.41 add prefetchwt1 and support for scale == 0(thanks to rsdubtso)
|
||||
* 2016/Dec/14 ver 5.40 add Label::getAddress() method to get the pointer specified by the label
|
||||
* 2016/Dec/09 ver 5.34 fix handling of negative offsets when encoding disp8N(thanks to rsdubtso)
|
||||
* 2016/Dec/08 ver 5.33 fix encoding of vpbroadcast{b,w,d,q}, vpinsr{b,w}, vpextr{b,w} for disp8N
|
||||
* 2016/Dec/01 ver 5.32 rename __xgetbv() to _xgetbv() to support clang for Visual Studio(thanks to freiro)
|
||||
* 2016/Nov/27 ver 5.31 rename AVX512_4VNNI to AVX512_4VNNIW
|
||||
* 2016/Nov/27 ver 5.30 add AVX512_4VNNI, AVX512_4FMAPS instructions(thanks to rsdubtso)
|
||||
* 2016/Nov/26 ver 5.20 add detection of AVX512_4VNNI and AVX512_4FMAPS(thanks to rsdubtso)
|
||||
* 2016/Nov/20 ver 5.11 lost vptest for ymm(thanks to gregory38)
|
||||
* 2016/Nov/20 ver 5.10 add addressing [rip+&var]
|
||||
* 2016/Sep/29 ver 5.03 fix detection ERR_INVALID_OPMASK_WITH_MEMORY(thanks to PVS-Studio)
|
||||
* 2016/Aug/15 ver 5.02 xbyak does not include xbyak_bin2hex.h
|
||||
* 2016/Aug/15 ver 5.011 fix detection of version of gcc 5.4
|
||||
* 2016/Aug/03 ver 5.01 disable omitted operand
|
||||
* 2016/Jun/24 ver 5.00 support avx-512 instruction set
|
||||
* 2016/Jun/13 avx-512 add mask instructions
|
||||
* 2016/May/05 ver 4.91 add detection of AVX-512 to Xbyak::util::Cpu
|
||||
* 2016/Mar/14 ver 4.901 comment to ready() function(thanks to skmp)
|
||||
* 2016/Feb/04 ver 4.90 add jcc(const void *addr);
|
||||
* 2016/Jan/30 ver 4.89 vpblendvb supports ymm reg(thanks to John Funnell)
|
||||
* 2016/Jan/24 ver 4.88 lea, cmov supports 16-bit register(thanks to whyisthisfieldhere)
|
||||
* 2015/Oct/05 ver 4.87 support segment selectors
|
||||
* 2015/Aug/18 ver 4.86 fix [rip + label] addressing with immediate value(thanks to whyisthisfieldhere)
|
||||
* 2015/Aug/10 ver 4.85 Address::operator==() is not correct(thanks to inolen)
|
||||
* 2015/Jun/22 ver 4.84 call() support variadic template if available(thanks to randomstuff)
|
||||
* 2015/Jun/16 ver 4.83 support movbe(thanks to benvanik)
|
||||
* 2015/May/24 ver 4.82 support detection of F16C
|
||||
* 2015/Apr/25 ver 4.81 fix the condition to throw exception for setSize(thanks to whyisthisfieldhere)
|
||||
* 2015/Apr/22 ver 4.80 rip supports label(thanks to whyisthisfieldhere)
|
||||
* 2015/Jar/28 ver 4.71 support adcx, adox, cmpxchg, rdseed, stac
|
||||
* 2014/Oct/14 ver 4.70 support MmapAllocator
|
||||
* 2014/Jun/13 ver 4.62 disable warning of VC2014
|
||||
* 2014/May/30 ver 4.61 support bt, bts, btr, btc
|
||||
* 2014/May/28 ver 4.60 support vcvtph2ps, vcvtps2ph
|
||||
* 2014/Apr/11 ver 4.52 add detection of rdrand
|
||||
* 2014/Mar/25 ver 4.51 remove state information of unreferenced labels
|
||||
* 2014/Mar/16 ver 4.50 support new Label
|
||||
* 2014/Mar/05 ver 4.40 fix wrong detection of BMI/enhanced rep on VirtualBox
|
||||
* 2013/Dec/03 ver 4.30 support Reg::cvt8(), cvt16(), cvt32(), cvt64()
|
||||
* 2013/Oct/16 ver 4.21 label support std::string
|
||||
* 2013/Jul/30 ver 4.20 [break backward compatibility] split Reg32e class into RegExp(base+index*scale+disp) and Reg32e(means Reg32 or Reg64)
|
||||
* 2013/Jul/04 ver 4.10 [break backward compatibility] change the type of Xbyak::Error from enum to a class
|
||||
* 2013/Jun/21 ver 4.02 add putL(LABEL) function to put the address of the label
|
||||
* 2013/Jun/21 ver 4.01 vpsllw, vpslld, vpsllq, vpsraw, vpsrad, vpsrlw, vpsrld, vpsrlq support (ymm, ymm, xmm). support vpbroadcastb, vpbroadcastw, vpbroadcastd, vpbroadcastq(thanks to Gabest).
|
||||
* 2013/May/30 ver 4.00 support AVX2, VEX-encoded GPR-instructions
|
||||
* 2013/Mar/27 ver 3.80 support mov(reg, "label");
|
||||
* 2013/Mar/13 ver 3.76 add cqo(), jcxz(), jecxz(), jrcxz()
|
||||
* 2013/Jan/15 ver 3.75 add setSize() to modify generated code
|
||||
* 2013/Jan/12 ver 3.74 add CodeGenerator::reset() ; add Allocator::useProtect()
|
||||
* 2013/Jan/06 ver 3.73 use unordered_map if possible
|
||||
* 2012/Dec/04 ver 3.72 eax, ebx, ... are member variables of CodeGenerator(revert), Xbyak::util::eax, ... are static const.
|
||||
* 2012/Nov/17 ver 3.71 and_(), or_(), xor_(), not_() are available if XBYAK_NO_OP_NAMES is not defined.
|
||||
* 2012/Nov/17 change eax, ebx, ptr and so on in CodeGenerator as static member and alias of them are defined in Xbyak::util.
|
||||
* 2012/Nov/09 ver 3.70 XBYAK_NO_OP_NAMES macro is added to use and_() instead of and() (thanks to Mattias)
|
||||
* 2012/Nov/01 ver 3.62 add fwait/fnwait/finit/fninit
|
||||
* 2012/Nov/01 ver 3.61 add fldcw/fstcw
|
||||
* 2012/May/03 ver 3.60 change interface of Allocator
|
||||
* 2012/Mar/23 ver 3.51 fix userPtr mode
|
||||
* 2012/Mar/19 ver 3.50 support AutoGrow mode
|
||||
* 2011/Nov/09 ver 3.05 fix bit property of rip addresing / support movsxd
|
||||
* 2011/Aug/15 ver 3.04 fix dealing with imm8 such as add(dword [ebp-8], 0xda); (thanks to lolcat)
|
||||
* 2011/Jun/16 ver 3.03 fix __GNUC_PREREQ macro for Mac gcc(thanks to t_teruya)
|
||||
* 2011/Apr/28 ver 3.02 do not use xgetbv on Mac gcc
|
||||
* 2011/May/24 ver 3.01 fix typo of OSXSAVE
|
||||
* 2011/May/23 ver 3.00 add vcmpeqps and so on
|
||||
* 2011/Feb/16 ver 2.994 beta add vmovq for 32-bit mode(I forgot it)
|
||||
* 2011/Feb/16 ver 2.993 beta remove cvtReg to avoid thread unsafe
|
||||
* 2011/Feb/10 ver 2.992 beta support one argument syntax for fadd like nasm
|
||||
* 2011/Feb/07 ver 2.991 beta fix pextrw reg, xmm, imm(Thanks to Gabest)
|
||||
* 2011/Feb/04 ver 2.99 beta support AVX
|
||||
* 2010/Dec/08 ver 2.31 fix ptr [rip + 32bit offset], support rdtscp
|
||||
* 2010/Oct/19 ver 2.30 support pclmulqdq, aesdec, aesdeclast, aesenc, aesenclast, aesimc, aeskeygenassist
|
||||
* 2010/Jun/07 ver 2.29 fix call(<label>)
|
||||
* 2010/Jun/17 ver 2.28 move some member functions to public
|
||||
* 2010/Jun/01 ver 2.27 support encoding of mov(reg64, imm) like yasm(not nasm)
|
||||
* 2010/May/24 ver 2.26 fix sub(rsp, 1000)
|
||||
* 2010/Apr/26 ver 2.25 add jc/jnc(I forgot to implement them...)
|
||||
* 2010/Apr/16 ver 2.24 change the prototype of rewrite() method
|
||||
* 2010/Apr/15 ver 2.23 fix align() and xbyak_util.h for Mac
|
||||
* 2010/Feb/16 ver 2.22 fix inLocalLabel()/outLocalLabel()
|
||||
* 2009/Dec/09 ver 2.21 support cygwin(gcc 4.3.2)
|
||||
* 2009/Nov/28 support a part of FPU
|
||||
* 2009/Jun/25 fix mov(qword[rax], imm); (thanks to Martin)
|
||||
* 2009/Mar/10 fix redundant REX.W prefix on jmp/call reg64
|
||||
* 2009/Feb/24 add movq reg64, mmx/xmm; movq mmx/xmm, reg64
|
||||
* 2009/Feb/13 movd(xmm7, dword[eax]) drops 0x66 prefix (thanks to Gabest)
|
||||
* 2008/Dec/30 fix call in short relative address(thanks to kato san)
|
||||
* 2008/Sep/18 support @@, @f, @b and localization of label(thanks to nobu-q san)
|
||||
* 2008/Sep/18 support (ptr[rip + 32bit offset]) (thanks to Dango-Chu san)
|
||||
* 2008/Jun/03 fix align(). mov(ptr[eax],1) throws ERR_MEM_SIZE_IS_NOT_SPECIFIED.
|
||||
* 2008/Jun/02 support memory interface allocated by user
|
||||
* 2008/May/26 fix protect() to avoid invalid setting(thanks to shinichiro_h san)
|
||||
* 2008/Apr/30 add cmpxchg16b, cdqe
|
||||
* 2008/Apr/29 support x64
|
||||
* 2008/Apr/14 code refactoring
|
||||
* 2008/Mar/12 add bsr/bsf
|
||||
* 2008/Feb/14 fix output of sub eax, 1234 (thanks to Robert)
|
||||
* 2007/Nov/5 support lock, xadd, xchg
|
||||
* 2007/Nov/2 support SSSE3/SSE4 (thanks to Dango-Chu san)
|
||||
* 2007/Feb/4 fix the bug that exception doesn't occur under the condition which the offset of jmp mnemonic without T_NEAR is over 127.
|
||||
* 2007/Jan/21 fix the bug to create address like [disp] select smaller representation for mov (eax|ax|al, [disp])
|
||||
* 2007/Jan/4 first version
|
14
externals/dynarmic/externals/xbyak/doc/install.md
vendored
Executable file
14
externals/dynarmic/externals/xbyak/doc/install.md
vendored
Executable file
|
@ -0,0 +1,14 @@
|
|||
# Install
|
||||
|
||||
The following files are necessary. Please add the path to your compile directory.
|
||||
|
||||
* xbyak.h
|
||||
* xbyak_mnemonic.h
|
||||
* xbyak_util.h
|
||||
|
||||
Linux:
|
||||
```
|
||||
make install
|
||||
```
|
||||
|
||||
These files are copied into `/usr/local/include/xbyak`.
|
409
externals/dynarmic/externals/xbyak/doc/usage.md
vendored
Executable file
409
externals/dynarmic/externals/xbyak/doc/usage.md
vendored
Executable file
|
@ -0,0 +1,409 @@
|
|||
# Usage
|
||||
|
||||
Inherit `Xbyak::CodeGenerator` class and make the class method.
|
||||
```
|
||||
#include <xbyak/xbyak.h>
|
||||
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code(int x)
|
||||
{
|
||||
mov(eax, x);
|
||||
ret();
|
||||
}
|
||||
};
|
||||
```
|
||||
Or you can pass the instance of CodeGenerator without inheriting.
|
||||
```
|
||||
void genCode(Xbyak::CodeGenerator& code, int x) {
|
||||
using namespace Xbyak::util;
|
||||
code.mov(eax, x);
|
||||
code.ret();
|
||||
}
|
||||
```
|
||||
|
||||
Make an instance of the class and get the function
|
||||
pointer by calling `getCode()` and call it.
|
||||
```
|
||||
Code c(5);
|
||||
int (*f)() = c.getCode<int (*)()>();
|
||||
printf("ret=%d\n", f()); // ret = 5
|
||||
```
|
||||
|
||||
## Syntax
|
||||
Similar to MASM/NASM syntax with parentheses.
|
||||
|
||||
```
|
||||
NASM Xbyak
|
||||
mov eax, ebx --> mov(eax, ebx);
|
||||
inc ecx inc(ecx);
|
||||
ret --> ret();
|
||||
```
|
||||
|
||||
## Addressing
|
||||
Use `qword`, `dword`, `word` and `byte` if it is necessary to specify the size of memory,
|
||||
otherwise use `ptr`.
|
||||
|
||||
```
|
||||
(ptr|qword|dword|word|byte) [base + index * (1|2|4|8) + displacement]
|
||||
[rip + 32bit disp] ; x64 only
|
||||
|
||||
NASM Xbyak
|
||||
mov eax, [ebx+ecx] --> mov(eax, ptr [ebx+ecx]);
|
||||
mov al, [ebx+ecx] --> mov(al, ptr [ebx + ecx]);
|
||||
test byte [esp], 4 --> test(byte [esp], 4);
|
||||
inc qword [rax] --> inc(qword [rax]);
|
||||
```
|
||||
**Note**: `qword`, ... are member variables, then don't use `dword` as unsigned int type.
|
||||
|
||||
### How to use Selector (Segment Register)
|
||||
```
|
||||
mov eax, [fs:eax] --> putSeg(fs);
|
||||
mov(eax, ptr [eax]);
|
||||
mov ax, cs --> mov(ax, cs);
|
||||
```
|
||||
**Note**: Segment class is not derived from `Operand`.
|
||||
|
||||
## AVX
|
||||
|
||||
```
|
||||
vaddps(xmm1, xmm2, xmm3); // xmm1 <- xmm2 + xmm3
|
||||
vaddps(xmm2, xmm3, ptr [rax]); // use ptr to access memory
|
||||
vgatherdpd(xmm1, ptr [ebp + 256 + xmm2*4], xmm3);
|
||||
```
|
||||
|
||||
**Note**:
|
||||
If `XBYAK_ENABLE_OMITTED_OPERAND` is defined, then you can use two operand version for backward compatibility.
|
||||
But the newer version will not support it.
|
||||
```
|
||||
vaddps(xmm2, xmm3); // xmm2 <- xmm2 + xmm3
|
||||
```
|
||||
|
||||
## AVX-512
|
||||
|
||||
```
|
||||
vaddpd zmm2, zmm5, zmm30 --> vaddpd(zmm2, zmm5, zmm30);
|
||||
vaddpd xmm30, xmm20, [rax] --> vaddpd(xmm30, xmm20, ptr [rax]);
|
||||
vaddps xmm30, xmm20, [rax] --> vaddps(xmm30, xmm20, ptr [rax]);
|
||||
vaddpd zmm2{k5}, zmm4, zmm2 --> vaddpd(zmm2 | k5, zmm4, zmm2);
|
||||
vaddpd zmm2{k5}{z}, zmm4, zmm2 --> vaddpd(zmm2 | k5 | T_z, zmm4, zmm2);
|
||||
vaddpd zmm2{k5}{z}, zmm4, zmm2,{rd-sae} --> vaddpd(zmm2 | k5 | T_z, zmm4, zmm2 | T_rd_sae);
|
||||
vaddpd(zmm2 | k5 | T_z | T_rd_sae, zmm4, zmm2); // the position of `|` is arbitrary.
|
||||
vcmppd k4{k3}, zmm1, zmm2, {sae}, 5 --> vcmppd(k4 | k3, zmm1, zmm2 | T_sae, 5);
|
||||
|
||||
vaddpd xmm1, xmm2, [rax+256] --> vaddpd(xmm1, xmm2, ptr [rax+256]);
|
||||
vaddpd xmm1, xmm2, [rax+256]{1to2} --> vaddpd(xmm1, xmm2, ptr_b [rax+256]);
|
||||
vaddpd ymm1, ymm2, [rax+256]{1to4} --> vaddpd(ymm1, ymm2, ptr_b [rax+256]);
|
||||
vaddpd zmm1, zmm2, [rax+256]{1to8} --> vaddpd(zmm1, zmm2, ptr_b [rax+256]);
|
||||
vaddps zmm1, zmm2, [rax+rcx*8+8]{1to16} --> vaddps(zmm1, zmm2, ptr_b [rax+rcx*8+8]);
|
||||
vmovsd [rax]{k1}, xmm4 --> vmovsd(ptr [rax] | k1, xmm4);
|
||||
|
||||
vcvtpd2dq xmm16, oword [eax+33] --> vcvtpd2dq(xmm16, xword [eax+33]); // use xword for m128 instead of oword
|
||||
vcvtpd2dq(xmm16, ptr [eax+33]); // default xword
|
||||
vcvtpd2dq xmm21, [eax+32]{1to2} --> vcvtpd2dq(xmm21, ptr_b [eax+32]);
|
||||
vcvtpd2dq xmm0, yword [eax+33] --> vcvtpd2dq(xmm0, yword [eax+33]); // use yword for m256
|
||||
vcvtpd2dq xmm19, [eax+32]{1to4} --> vcvtpd2dq(xmm19, yword_b [eax+32]); // use yword_b to broadcast
|
||||
|
||||
vfpclassps k5{k3}, zword [rax+64], 5 --> vfpclassps(k5|k3, zword [rax+64], 5); // specify m512
|
||||
vfpclasspd k5{k3}, [rax+64]{1to2}, 5 --> vfpclasspd(k5|k3, xword_b [rax+64], 5); // broadcast 64-bit to 128-bit
|
||||
vfpclassps k5{k3}, [rax+64]{1to4}, 5 --> vfpclassps(k5|k3, yword_b [rax+64], 5); // broadcast 64-bit to 256-bit
|
||||
|
||||
vpdpbusd(xm0, xm1, xm2); // default encoding is EVEX
|
||||
vpdpbusd(xm0, xm1, xm2, EvexEncoding); // same as the above
|
||||
vpdpbusd(xm0, xm1, xm2, VexEncoding); // VEX encoding
|
||||
```
|
||||
### Remark
|
||||
* `k1`, ..., `k7` are opmask registers.
|
||||
- `k0` is dealt as no mask.
|
||||
- e.g. `vmovaps(zmm0|k0, ptr[rax]);` and `vmovaps(zmm0|T_z, ptr[rax]);` are same to `vmovaps(zmm0, ptr[rax]);`.
|
||||
* use `| T_z`, `| T_sae`, `| T_rn_sae`, `| T_rd_sae`, `| T_ru_sae`, `| T_rz_sae` instead of `,{z}`, `,{sae}`, `,{rn-sae}`, `,{rd-sae}`, `,{ru-sae}`, `,{rz-sae}` respectively.
|
||||
* `k4 | k3` is different from `k3 | k4`.
|
||||
* use `ptr_b` for broadcast `{1toX}`. X is automatically determined.
|
||||
* specify `xword`/`yword`/`zword(_b)` for m128/m256/m512 if necessary.
|
||||
|
||||
## Label
|
||||
Two kinds of Label are supported. (String literal and Label class).
|
||||
|
||||
### String literal
|
||||
```
|
||||
L("L1");
|
||||
jmp("L1");
|
||||
|
||||
jmp("L2");
|
||||
...
|
||||
a few mnemonics (8-bit displacement jmp)
|
||||
...
|
||||
L("L2");
|
||||
|
||||
jmp("L3", T_NEAR);
|
||||
...
|
||||
a lot of mnemonics (32-bit displacement jmp)
|
||||
...
|
||||
L("L3");
|
||||
```
|
||||
|
||||
* Call `hasUndefinedLabel()` to verify your code has no undefined label.
|
||||
* you can use a label for immediate value of mov like as `mov(eax, "L2")`.
|
||||
|
||||
### Support `@@`, `@f`, `@b` like MASM
|
||||
|
||||
```
|
||||
L("@@"); // <A>
|
||||
jmp("@b"); // jmp to <A>
|
||||
jmp("@f"); // jmp to <B>
|
||||
L("@@"); // <B>
|
||||
jmp("@b"); // jmp to <B>
|
||||
mov(eax, "@b");
|
||||
jmp(eax); // jmp to <B>
|
||||
```
|
||||
|
||||
### Local label
|
||||
|
||||
Label symbols beginning with a period between `inLocalLabel()` and `outLocalLabel()`
|
||||
are treated as a local label.
|
||||
`inLocalLabel()` and `outLocalLabel()` can be nested.
|
||||
|
||||
```
|
||||
void func1()
|
||||
{
|
||||
inLocalLabel();
|
||||
L(".lp"); // <A> ; local label
|
||||
...
|
||||
jmp(".lp"); // jmp to <A>
|
||||
L("aaa"); // global label <C>
|
||||
outLocalLabel();
|
||||
|
||||
inLocalLabel();
|
||||
L(".lp"); // <B> ; local label
|
||||
func1();
|
||||
jmp(".lp"); // jmp to <B>
|
||||
inLocalLabel();
|
||||
jmp("aaa"); // jmp to <C>
|
||||
}
|
||||
```
|
||||
|
||||
### short and long jump
|
||||
Xbyak deals with jump mnemonics of an undefined label as short jump if no type is specified.
|
||||
So if the size between jmp and label is larger than 127 byte, then xbyak will cause an error.
|
||||
|
||||
```
|
||||
jmp("short-jmp"); // short jmp
|
||||
// small code
|
||||
L("short-jmp");
|
||||
|
||||
jmp("long-jmp");
|
||||
// long code
|
||||
L("long-jmp"); // throw exception
|
||||
```
|
||||
Then specify T_NEAR for jmp.
|
||||
```
|
||||
jmp("long-jmp", T_NEAR); // long jmp
|
||||
// long code
|
||||
L("long-jmp");
|
||||
```
|
||||
Or call `setDefaultJmpNEAR(true);` once, then the default type is set to T_NEAR.
|
||||
```
|
||||
jmp("long-jmp"); // long jmp
|
||||
// long code
|
||||
L("long-jmp");
|
||||
```
|
||||
|
||||
### Label class
|
||||
|
||||
`L()` and `jxx()` support Label class.
|
||||
|
||||
```
|
||||
Xbyak::Label label1, label2;
|
||||
L(label1);
|
||||
...
|
||||
jmp(label1);
|
||||
...
|
||||
jmp(label2);
|
||||
...
|
||||
L(label2);
|
||||
```
|
||||
|
||||
Use `putL` for jmp table
|
||||
```
|
||||
Label labelTbl, L0, L1, L2;
|
||||
mov(rax, labelTbl);
|
||||
// rdx is an index of jump table
|
||||
jmp(ptr [rax + rdx * sizeof(void*)]);
|
||||
L(labelTbl);
|
||||
putL(L0);
|
||||
putL(L1);
|
||||
putL(L2);
|
||||
L(L0);
|
||||
....
|
||||
L(L1);
|
||||
....
|
||||
```
|
||||
|
||||
`assignL(dstLabel, srcLabel)` binds dstLabel with srcLabel.
|
||||
|
||||
```
|
||||
Label label2;
|
||||
Label label1 = L(); // make label1 ; same to Label label1; L(label1);
|
||||
...
|
||||
jmp(label2); // label2 is not determined here
|
||||
...
|
||||
assignL(label2, label1); // label2 <- label1
|
||||
```
|
||||
The `jmp` in the above code jumps to label1 assigned by `assignL`.
|
||||
|
||||
**Note**:
|
||||
* srcLabel must be used in `L()`.
|
||||
* dstLabel must not be used in `L()`.
|
||||
|
||||
`Label::getAddress()` returns the address specified by the label instance and 0 if not specified.
|
||||
```
|
||||
// not AutoGrow mode
|
||||
Label label;
|
||||
assert(label.getAddress() == 0);
|
||||
L(label);
|
||||
assert(label.getAddress() == getCurr());
|
||||
```
|
||||
|
||||
### Rip ; relative addressing
|
||||
```
|
||||
Label label;
|
||||
mov(eax, ptr [rip + label]); // eax = 4
|
||||
...
|
||||
|
||||
L(label);
|
||||
dd(4);
|
||||
```
|
||||
```
|
||||
int x;
|
||||
...
|
||||
mov(eax, ptr[rip + &x]); // throw exception if the difference between &x and current position is larger than 2GiB
|
||||
```
|
||||
|
||||
## Far jump
|
||||
|
||||
Use `word|dword|qword` instead of `ptr` to specify the address size.
|
||||
|
||||
### 32 bit mode
|
||||
```
|
||||
jmp(word[eax], T_FAR); // jmp m16:16(FF /5)
|
||||
jmp(dword[eax], T_FAR); // jmp m16:32(FF /5)
|
||||
```
|
||||
|
||||
### 64 bit mode
|
||||
```
|
||||
jmp(word[rax], T_FAR); // jmp m16:16(FF /5)
|
||||
jmp(dword[rax], T_FAR); // jmp m16:32(FF /5)
|
||||
jmp(qword[rax], T_FAR); // jmp m16:64(REX.W FF /5)
|
||||
```
|
||||
The same applies to `call`.
|
||||
|
||||
## Code size
|
||||
The default max code size is 4096 bytes.
|
||||
Specify the size in constructor of `CodeGenerator()` if necessary.
|
||||
|
||||
```
|
||||
class Quantize : public Xbyak::CodeGenerator {
|
||||
public:
|
||||
Quantize()
|
||||
: CodeGenerator(8192)
|
||||
{
|
||||
}
|
||||
...
|
||||
};
|
||||
```
|
||||
|
||||
## User allocated memory
|
||||
|
||||
You can make jit code on prepared memory.
|
||||
|
||||
Call `setProtectModeRE` yourself to change memory mode if using the prepared memory.
|
||||
|
||||
```
|
||||
uint8_t alignas(4096) buf[8192]; // C++11 or later
|
||||
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code() : Xbyak::CodeGenerator(sizeof(buf), buf)
|
||||
{
|
||||
mov(rax, 123);
|
||||
ret();
|
||||
}
|
||||
};
|
||||
|
||||
int main()
|
||||
{
|
||||
Code c;
|
||||
c.setProtectModeRE(); // set memory to Read/Exec
|
||||
printf("%d\n", c.getCode<int(*)()>()());
|
||||
}
|
||||
```
|
||||
|
||||
**Note**: See [../sample/test0.cpp](../sample/test0.cpp).
|
||||
|
||||
### AutoGrow
|
||||
|
||||
The memory region for jit is automatically extended if necessary when `AutoGrow` is specified in a constructor of `CodeGenerator`.
|
||||
|
||||
Call `ready()` or `readyRE()` before calling `getCode()` to fix jump address.
|
||||
```
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
: Xbyak::CodeGenerator(<default memory size>, Xbyak::AutoGrow)
|
||||
{
|
||||
...
|
||||
}
|
||||
};
|
||||
Code c;
|
||||
// generate code for jit
|
||||
c.ready(); // mode = Read/Write/Exec
|
||||
```
|
||||
|
||||
**Note**:
|
||||
* Don't use the address returned by `getCurr()` before calling `ready()` because it may be invalid address.
|
||||
|
||||
### Read/Exec mode
|
||||
Xbyak set Read/Write/Exec mode to memory to run jit code.
|
||||
If you want to use Read/Exec mode for security, then specify `DontSetProtectRWE` for `CodeGenerator` and
|
||||
call `setProtectModeRE()` after generating jit code.
|
||||
|
||||
```
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
: Xbyak::CodeGenerator(4096, Xbyak::DontSetProtectRWE)
|
||||
{
|
||||
mov(eax, 123);
|
||||
ret();
|
||||
}
|
||||
};
|
||||
|
||||
Code c;
|
||||
c.setProtectModeRE();
|
||||
...
|
||||
|
||||
```
|
||||
Call `readyRE()` instead of `ready()` when using `AutoGrow` mode.
|
||||
See [protect-re.cpp](../sample/protect-re.cpp).
|
||||
|
||||
## Exception-less mode
|
||||
If `XBYAK_NO_EXCEPTION` is defined, then gcc/clang can compile xbyak with `-fno-exceptions`.
|
||||
In stead of throwing an exception, `Xbyak::GetError()` returns non-zero value (e.g. `ERR_BAD_ADDRESSING`) if there is something wrong.
|
||||
The status will not be changed automatically, then you should reset it by `Xbyak::ClearError()`.
|
||||
`CodeGenerator::reset()` calls `ClearError()`.
|
||||
|
||||
## Macro
|
||||
|
||||
* **XBYAK32** is defined on 32bit.
|
||||
* **XBYAK64** is defined on 64bit.
|
||||
* **XBYAK64_WIN** is defined on 64bit Windows(VC).
|
||||
* **XBYAK64_GCC** is defined on 64bit gcc, cygwin.
|
||||
* define **XBYAK_USE_OP_NAMES** on gcc with `-fno-operator-names` if you want to use `and()`, ....
|
||||
* define **XBYAK_ENABLE_OMITTED_OPERAND** if you use omitted destination such as `vaddps(xmm2, xmm3);`(deprecated in the future).
|
||||
* define **XBYAK_UNDEF_JNL** if Bessel function jnl is defined as macro.
|
||||
* define **XBYAK_NO_EXCEPTION** for a compiler option `-fno-exceptions`.
|
||||
* define **XBYAK_USE_MEMFD** on Linux then /proc/self/maps shows the area used by xbyak.
|
||||
* define **XBYAK_OLD_DISP_CHECK** if the old disp check is necessary (deprecated in the future).
|
||||
|
||||
## Sample
|
||||
|
||||
* [test0.cpp](../sample/test0.cpp) ; tiny sample (x86, x64)
|
||||
* [quantize.cpp](../sample/quantize.cpp) ; JIT optimized quantization by fast division (x86 only)
|
||||
* [calc.cpp](../sample/calc.cpp) ; assemble and estimate a given polynomial (x86, x64)
|
||||
* [bf.cpp](../sample/bf.cpp) ; JIT brainfuck (x86, x64)
|
17
externals/dynarmic/externals/xbyak/gen/Makefile
vendored
17
externals/dynarmic/externals/xbyak/gen/Makefile
vendored
|
@ -1,7 +1,7 @@
|
|||
TARGET=../xbyak/xbyak_mnemonic.h
|
||||
BIN=sortline gen_code gen_avx512
|
||||
CFLAGS=-I../ -O2 -DXBYAK_NO_OP_NAMES -Wall -Wextra -Wno-missing-field-initializers
|
||||
all: $(TARGET)
|
||||
CFLAGS=-I../ -O2 -DXBYAK_NO_OP_NAMES -Wall -Wextra -Wno-missing-field-initializers $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS)
|
||||
all: $(TARGET) ../CMakeLists.txt ../meson.build ../readme.md ../readme.txt
|
||||
sortline: sortline.cpp
|
||||
$(CXX) $(CFLAGS) $< -o $@
|
||||
gen_code: gen_code.cpp ../xbyak/xbyak.h avx_type.hpp
|
||||
|
@ -22,5 +22,18 @@ $(TARGET): $(BIN)
|
|||
echo "#endif" >> $@
|
||||
echo "#endif" >> $@
|
||||
|
||||
VER=$(shell head -n 1 ../xbyak/xbyak_mnemonic.h|grep -o "[0-9.]*")
|
||||
../CMakeLists.txt: $(TARGET)
|
||||
sed -i -e 's/CXX VERSION [0-9.]*/CXX VERSION $(VER)/' $@
|
||||
|
||||
../meson.build: $(TARGET)
|
||||
sed -i -e "s/version: '[0-9.]*',/version: '$(VER)',/" $@
|
||||
|
||||
../readme.md: $(TARGET)
|
||||
sed -l 2 -i -e "s/# Xbyak [0-9.]*/# Xbyak $(VER)/" $@
|
||||
|
||||
../readme.txt: $(TARGET)
|
||||
sed -l 2 -i -e "s/Xbyak [0-9.]*/Xbyak $(VER)/" $@
|
||||
|
||||
clean:
|
||||
$(RM) $(BIN) $(TARGET)
|
||||
|
|
|
@ -12,9 +12,10 @@
|
|||
//
|
||||
T_N_VL = 1 << 3, // N * (1, 2, 4) for VL
|
||||
T_DUP = 1 << 4, // N = (8, 32, 64)
|
||||
T_66 = 1 << 5,
|
||||
T_F3 = 1 << 6,
|
||||
T_F2 = 1 << 7,
|
||||
T_66 = 1 << 5, // pp = 1
|
||||
T_F3 = 1 << 6, // pp = 2
|
||||
T_F2 = T_66 | T_F3, // pp = 3
|
||||
T_ER_R = 1 << 7, // reg{er}
|
||||
T_0F = 1 << 8,
|
||||
T_0F38 = 1 << 9,
|
||||
T_0F3A = 1 << 10,
|
||||
|
@ -35,11 +36,18 @@
|
|||
T_MUST_EVEX = 1 << 25, // contains T_EVEX
|
||||
T_B32 = 1 << 26, // m32bcst
|
||||
T_B64 = 1 << 27, // m64bcst
|
||||
T_B16 = T_B32 | T_B64, // m16bcst
|
||||
T_M_K = 1 << 28, // mem{k}
|
||||
T_VSIB = 1 << 29,
|
||||
T_MEM_EVEX = 1 << 30, // use evex if mem
|
||||
T_FP16 = 1 << 31,
|
||||
T_MAP5 = T_FP16 | T_0F,
|
||||
T_MAP6 = T_FP16 | T_0F38,
|
||||
T_XXX
|
||||
};
|
||||
// T_66 = 1, T_F3 = 2, T_F2 = 3
|
||||
uint32_t getPP(int type) { return (type >> 5) & 3; }
|
||||
|
||||
|
||||
const int NONE = 256; // same as Xbyak::CodeGenerator::NONE
|
||||
|
||||
|
@ -62,25 +70,30 @@ std::string type2String(int type)
|
|||
if (!str.empty()) str += " | ";
|
||||
str += "T_DUP";
|
||||
}
|
||||
if (type & T_66) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_66";
|
||||
}
|
||||
if (type & T_F3) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_F3";
|
||||
}
|
||||
if (type & T_F2) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_F2";
|
||||
switch (type & T_F2) {
|
||||
case T_66: str += "T_66"; break;
|
||||
case T_F3: str += "T_F3"; break;
|
||||
case T_F2: str += "T_F2"; break;
|
||||
default: break;
|
||||
}
|
||||
}
|
||||
if (type & T_0F) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_0F";
|
||||
if (type & T_FP16) {
|
||||
str += "T_MAP5";
|
||||
} else {
|
||||
str += "T_0F";
|
||||
}
|
||||
}
|
||||
if (type & T_0F38) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_0F38";
|
||||
if (type & T_FP16) {
|
||||
str += "T_MAP6";
|
||||
} else {
|
||||
str += "T_0F38";
|
||||
}
|
||||
}
|
||||
if (type & T_0F3A) {
|
||||
if (!str.empty()) str += " | ";
|
||||
|
@ -130,6 +143,10 @@ std::string type2String(int type)
|
|||
if (!str.empty()) str += " | ";
|
||||
str += "T_ER_Z";
|
||||
}
|
||||
if (type & T_ER_R) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_ER_R";
|
||||
}
|
||||
if (type & T_SAE_X) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_SAE_X";
|
||||
|
@ -148,9 +165,12 @@ std::string type2String(int type)
|
|||
}
|
||||
if (type & T_B32) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_B32";
|
||||
}
|
||||
if (type & T_B64) {
|
||||
if (type & T_B64) {
|
||||
str += "T_B16"; // T_B16 = T_B32 | T_B64
|
||||
} else {
|
||||
str += "T_B32";
|
||||
}
|
||||
} else if (type & T_B64) {
|
||||
if (!str.empty()) str += " | ";
|
||||
str += "T_B64";
|
||||
}
|
||||
|
|
|
@ -107,6 +107,8 @@ void putVcmp()
|
|||
{ 0xC2, "vcmpps", T_0F | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_YMM | T_B32, true },
|
||||
{ 0xC2, "vcmpsd", T_0F | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_F2 | T_N8, true },
|
||||
{ 0xC2, "vcmpss", T_0F | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_F3 | T_N4, true },
|
||||
{ 0xC2, "vcmpph", T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_YMM | T_B16, true },
|
||||
{ 0xC2, "vcmpsh", T_F3 | T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, true },
|
||||
|
||||
{ 0x74, "vpcmpeqb", T_66 | T_0F | T_MUST_EVEX | T_YMM, false },
|
||||
{ 0x75, "vpcmpeqw", T_66 | T_0F | T_MUST_EVEX | T_YMM, false },
|
||||
|
@ -144,6 +146,25 @@ void putVcmp()
|
|||
printf("void %s(const Opmask& k, const Xmm& x, const Operand& op%s) { opAVX_K_X_XM(k, x, op, %s, 0x%02X%s); }\n"
|
||||
, p->name, p->hasIMM ? ", uint8_t imm" : "", type.c_str(), p->code, p->hasIMM ? ", imm" : "");
|
||||
}
|
||||
puts("void vcomish(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, 0x2F); }");
|
||||
puts("void vucomish(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, 0x2E); }");
|
||||
}
|
||||
|
||||
void putVcmpAlias()
|
||||
{
|
||||
const char pred[32][16] = {
|
||||
"eq", "lt", "le", "unord", "neq", "nlt", "nle", "ord",
|
||||
"eq_uq", "nge", "ngt", "false", "neq_oq", "ge", "gt",
|
||||
"true", "eq_os", "lt_oq", "le_oq", "unord_s", "neq_us", "nlt_uq", "nle_uq", "ord_s",
|
||||
"eq_us", "nge_uq", "ngt_uq", "false_os", "neq_os", "ge_oq", "gt_oq", "true_us"
|
||||
};
|
||||
const char suf[][4] = { "pd", "ps", "sd", "ss" };
|
||||
for (int i = 0; i < 4; i++) {
|
||||
const char *s = suf[i];
|
||||
for (int j = 0; j < 32; j++) {
|
||||
printf("void vcmp%s%s(const Opmask& k, const Xmm& x, const Operand& op) { vcmp%s(k, x, op, %d); }\n", pred[j], s, s, j);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// XM_X
|
||||
|
@ -178,6 +199,14 @@ void putX_XM()
|
|||
{ 0x89, "vpexpandq", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_N8 },
|
||||
{ 0x42, "vgetexppd", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z },
|
||||
{ 0x42, "vgetexpps", T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z },
|
||||
{ 0x42, "vgetexpph", T_66 | T_MAP6 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_SAE_Z },
|
||||
|
||||
{ 0x7D, "vcvtph2uw", T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_ER_Z },
|
||||
{ 0x7D, "vcvtph2w", T_66 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_ER_Z },
|
||||
{ 0x7C, "vcvttph2uw", T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_SAE_Z },
|
||||
{ 0x7C, "vcvttph2w", T_66 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_SAE_Z },
|
||||
{ 0x7D, "vcvtuw2ph", T_F2 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_ER_Z },
|
||||
{ 0x7D, "vcvtw2ph", T_F3 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_ER_Z },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl *p = &tbl[i];
|
||||
|
@ -208,6 +237,8 @@ void putM_X()
|
|||
{ 0x7F, "vmovdqu16", T_F2 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K },
|
||||
{ 0x7F, "vmovdqu32", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW0 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K },
|
||||
{ 0x7F, "vmovdqu64", T_F3 | T_0F | T_MUST_EVEX | T_YMM | T_EW1 | T_ER_X | T_ER_Y | T_ER_Z | T_M_K },
|
||||
{ 0x11, "vmovsh", T_F3 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_N2 | T_M_K },
|
||||
{ 0x7E, "vmovw", T_66 | T_MAP5 | T_MUST_EVEX | T_N2 },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl *p = &tbl[i];
|
||||
|
@ -299,8 +330,10 @@ void putX_X_XM_IMM()
|
|||
|
||||
{ 0x43, "vgetexpsd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_SAE_X | T_N8, false },
|
||||
{ 0x43, "vgetexpss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N4, false },
|
||||
{ 0x43, "vgetexpsh", T_66 | T_MAP6 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, false },
|
||||
{ 0x27, "vgetmantsd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_SAE_X | T_N8, true },
|
||||
{ 0x27, "vgetmantss", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N4, true },
|
||||
{ 0x27, "vgetmantsh", T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, true },
|
||||
|
||||
{ 0x54, "vfixupimmpd", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, true },
|
||||
{ 0x54, "vfixupimmps", T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, true },
|
||||
|
@ -310,17 +343,26 @@ void putX_X_XM_IMM()
|
|||
{ 0x4D, "vrcp14sd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_N8, false },
|
||||
{ 0x4D, "vrcp14ss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_N4, false },
|
||||
|
||||
{ 0x4D, "vrcpsh", T_66 | T_MAP6 | T_MUST_EVEX | T_EW0 | T_N2, false },
|
||||
|
||||
{ 0x4F, "vrsqrt14sd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N8, false },
|
||||
{ 0x4F, "vrsqrt14ss", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N4, false },
|
||||
|
||||
{ 0x0B, "vrndscalesd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, true },
|
||||
{ 0x0A, "vrndscaless", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4, true },
|
||||
{ 0x4F, "vrsqrtsh", T_66 | T_MAP6 | T_MUST_EVEX | T_EW0 | T_N2, false },
|
||||
{ 0x51, "vsqrtsh", T_F3 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_ER_X | T_N2, false },
|
||||
|
||||
{ 0x0B, "vrndscalesd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8 | T_SAE_X, true },
|
||||
{ 0x0A, "vrndscaless", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4 | T_SAE_X, true },
|
||||
{ 0x0A, "vrndscalesh", T_0F3A | T_MUST_EVEX | T_EW0 | T_N2 | T_SAE_X, true },
|
||||
|
||||
{ 0x2C, "vscalefpd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, false },
|
||||
{ 0x2C, "vscalefps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_ER_Z, false },
|
||||
{ 0x2D, "vscalefsd", T_66 | T_0F38 | T_MUST_EVEX | T_EW1 | T_ER_X | T_N8, false },
|
||||
{ 0x2D, "vscalefss", T_66 | T_0F38 | T_MUST_EVEX | T_EW0 | T_ER_X | T_N4, false },
|
||||
|
||||
{ 0x2C, "vscalefph", T_66 | T_MAP6 | T_YMM | T_MUST_EVEX | T_EW0 | T_B16 | T_ER_Z, false },
|
||||
{ 0x2D, "vscalefsh", T_66 | T_MAP6 | T_MUST_EVEX | T_EW0 | T_ER_X | T_N2, false },
|
||||
|
||||
{ 0x42, "vdbpsadbw", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0, true },
|
||||
{ 0x83, "vpmultishiftqb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
||||
|
||||
|
@ -343,6 +385,7 @@ void putX_X_XM_IMM()
|
|||
|
||||
{ 0x57, "vreducesd", T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_SAE_X | T_N8, true },
|
||||
{ 0x57, "vreducess", T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N4, true },
|
||||
{ 0x57, "vreducesh", T_0F3A | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, true },
|
||||
|
||||
{ 0xB4, "vpmadd52luq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
||||
{ 0xB5, "vpmadd52huq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
||||
|
@ -365,6 +408,11 @@ void putX_X_XM_IMM()
|
|||
|
||||
{ 0x72, "vcvtne2ps2bf16", T_F2 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false },
|
||||
{ 0x52, "vdpbf16ps", T_F3 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false },
|
||||
|
||||
{ 0x5A, "vcvtsd2sh", T_F2 | T_MAP5 | T_MUST_EVEX | T_EW1 | T_ER_X | T_N8, false },
|
||||
{ 0x5A, "vcvtsh2sd", T_F3 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, false },
|
||||
{ 0x13, "vcvtsh2ss", T_MAP6 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, false },
|
||||
{ 0x1D, "vcvtss2sh", T_MAP5 | T_MUST_EVEX | T_EW0 | T_ER_X | T_N4, false },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl *p = &tbl[i];
|
||||
|
@ -488,20 +536,81 @@ void putBroadcast(bool only64bit)
|
|||
|
||||
void putCvt()
|
||||
{
|
||||
puts("void vcvtpd2udq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x79); }");
|
||||
puts("void vcvtps2qq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_ER_Y, 0x7B); }");
|
||||
puts("void vcvtps2uqq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_ER_Y, 0x79); }");
|
||||
puts("void vcvtqq2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x5B); }");
|
||||
puts("void vcvttpd2udq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, 0x78); }");
|
||||
puts("void vcvttps2qq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_SAE_Y, 0x7A); }");
|
||||
puts("void vcvttps2uqq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_SAE_Y, 0x78); }");
|
||||
puts("void vcvtudq2pd(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_F3 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL, 0x7A); }");
|
||||
const struct Tbl {
|
||||
uint8_t code;
|
||||
const char *name;
|
||||
int type;
|
||||
int ptn;
|
||||
} tbl[] = {
|
||||
{ 0x79, "vcvtsd2usi", T_F2 | T_0F | T_MUST_EVEX | T_N8 | T_ER_X, 0 },
|
||||
{ 0x79, "vcvtss2usi", T_F3 | T_0F | T_MUST_EVEX | T_N4 | T_ER_X, 0 },
|
||||
{ 0x78, "vcvttsd2usi", T_F2 | T_0F | T_MUST_EVEX | T_N8 | T_SAE_X, 0 },
|
||||
{ 0x78, "vcvttss2usi", T_F3 | T_0F | T_MUST_EVEX | T_N4 | T_SAE_X, 0 },
|
||||
{ 0x2D, "vcvtsh2si", T_F3 | T_MAP5 | T_MUST_EVEX | T_N2 | T_ER_X, 0 },
|
||||
{ 0x79, "vcvtsh2usi", T_F3 | T_MAP5 | T_MUST_EVEX | T_N2 | T_ER_X, 0 },
|
||||
{ 0x2C, "vcvttsh2si", T_F3 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_N2 | T_SAE_X, 0 },
|
||||
{ 0x78, "vcvttsh2usi", T_F3 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_N2 | T_SAE_X, 0 },
|
||||
|
||||
puts("void vcvtsd2usi(const Reg32e& r, const Operand& op) { int type = (T_F2 | T_0F | T_MUST_EVEX | T_N8 | T_ER_X) | (r.isREG(64) ? T_EW1 : T_EW0); opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, type, 0x79); }");
|
||||
puts("void vcvtss2usi(const Reg32e& r, const Operand& op) { int type = (T_F3 | T_0F | T_MUST_EVEX | T_N4 | T_ER_X) | (r.isREG(64) ? T_EW1 : T_EW0); opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, type, 0x79); }");
|
||||
puts("void vcvttsd2usi(const Reg32e& r, const Operand& op) { int type = (T_F2 | T_0F | T_MUST_EVEX | T_N8 | T_SAE_X) | (r.isREG(64) ? T_EW1 : T_EW0); opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, type, 0x78); }");
|
||||
puts("void vcvttss2usi(const Reg32e& r, const Operand& op) { int type = (T_F3 | T_0F | T_MUST_EVEX | T_N4 | T_SAE_X) | (r.isREG(64) ? T_EW1 : T_EW0); opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, type, 0x78); }");
|
||||
puts("void vcvtuqq2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F2 | T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x7A); }");
|
||||
{ 0x7B, "vcvtps2qq", T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_ER_Y, 1 },
|
||||
{ 0x79, "vcvtps2uqq", T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_ER_Y, 1 },
|
||||
{ 0x7A, "vcvttps2qq", T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_SAE_Y, 1 },
|
||||
{ 0x78, "vcvttps2uqq", T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_SAE_Y, 1 },
|
||||
{ 0x7A, "vcvtudq2pd", T_F3 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL, 1 },
|
||||
{ 0x5B, "vcvtph2dq", T_66 | T_MAP5 | T_YMM | T_MUST_EVEX | T_EW0 | T_B16 | T_ER_Y | T_N8 | T_N_VL, 1 },
|
||||
{ 0x13, "vcvtph2psx", T_66 | T_MAP6 | T_YMM | T_MUST_EVEX | T_EW0 | T_B16 | T_SAE_Y | T_N8 | T_N_VL, 1 },
|
||||
{ 0x79, "vcvtph2udq", T_MAP5 | T_YMM | T_MUST_EVEX | T_EW0 | T_B16 | T_ER_Y | T_N8 | T_N_VL, 1 },
|
||||
{ 0x5B, "vcvttph2dq", T_F3 | T_MAP5 | T_YMM | T_MUST_EVEX | T_EW0 | T_B16 | T_SAE_Y | T_N8 | T_N_VL, 1 },
|
||||
{ 0x78, "vcvttph2udq", T_MAP5 | T_YMM | T_MUST_EVEX | T_EW0 | T_B16 | T_SAE_Y | T_N8 | T_N_VL, 1 },
|
||||
|
||||
{ 0x79, "vcvtpd2udq", T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 2 },
|
||||
{ 0x5B, "vcvtqq2ps", T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 2 },
|
||||
{ 0x78, "vcvttpd2udq", T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, 2 },
|
||||
{ 0x7A, "vcvtuqq2ps", T_F2 | T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 2 },
|
||||
|
||||
{ 0x5A, "vcvtph2pd", T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_N4 | T_N_VL | T_SAE_X, 3 },
|
||||
{ 0x7B, "vcvtph2qq", T_66 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_N4 | T_N_VL | T_ER_X, 3 },
|
||||
{ 0x79, "vcvtph2uqq", T_66 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_N4 | T_N_VL | T_ER_X, 3 },
|
||||
{ 0x78, "vcvttph2uqq", T_66 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_N4 | T_N_VL | T_SAE_X, 3 },
|
||||
{ 0x7A, "vcvttph2qq", T_66 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_N4 | T_N_VL | T_SAE_X, 3 },
|
||||
|
||||
{ 0x5B, "vcvtdq2ph", T_MAP5 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_ER_Z | T_N16 | T_N_VL, 4 },
|
||||
{ 0x1D, "vcvtps2phx", T_66 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_B32 | T_ER_Z | T_N16 | T_N_VL, 4 },
|
||||
{ 0x7A, "vcvtudq2ph", T_F2 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_B32 | T_ER_Z | T_N16 | T_N_VL, 4 },
|
||||
|
||||
{ 0x5A, "vcvtpd2ph", T_66 | T_MAP5 | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z | T_N16 | T_N_VL, 5 },
|
||||
{ 0x5B, "vcvtqq2ph", T_MAP5 | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z | T_N16 | T_N_VL, 5 },
|
||||
{ 0x7A, "vcvtuqq2ph", T_F2 | T_MAP5 | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z | T_N16 | T_N_VL, 5 },
|
||||
|
||||
{ 0x2A, "vcvtsi2sh", T_F3 | T_MAP5 | T_MUST_EVEX | T_ER_R | T_M_K, 6 },
|
||||
{ 0x7B, "vcvtusi2sh", T_F3 | T_MAP5 | T_MUST_EVEX | T_ER_R | T_M_K, 6 },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl& p = tbl[i];
|
||||
std::string type = type2String(p.type);
|
||||
switch (p.ptn) {
|
||||
case 0:
|
||||
printf("void %s(const Reg32e& r, const Operand& op) { int type = (%s) | (r.isREG(64) ? T_EW1 : T_EW0); opVex(r, &xm0, op, type, 0x%02X); }\n", p.name, type.c_str(), p.code);
|
||||
break;
|
||||
case 1:
|
||||
printf("void %s(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code);
|
||||
break;
|
||||
case 2:
|
||||
printf("void %s(const Xmm& x, const Operand& op) { opCvt2(x, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code);
|
||||
break;
|
||||
case 3:
|
||||
printf("void %s(const Xmm& x, const Operand& op) { if (!op.isXMM() && !op.isMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(x, 0, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code);
|
||||
break;
|
||||
case 4:
|
||||
printf("void %s(const Xmm& x, const Operand& op) { checkCvt4(x, op); opCvt(x, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code);
|
||||
break;
|
||||
case 5:
|
||||
printf("void %s(const Xmm& x, const Operand& op) { opCvt5(x, op, %s, 0x%02X); }\n", p.name, type.c_str(), p.code);
|
||||
break;
|
||||
case 6:
|
||||
printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op) { if (!(x1.isXMM() && x2.isXMM() && op.isBit(32|64))) XBYAK_THROW(ERR_BAD_COMBINATION) int type = (%s) | (op.isBit(32) ? (T_EW0 | T_N4) : (T_EW1 | T_N8)); opVex(x1, &x2, op, type, 0x%02X); }\n", p.name, type.c_str(), p.code);
|
||||
break;
|
||||
}
|
||||
}
|
||||
puts("void vcvtusi2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F2 | T_0F | T_MUST_EVEX, T_W1 | T_EW1 | T_ER_X | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); }");
|
||||
puts("void vcvtusi2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F3 | T_0F | T_MUST_EVEX | T_ER_X, T_W1 | T_EW1 | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); }");
|
||||
}
|
||||
|
@ -628,14 +737,21 @@ void putX_XM_IMM()
|
|||
} tbl[] = {
|
||||
{ 0x26, "vgetmantpd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, true },
|
||||
{ 0x26, "vgetmantps", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_SAE_Z, true },
|
||||
{ 0x26, "vgetmantph", T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B16 | T_SAE_Z, true },
|
||||
{ 0x4C, "vrcp14pd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
||||
{ 0x4C, "vrcp14ps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
|
||||
|
||||
{ 0x4C, "vrcpph", T_66 | T_MAP6 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16, false },
|
||||
|
||||
{ 0x4E, "vrsqrt14pd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
||||
{ 0x4E, "vrsqrt14ps", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
|
||||
|
||||
{ 0x09, "vrndscalepd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, true },
|
||||
{ 0x08, "vrndscaleps", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, true },
|
||||
{ 0x4E, "vrsqrtph", T_66 | T_MAP6 | T_YMM | T_MUST_EVEX | T_EW0 | T_B16, false },
|
||||
{ 0x51, "vsqrtph", T_MAP5| T_YMM | T_MUST_EVEX | T_EW0 | T_ER_Z | T_B16, false },
|
||||
|
||||
{ 0x09, "vrndscalepd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, true },
|
||||
{ 0x08, "vrndscaleps", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_SAE_Z, true },
|
||||
{ 0x08, "vrndscaleph", T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B16 | T_SAE_Z, true },
|
||||
|
||||
{ 0xC4, "vpconflictd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, false },
|
||||
{ 0xC4, "vpconflictq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, false },
|
||||
|
@ -645,6 +761,7 @@ void putX_XM_IMM()
|
|||
|
||||
{ 0x56, "vreducepd", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, true },
|
||||
{ 0x56, "vreduceps", T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_SAE_Z, true },
|
||||
{ 0x56, "vreduceph", T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B16 | T_SAE_Z, true },
|
||||
|
||||
{ 0x54, "vpopcntb", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z, false },
|
||||
{ 0x54, "vpopcntw", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z, false },
|
||||
|
@ -704,8 +821,10 @@ void putMisc()
|
|||
|
||||
puts("void vfpclasspd(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, 0x66, imm); }");
|
||||
puts("void vfpclassps(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); }");
|
||||
puts("void vfpclassph(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B16, 0x66, imm); }");
|
||||
puts("void vfpclasssd(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, 0x67, imm); }");
|
||||
puts("void vfpclassss(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4, 0x67, imm); }");
|
||||
puts("void vfpclasssh(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_0F3A | T_MUST_EVEX | T_EW0 | T_N2, 0x67, imm); }");
|
||||
|
||||
puts("void vpshufbitqmb(const Opmask& k, const Xmm& x, const Operand& op) { opVex(k, &x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8F); }");
|
||||
puts("void vcvtneps2bf16(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x72); }");
|
||||
|
@ -724,6 +843,126 @@ void putV4FMA()
|
|||
puts("void vp4dpwssds(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x53); }");
|
||||
}
|
||||
|
||||
void putFP16_1()
|
||||
{
|
||||
const struct Tbl {
|
||||
uint8_t code;
|
||||
const char *name;
|
||||
} tbl[] = {
|
||||
{ 0x58, "add" },
|
||||
{ 0x5C, "sub" },
|
||||
{ 0x59, "mul" },
|
||||
{ 0x5E, "div" },
|
||||
{ 0x5F, "max" },
|
||||
{ 0x5D, "min" },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl *p = &tbl[i];
|
||||
printf("void v%sph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x%02X); }\n", p->name, p->code);
|
||||
printf("void v%ssh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_X | T_N2, 0x%02X); }\n", p->name, p->code);
|
||||
}
|
||||
}
|
||||
|
||||
void putFP16_FMA()
|
||||
{
|
||||
const struct Tbl {
|
||||
uint8_t code;
|
||||
const char *name;
|
||||
bool isPH;
|
||||
} tbl[] = {
|
||||
{ 0x06, "vfmaddsub", true },
|
||||
{ 0x07, "vfmsubadd", true },
|
||||
{ 0x08, "vfmadd", true },
|
||||
{ 0x0C, "vfnmadd", true },
|
||||
{ 0x0A, "vfmsub", true },
|
||||
{ 0x0E, "vfnmsub", true },
|
||||
{ 0x09, "vfmadd", false },
|
||||
{ 0x0D, "vfnmadd", false },
|
||||
{ 0x0B, "vfmsub", false },
|
||||
{ 0x0F, "vfnmsub", false },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
for (int k = 0; k < 3; k++) {
|
||||
const struct Ord {
|
||||
const char *str;
|
||||
uint8_t code;
|
||||
} ord[] = {
|
||||
{ "132", 0x90 },
|
||||
{ "213", 0xA0 },
|
||||
{ "231", 0xB0 },
|
||||
};
|
||||
int t = T_66 | T_MAP6 | T_EW0 | T_MUST_EVEX;
|
||||
const char *suf = 0;
|
||||
if (tbl[i].isPH) {
|
||||
t |= T_ER_Z | T_YMM | T_B16;
|
||||
suf = "ph";
|
||||
} else {
|
||||
t |= T_ER_X | T_N2;
|
||||
suf = "sh";
|
||||
}
|
||||
std::string type = type2String(t);
|
||||
printf("void %s%s%s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X); }\n"
|
||||
, tbl[i].name, ord[k].str, suf, type.c_str(), tbl[i].code | ord[k].code);
|
||||
}
|
||||
}
|
||||
}
|
||||
void putFP16_FMA2()
|
||||
{
|
||||
const struct Tbl {
|
||||
uint8_t code;
|
||||
const char *name;
|
||||
bool isPH;
|
||||
} tbl[] = {
|
||||
{ 0x56, "maddc", true },
|
||||
{ 0xD6, "mulc", true },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
for (int j = 0; j < 2; j++) {
|
||||
int t = T_MAP6 | T_EW0 | T_MUST_EVEX;
|
||||
if (j == 0) {
|
||||
t |= T_F2;
|
||||
} else {
|
||||
t |= T_F3;
|
||||
}
|
||||
const char *suf = 0;
|
||||
if (tbl[i].isPH) {
|
||||
t |= T_ER_Z | T_YMM | T_B32;
|
||||
suf = "ph";
|
||||
} else {
|
||||
t |= T_ER_X | T_N2;
|
||||
suf = "sh";
|
||||
}
|
||||
std::string type = type2String(t);
|
||||
printf("void vf%s%s%s(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, %s, 0x%02X); }\n"
|
||||
, j == 0 ? "c" : "", tbl[i].name, suf, type.c_str(), tbl[i].code);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void putFP16_2()
|
||||
{
|
||||
{
|
||||
int t = T_F3 | T_MAP5 | T_MUST_EVEX | T_EW0 | T_N2;
|
||||
std::string type = type2String(t);
|
||||
printf("void vmovsh(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, %s, 0x10); }\n", type.c_str());
|
||||
printf("void vmovsh(const Xmm& x1, const Xmm& x2, const Xmm& x3) { opAVX_X_X_XM(x1, x2, x3, %s, 0x10); }\n", type.c_str());
|
||||
}
|
||||
{
|
||||
int t = T_66 | T_MAP5 | T_MUST_EVEX | T_N2;
|
||||
std::string type = type2String(t);
|
||||
printf("void vmovw(const Xmm& x, const Operand& op) { if (!op.isREG(32|64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, %s, 0x6E); }\n", type.c_str());
|
||||
printf("void vmovw(const Reg32e& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, r, %s, 0x7E); }\n", type.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
void putFP16()
|
||||
{
|
||||
putFP16_1();
|
||||
putFP16_FMA();
|
||||
putFP16_FMA2();
|
||||
putFP16_2();
|
||||
}
|
||||
|
||||
int main(int argc, char *[])
|
||||
{
|
||||
bool only64bit = argc == 2;
|
||||
|
@ -733,6 +972,7 @@ int main(int argc, char *[])
|
|||
return 0;
|
||||
}
|
||||
putVcmp();
|
||||
putVcmpAlias();
|
||||
putX_XM();
|
||||
putM_X();
|
||||
putXM_X();
|
||||
|
@ -747,4 +987,5 @@ int main(int argc, char *[])
|
|||
putMisc();
|
||||
putScatter();
|
||||
putV4FMA();
|
||||
putFP16();
|
||||
}
|
||||
|
|
|
@ -26,6 +26,7 @@ struct GenericTbl {
|
|||
uint8_t code1;
|
||||
uint8_t code2;
|
||||
uint8_t code3;
|
||||
uint8_t code4;
|
||||
};
|
||||
|
||||
void putGeneric(const GenericTbl *p, size_t n)
|
||||
|
@ -34,6 +35,7 @@ void putGeneric(const GenericTbl *p, size_t n)
|
|||
printf("void %s() { db(0x%02X); ", p->name, p->code1);
|
||||
if (p->code2) printf("db(0x%02X); ", p->code2);
|
||||
if (p->code3) printf("db(0x%02X); ", p->code3);
|
||||
if (p->code4) printf("db(0x%02X); ", p->code4);
|
||||
printf("}\n");
|
||||
p++;
|
||||
}
|
||||
|
@ -250,7 +252,7 @@ void put()
|
|||
char buf[16];
|
||||
unsigned int v = VERSION;
|
||||
if (v & 0xF) {
|
||||
snprintf(buf, sizeof(buf), "%d.%02X%x", v >> 12, (v >> 4) & 0xFF, v & 0xF);
|
||||
snprintf(buf, sizeof(buf), "%d.%02X.%x", v >> 12, (v >> 4) & 0xFF, v & 0xF);
|
||||
} else {
|
||||
snprintf(buf, sizeof(buf), "%d.%02X", v >> 12, (v >> 4) & 0xFF);
|
||||
}
|
||||
|
@ -661,6 +663,9 @@ void put()
|
|||
{ "cmpsb", 0xA6 },
|
||||
{ "cmpsw", 0x66, 0xA7 },
|
||||
{ "cmpsd", 0xA7 },
|
||||
{ "endbr32", 0xF3, 0x0F, 0x1E, 0xFB },
|
||||
{ "endbr64", 0xF3, 0x0F, 0x1E, 0xFA },
|
||||
{ "hlt", 0xF4 },
|
||||
{ "int3", 0xCC },
|
||||
{ "scasb", 0xAE },
|
||||
{ "scasw", 0x66, 0xAF },
|
||||
|
@ -1040,11 +1045,14 @@ void put()
|
|||
puts("void lea(const Reg& reg, const Address& addr) { if (!reg.isBit(16 | i32e)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opModM(addr, reg, 0x8D); }");
|
||||
puts("void bswap(const Reg32e& reg) { opModR(Reg32(1), reg, 0x0F); }");
|
||||
puts("void ret(int imm = 0) { if (imm) { db(0xC2); dw(imm); } else { db(0xC3); } }");
|
||||
puts("void retf(int imm = 0) { if (imm) { db(0xCA); dw(imm); } else { db(0xCB); } }");
|
||||
|
||||
puts("void xadd(const Operand& op, const Reg& reg) { opModRM(reg, op, (op.isREG() && reg.isREG() && op.getBit() == reg.getBit()), op.isMEM(), 0x0F, 0xC0 | (reg.isBit(8) ? 0 : 1)); }");
|
||||
puts("void cmpxchg(const Operand& op, const Reg& reg) { opModRM(reg, op, (op.isREG() && reg.isREG() && op.getBit() == reg.getBit()), op.isMEM(), 0x0F, 0xB0 | (reg.isBit(8) ? 0 : 1)); }");
|
||||
puts("void movbe(const Reg& reg, const Address& addr) { opModM(addr, reg, 0x0F, 0x38, 0xF0); }");
|
||||
puts("void movbe(const Address& addr, const Reg& reg) { opModM(addr, reg, 0x0F, 0x38, 0xF1); }");
|
||||
puts("void movdiri(const Address& addr, const Reg32e& reg) { opModM(addr, reg, 0x0F, 0x38, 0xF9); }");
|
||||
puts("void movdir64b(const Reg& reg, const Address& addr) { db(0x66); opModM(addr, reg.cvt32(), 0x0F, 0x38, 0xF8); }");
|
||||
puts("void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }");
|
||||
puts("void adox(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0xF3, isREG32_REG32orMEM, NONE, 0x38); }");
|
||||
puts("void cmpxchg8b(const Address& addr) { opModM(addr, Reg32(1), 0x0F, 0xC7); }");
|
||||
|
@ -1079,6 +1087,11 @@ void put()
|
|||
puts("void rdrand(const Reg& r) { if (r.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opModR(Reg(6, Operand::REG, r.getBit()), r, 0x0F, 0xC7); }");
|
||||
puts("void rdseed(const Reg& r) { if (r.isBit(8)) XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) opModR(Reg(7, Operand::REG, r.getBit()), r, 0x0F, 0xC7); }");
|
||||
puts("void crc32(const Reg32e& reg, const Operand& op) { if (reg.isBit(32) && op.isBit(16)) db(0x66); db(0xF2); opModRM(reg, op, op.isREG(), op.isMEM(), 0x0F, 0x38, 0xF0 | (op.isBit(8) ? 0 : 1)); }");
|
||||
puts("void tpause(const Reg32& r) { int idx = r.getIdx(); if (idx > 7) XBYAK_THROW(ERR_BAD_PARAMETER) db(0x66); db(0x0F); db(0xAE); setModRM(3, 6, idx); }");
|
||||
puts("void umonitor(const Reg& r) { int idx = r.getIdx(); if (idx > 7) XBYAK_THROW(ERR_BAD_PARAMETER) int bit = r.getBit(); if (BIT != bit) { if ((BIT == 32 && bit == 16) || (BIT == 64 && bit == 32)) { db(0x67); } else { XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) } } db(0xF3); db(0x0F); db(0xAE); setModRM(3, 6, idx); }");
|
||||
puts("void umwait(const Reg32& r) { int idx = r.getIdx(); if (idx > 7) XBYAK_THROW(ERR_BAD_PARAMETER) db(0xF2); db(0x0F); db(0xAE); setModRM(3, 6, idx); }");
|
||||
puts("void clwb(const Address& addr) { db(0x66); opMIB(addr, esi, 0x0F, 0xAE); }");
|
||||
puts("void cldemote(const Address& addr) { opMIB(addr, eax, 0x0F, 0x1C); }");
|
||||
}
|
||||
{
|
||||
const struct Tbl {
|
||||
|
@ -1207,8 +1220,8 @@ void put()
|
|||
printf("void v%spd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x%02X); }\n", p->name, p->code);
|
||||
printf("void v%sps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x%02X); }\n", p->name, p->code);
|
||||
if (p->only_pd_ps) continue;
|
||||
printf("void v%ssd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z | T_N8, 0x%02X); }\n", p->name, p->code);
|
||||
printf("void v%sss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z | T_N4, 0x%02X); }\n", p->name, p->code);
|
||||
printf("void v%ssd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_X | T_N8, 0x%02X); }\n", p->name, p->code);
|
||||
printf("void v%sss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_X | T_N4, 0x%02X); }\n", p->name, p->code);
|
||||
}
|
||||
}
|
||||
putX_X_XM(false);
|
||||
|
@ -1299,7 +1312,8 @@ void put()
|
|||
if (p->mode & 1) {
|
||||
const char *immS1 = p->hasIMM ? ", uint8_t imm" : "";
|
||||
const char *immS2 = p->hasIMM ? ", imm" : ", NONE";
|
||||
const char *pref = p->type & T_66 ? "0x66" : p->type & T_F2 ? "0xF2" : p->type & T_F3 ? "0xF3" : "NONE";
|
||||
const char *prefTbl[5] = { "NONE", "0x66", "0xF3", "0xF2" };
|
||||
const char *pref = prefTbl[getPP(p->type)];
|
||||
const char *suf = p->type & T_0F38 ? "0x38" : p->type & T_0F3A ? "0x3A" : "NONE";
|
||||
printf("void %s(const Xmm& xmm, const Operand& op%s) { opGen(xmm, op, 0x%02X, %s, isXMM_XMMorMEM%s, %s); }\n", p->name, immS1, p->code, pref, immS2, suf);
|
||||
}
|
||||
|
@ -1350,11 +1364,12 @@ void put()
|
|||
{ 0xDE, "aesdec", T_0F38 | T_66 | T_YMM | T_EVEX, 3 },
|
||||
{ 0xDF, "aesdeclast", T_0F38 | T_66 | T_YMM | T_EVEX, 3 },
|
||||
};
|
||||
const uint8_t ppTbl[] = { 0, 0x66, 0xf3, 0xf2 };
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl *p = &tbl[i];
|
||||
std::string type = type2String(p->type);
|
||||
if (p->mode & 1) {
|
||||
uint8_t pref = p->type & T_66 ? 0x66 : p->type & T_F2 ? 0xF2 : p->type & T_F3 ? 0xF3 : 0;
|
||||
uint8_t pref = ppTbl[getPP(p->type)];
|
||||
printf("void %s(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x%02X, 0x%02X, isXMM_XMMorMEM%s); }\n", p->name, p->code, pref, p->type & T_0F38 ? ", NONE, 0x38" : "");
|
||||
}
|
||||
if (p->mode & 2) {
|
||||
|
@ -1648,7 +1663,7 @@ void put()
|
|||
puts("void vcvttpd2dq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_66 | T_0F | T_YMM | T_EVEX |T_EW1 | T_B64 | T_ER_Z, 0xE6); }");
|
||||
|
||||
puts("void vcvtph2ps(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F38 | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x13); }");
|
||||
puts("void vcvtps2ph(const Operand& op, const Xmm& x, uint8_t imm) { checkCvt1(x, op); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x1D, imm); }");
|
||||
puts("void vcvtps2ph(const Operand& op, const Xmm& x, uint8_t imm) { checkCvt1(x, op); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y | T_M_K, 0x1D, imm); }");
|
||||
|
||||
}
|
||||
// haswell gpr(reg, reg, r/m)
|
||||
|
|
45
externals/dynarmic/externals/xbyak/meson.build
vendored
Executable file
45
externals/dynarmic/externals/xbyak/meson.build
vendored
Executable file
|
@ -0,0 +1,45 @@
|
|||
# SPDX-FileCopyrightText: 2021 Andrea Pappacoda
|
||||
#
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
project(
|
||||
'xbyak',
|
||||
'cpp',
|
||||
version: '6.60.1',
|
||||
license: 'BSD-3-Clause',
|
||||
default_options: 'b_ndebug=if-release'
|
||||
)
|
||||
|
||||
install_subdir('xbyak', install_dir: get_option('includedir'))
|
||||
|
||||
xbyak_dep = declare_dependency(include_directories: include_directories('.'))
|
||||
|
||||
if meson.version().version_compare('>=0.54.0')
|
||||
meson.override_dependency(meson.project_name(), xbyak_dep)
|
||||
endif
|
||||
|
||||
import('pkgconfig').generate(
|
||||
name: meson.project_name(),
|
||||
description: 'JIT assembler for x86(IA32), x64(AMD64, x86-64)',
|
||||
version: meson.project_version(),
|
||||
url: 'https://github.com/herumi/xbyak'
|
||||
)
|
||||
|
||||
if meson.version().version_compare('>=0.50.0')
|
||||
cmake = import('cmake')
|
||||
|
||||
cmake.write_basic_package_version_file(
|
||||
name: meson.project_name(),
|
||||
version: meson.project_version()
|
||||
)
|
||||
|
||||
cmake_conf = configuration_data()
|
||||
cmake_conf.set('TARGET_NAME', meson.project_name() + '::' + meson.project_name())
|
||||
cmake_conf.set('ABSOLUTE_INCLUDE_DIR', get_option('prefix')/get_option('includedir'))
|
||||
|
||||
cmake.configure_package_config_file(
|
||||
name: meson.project_name(),
|
||||
input: 'cmake'/'meson-config.cmake.in',
|
||||
configuration: cmake_conf
|
||||
)
|
||||
endif
|
619
externals/dynarmic/externals/xbyak/readme.md
vendored
619
externals/dynarmic/externals/xbyak/readme.md
vendored
|
@ -1,6 +1,13 @@
|
|||
[![Build Status](https://github.com/herumi/xbyak/actions/workflows/main.yml/badge.svg)](https://github.com/herumi/xbyak/actions/workflows/main.yml)
|
||||
|
||||
# Xbyak 5.991 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++
|
||||
# Xbyak 6.60.1 [![Badge Build]][Build Status]
|
||||
|
||||
*A C++ JIT assembler for x86 (IA32), x64 (AMD64, x86-64)*
|
||||
|
||||
## Menu
|
||||
|
||||
- [Install]
|
||||
- [Usage]
|
||||
- [Changelog]
|
||||
|
||||
## Abstract
|
||||
|
||||
|
@ -10,15 +17,23 @@ The pronunciation of Xbyak is `kəi-bja-k`.
|
|||
It is named from a Japanese word [開闢](https://translate.google.com/?hl=ja&sl=ja&tl=en&text=%E9%96%8B%E9%97%A2&op=translate), which means the beginning of the world.
|
||||
|
||||
## Feature
|
||||
* header file only
|
||||
* Intel/MASM like syntax
|
||||
* fully support AVX-512
|
||||
|
||||
- header file only
|
||||
- Intel/MASM like syntax
|
||||
- fully support AVX-512
|
||||
|
||||
**Note**:
|
||||
Use `and_()`, `or_()`, ... instead of `and()`, `or()`.
|
||||
If you want to use them, then specify `-fno-operator-names` option to gcc/clang.
|
||||
|
||||
### News
|
||||
|
||||
- add movdiri, movdir64b, clwb, cldemote
|
||||
- WAITPKG instructions (tpause, umonitor, umwait) are supported.
|
||||
- MmapAllocator supports memfd with user-defined strings. see sample/memfd.cpp
|
||||
- strictly check address offset disp32 in a signed 32-bit integer. e.g., `ptr[(void*)0xffffffff]` causes an error.
|
||||
- define `XBYAK_OLD_DISP_CHECK` if you need an old check, but the option will be remoevd.
|
||||
- add `jmp(mem, T_FAR)`, `call(mem, T_FAR)` `retf()` for far absolute indirect jump.
|
||||
- vnni instructions such as vpdpbusd supports vex encoding.
|
||||
- (break backward compatibility) `push(byte, imm)` (resp. `push(word, imm)`) forces to cast `imm` to 8(resp. 16) bit.
|
||||
- (Windows) `#include <winsock2.h>` has been removed from xbyak.h, so add it explicitly if you need it.
|
||||
|
@ -27,590 +42,34 @@ If you want to use them, then specify `-fno-operator-names` option to gcc/clang.
|
|||
|
||||
### Supported OS
|
||||
|
||||
* Windows Xp, Vista, Windows 7, Windows 10(32bit, 64bit)
|
||||
* Linux(32bit, 64bit)
|
||||
* Intel macOS
|
||||
- Windows (Xp, Vista, 7, 10, 11) (32 / 64 bit)
|
||||
- Linux (32 / 64 bit)
|
||||
- macOS (Intel CPU)
|
||||
|
||||
### Supported Compilers
|
||||
|
||||
Almost C++03 or later compilers for x86/x64 such as Visual Studio, g++, clang++, Intel C++ compiler and g++ on mingw/cygwin.
|
||||
|
||||
## Install
|
||||
|
||||
The following files are necessary. Please add the path to your compile directory.
|
||||
|
||||
* xbyak.h
|
||||
* xbyak_mnemonic.h
|
||||
* xbyak_util.h
|
||||
|
||||
Linux:
|
||||
```
|
||||
make install
|
||||
```
|
||||
|
||||
These files are copied into `/usr/local/include/xbyak`.
|
||||
|
||||
## How to use it
|
||||
|
||||
Inherit `Xbyak::CodeGenerator` class and make the class method.
|
||||
```
|
||||
#include <xbyak/xbyak.h>
|
||||
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code(int x)
|
||||
{
|
||||
mov(eax, x);
|
||||
ret();
|
||||
}
|
||||
};
|
||||
```
|
||||
Or you can pass the instance of CodeGenerator without inheriting.
|
||||
```
|
||||
void genCode(Xbyak::CodeGenerator& code, int x) {
|
||||
using namespace Xbyak::util;
|
||||
code.mov(eax, x);
|
||||
code.ret();
|
||||
}
|
||||
```
|
||||
|
||||
Make an instance of the class and get the function
|
||||
pointer by calling `getCode()` and call it.
|
||||
```
|
||||
Code c(5);
|
||||
int (*f)() = c.getCode<int (*)()>();
|
||||
printf("ret=%d\n", f()); // ret = 5
|
||||
```
|
||||
|
||||
## Syntax
|
||||
Similar to MASM/NASM syntax with parentheses.
|
||||
|
||||
```
|
||||
NASM Xbyak
|
||||
mov eax, ebx --> mov(eax, ebx);
|
||||
inc ecx inc(ecx);
|
||||
ret --> ret();
|
||||
```
|
||||
|
||||
## Addressing
|
||||
Use `qword`, `dword`, `word` and `byte` if it is necessary to specify the size of memory,
|
||||
otherwise use `ptr`.
|
||||
|
||||
```
|
||||
(ptr|qword|dword|word|byte) [base + index * (1|2|4|8) + displacement]
|
||||
[rip + 32bit disp] ; x64 only
|
||||
|
||||
NASM Xbyak
|
||||
mov eax, [ebx+ecx] --> mov(eax, ptr [ebx+ecx]);
|
||||
mov al, [ebx+ecx] --> mov(al, ptr [ebx + ecx]);
|
||||
test byte [esp], 4 --> test(byte [esp], 4);
|
||||
inc qword [rax] --> inc(qword [rax]);
|
||||
```
|
||||
**Note**: `qword`, ... are member variables, then don't use `dword` as unsigned int type.
|
||||
|
||||
### How to use Selector (Segment Register)
|
||||
```
|
||||
mov eax, [fs:eax] --> putSeg(fs);
|
||||
mov(eax, ptr [eax]);
|
||||
mov ax, cs --> mov(ax, cs);
|
||||
```
|
||||
**Note**: Segment class is not derived from `Operand`.
|
||||
|
||||
## AVX
|
||||
|
||||
```
|
||||
vaddps(xmm1, xmm2, xmm3); // xmm1 <- xmm2 + xmm3
|
||||
vaddps(xmm2, xmm3, ptr [rax]); // use ptr to access memory
|
||||
vgatherdpd(xmm1, ptr [ebp + 256 + xmm2*4], xmm3);
|
||||
```
|
||||
|
||||
**Note**:
|
||||
If `XBYAK_ENABLE_OMITTED_OPERAND` is defined, then you can use two operand version for backward compatibility.
|
||||
But the newer version will not support it.
|
||||
```
|
||||
vaddps(xmm2, xmm3); // xmm2 <- xmm2 + xmm3
|
||||
```
|
||||
|
||||
## AVX-512
|
||||
|
||||
```
|
||||
vaddpd zmm2, zmm5, zmm30 --> vaddpd(zmm2, zmm5, zmm30);
|
||||
vaddpd xmm30, xmm20, [rax] --> vaddpd(xmm30, xmm20, ptr [rax]);
|
||||
vaddps xmm30, xmm20, [rax] --> vaddps(xmm30, xmm20, ptr [rax]);
|
||||
vaddpd zmm2{k5}, zmm4, zmm2 --> vaddpd(zmm2 | k5, zmm4, zmm2);
|
||||
vaddpd zmm2{k5}{z}, zmm4, zmm2 --> vaddpd(zmm2 | k5 | T_z, zmm4, zmm2);
|
||||
vaddpd zmm2{k5}{z}, zmm4, zmm2,{rd-sae} --> vaddpd(zmm2 | k5 | T_z, zmm4, zmm2 | T_rd_sae);
|
||||
vaddpd(zmm2 | k5 | T_z | T_rd_sae, zmm4, zmm2); // the position of `|` is arbitrary.
|
||||
vcmppd k4{k3}, zmm1, zmm2, {sae}, 5 --> vcmppd(k4 | k3, zmm1, zmm2 | T_sae, 5);
|
||||
|
||||
vaddpd xmm1, xmm2, [rax+256] --> vaddpd(xmm1, xmm2, ptr [rax+256]);
|
||||
vaddpd xmm1, xmm2, [rax+256]{1to2} --> vaddpd(xmm1, xmm2, ptr_b [rax+256]);
|
||||
vaddpd ymm1, ymm2, [rax+256]{1to4} --> vaddpd(ymm1, ymm2, ptr_b [rax+256]);
|
||||
vaddpd zmm1, zmm2, [rax+256]{1to8} --> vaddpd(zmm1, zmm2, ptr_b [rax+256]);
|
||||
vaddps zmm1, zmm2, [rax+rcx*8+8]{1to16} --> vaddps(zmm1, zmm2, ptr_b [rax+rcx*8+8]);
|
||||
vmovsd [rax]{k1}, xmm4 --> vmovsd(ptr [rax] | k1, xmm4);
|
||||
|
||||
vcvtpd2dq xmm16, oword [eax+33] --> vcvtpd2dq(xmm16, xword [eax+33]); // use xword for m128 instead of oword
|
||||
vcvtpd2dq(xmm16, ptr [eax+33]); // default xword
|
||||
vcvtpd2dq xmm21, [eax+32]{1to2} --> vcvtpd2dq(xmm21, ptr_b [eax+32]);
|
||||
vcvtpd2dq xmm0, yword [eax+33] --> vcvtpd2dq(xmm0, yword [eax+33]); // use yword for m256
|
||||
vcvtpd2dq xmm19, [eax+32]{1to4} --> vcvtpd2dq(xmm19, yword_b [eax+32]); // use yword_b to broadcast
|
||||
|
||||
vfpclassps k5{k3}, zword [rax+64], 5 --> vfpclassps(k5|k3, zword [rax+64], 5); // specify m512
|
||||
vfpclasspd k5{k3}, [rax+64]{1to2}, 5 --> vfpclasspd(k5|k3, xword_b [rax+64], 5); // broadcast 64-bit to 128-bit
|
||||
vfpclassps k5{k3}, [rax+64]{1to4}, 5 --> vfpclassps(k5|k3, yword_b [rax+64], 5); // broadcast 64-bit to 256-bit
|
||||
|
||||
vpdpbusd(xm0, xm1, xm2); // default encoding is EVEX
|
||||
vpdpbusd(xm0, xm1, xm2, EvexEncoding); // same as the above
|
||||
vpdpbusd(xm0, xm1, xm2, VexEncoding); // VEX encoding
|
||||
```
|
||||
### Remark
|
||||
* `k1`, ..., `k7` are opmask registers.
|
||||
- `k0` is dealt as no mask.
|
||||
- e.g. `vmovaps(zmm0|k0, ptr[rax]);` and `vmovaps(zmm0|T_z, ptr[rax]);` are same to `vmovaps(zmm0, ptr[rax]);`.
|
||||
* use `| T_z`, `| T_sae`, `| T_rn_sae`, `| T_rd_sae`, `| T_ru_sae`, `| T_rz_sae` instead of `,{z}`, `,{sae}`, `,{rn-sae}`, `,{rd-sae}`, `,{ru-sae}`, `,{rz-sae}` respectively.
|
||||
* `k4 | k3` is different from `k3 | k4`.
|
||||
* use `ptr_b` for broadcast `{1toX}`. X is automatically determined.
|
||||
* specify `xword`/`yword`/`zword(_b)` for m128/m256/m512 if necessary.
|
||||
|
||||
## Label
|
||||
Two kinds of Label are supported. (String literal and Label class).
|
||||
|
||||
### String literal
|
||||
```
|
||||
L("L1");
|
||||
jmp("L1");
|
||||
|
||||
jmp("L2");
|
||||
...
|
||||
a few mnemonics (8-bit displacement jmp)
|
||||
...
|
||||
L("L2");
|
||||
|
||||
jmp("L3", T_NEAR);
|
||||
...
|
||||
a lot of mnemonics (32-bit displacement jmp)
|
||||
...
|
||||
L("L3");
|
||||
```
|
||||
|
||||
* Call `hasUndefinedLabel()` to verify your code has no undefined label.
|
||||
* you can use a label for immediate value of mov like as `mov(eax, "L2")`.
|
||||
|
||||
### Support `@@`, `@f`, `@b` like MASM
|
||||
|
||||
```
|
||||
L("@@"); // <A>
|
||||
jmp("@b"); // jmp to <A>
|
||||
jmp("@f"); // jmp to <B>
|
||||
L("@@"); // <B>
|
||||
jmp("@b"); // jmp to <B>
|
||||
mov(eax, "@b");
|
||||
jmp(eax); // jmp to <B>
|
||||
```
|
||||
|
||||
### Local label
|
||||
|
||||
Label symbols beginning with a period between `inLocalLabel()` and `outLocalLabel()`
|
||||
are treated as a local label.
|
||||
`inLocalLabel()` and `outLocalLabel()` can be nested.
|
||||
|
||||
```
|
||||
void func1()
|
||||
{
|
||||
inLocalLabel();
|
||||
L(".lp"); // <A> ; local label
|
||||
...
|
||||
jmp(".lp"); // jmp to <A>
|
||||
L("aaa"); // global label <C>
|
||||
outLocalLabel();
|
||||
|
||||
inLocalLabel();
|
||||
L(".lp"); // <B> ; local label
|
||||
func1();
|
||||
jmp(".lp"); // jmp to <B>
|
||||
inLocalLabel();
|
||||
jmp("aaa"); // jmp to <C>
|
||||
}
|
||||
```
|
||||
|
||||
### short and long jump
|
||||
Xbyak deals with jump mnemonics of an undefined label as short jump if no type is specified.
|
||||
So if the size between jmp and label is larger than 127 byte, then xbyak will cause an error.
|
||||
|
||||
```
|
||||
jmp("short-jmp"); // short jmp
|
||||
// small code
|
||||
L("short-jmp");
|
||||
|
||||
jmp("long-jmp");
|
||||
// long code
|
||||
L("long-jmp"); // throw exception
|
||||
```
|
||||
Then specify T_NEAR for jmp.
|
||||
```
|
||||
jmp("long-jmp", T_NEAR); // long jmp
|
||||
// long code
|
||||
L("long-jmp");
|
||||
```
|
||||
Or call `setDefaultJmpNEAR(true);` once, then the default type is set to T_NEAR.
|
||||
```
|
||||
jmp("long-jmp"); // long jmp
|
||||
// long code
|
||||
L("long-jmp");
|
||||
```
|
||||
|
||||
### Label class
|
||||
|
||||
`L()` and `jxx()` support Label class.
|
||||
|
||||
```
|
||||
Xbyak::Label label1, label2;
|
||||
L(label1);
|
||||
...
|
||||
jmp(label1);
|
||||
...
|
||||
jmp(label2);
|
||||
...
|
||||
L(label2);
|
||||
```
|
||||
|
||||
Use `putL` for jmp table
|
||||
```
|
||||
Label labelTbl, L0, L1, L2;
|
||||
mov(rax, labelTbl);
|
||||
// rdx is an index of jump table
|
||||
jmp(ptr [rax + rdx * sizeof(void*)]);
|
||||
L(labelTbl);
|
||||
putL(L0);
|
||||
putL(L1);
|
||||
putL(L2);
|
||||
L(L0);
|
||||
....
|
||||
L(L1);
|
||||
....
|
||||
```
|
||||
|
||||
`assignL(dstLabel, srcLabel)` binds dstLabel with srcLabel.
|
||||
|
||||
```
|
||||
Label label2;
|
||||
Label label1 = L(); // make label1 ; same to Label label1; L(label1);
|
||||
...
|
||||
jmp(label2); // label2 is not determined here
|
||||
...
|
||||
assignL(label2, label1); // label2 <- label1
|
||||
```
|
||||
The `jmp` in the above code jumps to label1 assigned by `assignL`.
|
||||
|
||||
**Note**:
|
||||
* srcLabel must be used in `L()`.
|
||||
* dstLabel must not be used in `L()`.
|
||||
|
||||
`Label::getAddress()` returns the address specified by the label instance and 0 if not specified.
|
||||
```
|
||||
// not AutoGrow mode
|
||||
Label label;
|
||||
assert(label.getAddress() == 0);
|
||||
L(label);
|
||||
assert(label.getAddress() == getCurr());
|
||||
```
|
||||
|
||||
### Rip ; relative addressing
|
||||
```
|
||||
Label label;
|
||||
mov(eax, ptr [rip + label]); // eax = 4
|
||||
...
|
||||
|
||||
L(label);
|
||||
dd(4);
|
||||
```
|
||||
```
|
||||
int x;
|
||||
...
|
||||
mov(eax, ptr[rip + &x]); // throw exception if the difference between &x and current position is larger than 2GiB
|
||||
```
|
||||
|
||||
## Code size
|
||||
The default max code size is 4096 bytes.
|
||||
Specify the size in constructor of `CodeGenerator()` if necessary.
|
||||
|
||||
```
|
||||
class Quantize : public Xbyak::CodeGenerator {
|
||||
public:
|
||||
Quantize()
|
||||
: CodeGenerator(8192)
|
||||
{
|
||||
}
|
||||
...
|
||||
};
|
||||
```
|
||||
|
||||
## User allocated memory
|
||||
|
||||
You can make jit code on prepared memory.
|
||||
|
||||
Call `setProtectModeRE` yourself to change memory mode if using the prepared memory.
|
||||
|
||||
```
|
||||
uint8_t alignas(4096) buf[8192]; // C++11 or later
|
||||
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code() : Xbyak::CodeGenerator(sizeof(buf), buf)
|
||||
{
|
||||
mov(rax, 123);
|
||||
ret();
|
||||
}
|
||||
};
|
||||
|
||||
int main()
|
||||
{
|
||||
Code c;
|
||||
c.setProtectModeRE(); // set memory to Read/Exec
|
||||
printf("%d\n", c.getCode<int(*)()>()());
|
||||
}
|
||||
```
|
||||
|
||||
**Note**: See [sample/test0.cpp](sample/test0.cpp).
|
||||
|
||||
### AutoGrow
|
||||
|
||||
The memory region for jit is automatically extended if necessary when `AutoGrow` is specified in a constructor of `CodeGenerator`.
|
||||
|
||||
Call `ready()` or `readyRE()` before calling `getCode()` to fix jump address.
|
||||
```
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
: Xbyak::CodeGenerator(<default memory size>, Xbyak::AutoGrow)
|
||||
{
|
||||
...
|
||||
}
|
||||
};
|
||||
Code c;
|
||||
// generate code for jit
|
||||
c.ready(); // mode = Read/Write/Exec
|
||||
```
|
||||
|
||||
**Note**:
|
||||
* Don't use the address returned by `getCurr()` before calling `ready()` because it may be invalid address.
|
||||
|
||||
### Read/Exec mode
|
||||
Xbyak set Read/Write/Exec mode to memory to run jit code.
|
||||
If you want to use Read/Exec mode for security, then specify `DontSetProtectRWE` for `CodeGenerator` and
|
||||
call `setProtectModeRE()` after generating jit code.
|
||||
|
||||
```
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
Code()
|
||||
: Xbyak::CodeGenerator(4096, Xbyak::DontSetProtectRWE)
|
||||
{
|
||||
mov(eax, 123);
|
||||
ret();
|
||||
}
|
||||
};
|
||||
|
||||
Code c;
|
||||
c.setProtectModeRE();
|
||||
...
|
||||
|
||||
```
|
||||
Call `readyRE()` instead of `ready()` when using `AutoGrow` mode.
|
||||
See [protect-re.cpp](sample/protect-re.cpp).
|
||||
|
||||
## Exception-less mode
|
||||
If `XBYAK_NO_EXCEPTION` is defined, then gcc/clang can compile xbyak with `-fno-exceptions`.
|
||||
In stead of throwing an exception, `Xbyak::GetError()` returns non-zero value (e.g. `ERR_BAD_ADDRESSING`) if there is something wrong.
|
||||
The status will not be changed automatically, then you should reset it by `Xbyak::ClearError()`.
|
||||
`CodeGenerator::reset()` calls `ClearError()`.
|
||||
|
||||
## Macro
|
||||
|
||||
* **XBYAK32** is defined on 32bit.
|
||||
* **XBYAK64** is defined on 64bit.
|
||||
* **XBYAK64_WIN** is defined on 64bit Windows(VC).
|
||||
* **XBYAK64_GCC** is defined on 64bit gcc, cygwin.
|
||||
* define **XBYAK_USE_OP_NAMES** on gcc with `-fno-operator-names` if you want to use `and()`, ....
|
||||
* define **XBYAK_ENABLE_OMITTED_OPERAND** if you use omitted destination such as `vaddps(xmm2, xmm3);`(deprecated in the future).
|
||||
* define **XBYAK_UNDEF_JNL** if Bessel function jnl is defined as macro.
|
||||
* define **XBYAK_NO_EXCEPTION** for a compiler option `-fno-exceptions`.
|
||||
|
||||
## Sample
|
||||
|
||||
* [test0.cpp](sample/test0.cpp) ; tiny sample (x86, x64)
|
||||
* [quantize.cpp](sample/quantize.cpp) ; JIT optimized quantization by fast division (x86 only)
|
||||
* [calc.cpp](sample/calc.cpp) ; assemble and estimate a given polynomial (x86, x64)
|
||||
* [bf.cpp](sample/bf.cpp) ; JIT brainfuck (x86, x64)
|
||||
|
||||
## License
|
||||
|
||||
modified new BSD License
|
||||
http://opensource.org/licenses/BSD-3-Clause
|
||||
|
||||
## History
|
||||
* 2020/Nov/16 ver 5.991 disable constexpr for gcc-5 with -std=c++-14
|
||||
* 2020/Oct/19 ver 5.99 support VNNI instructions(Thanks to akharito)
|
||||
* 2020/Oct/17 ver 5.98 support the form of [scale * reg]
|
||||
* 2020/Sep/08 ver 5.97 replace uint32 with uint32_t etc.
|
||||
* 2020/Aug/28 ver 5.95 some constructors of register classes support constexpr if C++14 or later
|
||||
* 2020/Aug/04 ver 5.941 `CodeGenerator::reset()` calls `ClearError()`.
|
||||
* 2020/Jul/28 ver 5.94 remove #include <winsock2.h> (only windows)
|
||||
* 2020/Jul/21 ver 5.93 support exception-less mode
|
||||
* 2020/Jun/30 ver 5.92 support Intel AMX instruction set (Thanks to nshustrov)
|
||||
* 2020/Jun/22 ver 5.913 fix mov(r64, imm64) on 32-bit env with XBYAK64
|
||||
* 2020/Jun/19 ver 5.912 define MAP_JIT on macOS regardless of Xcode version (Thanks to rsdubtso)
|
||||
* 2020/May/10 ver 5.911 XBYAK_USE_MMAP_ALLOCATOR is defined unless XBYAK_DONT_USE_MMAP_ALLOCATOR is defined.
|
||||
* 2020/Apr/20 ver 5.91 accept mask register k0 (it means no mask)
|
||||
* 2020/Apr/09 ver 5.90 kmov{b,d,w,q} throws exception for an unsupported register
|
||||
* 2020/Feb/26 ver 5.891 fix typo of type
|
||||
* 2020/Jan/03 ver 5.89 fix error of vfpclasspd
|
||||
* 2019/Dec/20 ver 5.88 fix compile error on Windows
|
||||
* 2019/Dec/19 ver 5.87 add setDefaultJmpNEAR(), which deals with `jmp` of an undefined label as T_NEAR if no type is specified.
|
||||
* 2019/Dec/13 ver 5.86 [changed] revert to the behavior before v5.84 if -fno-operator-names is defined (and() is available)
|
||||
* 2019/Dec/07 ver 5.85 append MAP_JIT flag to mmap for macOS mojave or later
|
||||
* 2019/Nov/29 ver 5.84 [changed] XBYAK_NO_OP_NAMES is defined unless XBYAK_USE_OP_NAMES is defined
|
||||
* 2019/Oct/12 ver 5.83 exit(1) was removed
|
||||
* 2019/Sep/23 ver 5.82 support monitorx, mwaitx, clzero (thanks to @MagurosanTeam)
|
||||
* 2019/Sep/14 ver 5.81 support some generic mnemonics.
|
||||
* 2019/Aug/01 ver 5.802 fix detection of AVX512_BF16 (thanks to vpirogov)
|
||||
* 2019/May/27 support vp2intersectd, vp2intersectq (not tested)
|
||||
* 2019/May/26 ver 5.80 support vcvtne2ps2bf16, vcvtneps2bf16, vdpbf16ps
|
||||
* 2019/Apr/27 ver 5.79 vcmppd/vcmpps supports ptr_b(thanks to jkopinsky)
|
||||
* 2019/Apr/15 ver 5.78 rewrite Reg::changeBit() (thanks to MerryMage)
|
||||
* 2019/Mar/06 ver 5.77 fix number of cores that share LLC cache by densamoilov
|
||||
* 2019/Jan/17 ver 5.76 add Cpu::getNumCores() by shelleygoel
|
||||
* 2018/Oct/31 ver 5.751 recover Xbyak::CastTo for compatibility
|
||||
* 2018/Oct/29 ver 5.75 unlink LabelManager from Label when msg is destroyed
|
||||
* 2018/Oct/21 ver 5.74 support RegRip +/- int. Xbyak::CastTo is removed
|
||||
* 2018/Oct/15 util::AddressFrame uses push/pop instead of mov
|
||||
* 2018/Sep/19 ver 5.73 fix evex encoding of vpslld, vpslldq, vpsllw, etc for (reg, mem, imm8)
|
||||
* 2018/Sep/19 ver 5.72 fix the encoding of vinsertps for disp8N(Thanks to petercaday)
|
||||
* 2018/Sep/04 ver 5.71 L() returns a new label instance
|
||||
* 2018/Aug/27 ver 5.70 support setProtectMode() and DontUseProtect for read/exec setting
|
||||
* 2018/Aug/24 ver 5.68 fix wrong VSIB encoding with vector index >= 16(thanks to petercaday)
|
||||
* 2018/Aug/14 ver 5.67 remove mutable in Address ; fix setCacheHierarchy for cloud vm
|
||||
* 2018/Jul/26 ver 5.661 support mingw64
|
||||
* 2018/Jul/24 ver 5.66 add CodeArray::PROTECT_RE to mode of protect()
|
||||
* 2018/Jun/26 ver 5.65 fix push(qword [mem])
|
||||
* 2018/Mar/07 ver 5.64 fix zero division in Cpu() on some cpu
|
||||
* 2018/Feb/14 ver 5.63 fix Cpu::setCacheHierarchy() and fix EvexModifierZero for clang<3.9(thanks to mgouicem)
|
||||
* 2018/Feb/13 ver 5.62 Cpu::setCacheHierarchy() by mgouicem and rsdubtso
|
||||
* 2018/Feb/07 ver 5.61 vmov* supports mem{k}{z}(I forgot it)
|
||||
* 2018/Jan/24 ver 5.601 add xword, yword, etc. into Xbyak::util namespace
|
||||
* 2018/Jan/05 ver 5.60 support AVX-512 for Ice lake(319433-030.pdf)
|
||||
* 2017/Aug/22 ver 5.53 fix mpx encoding, add bnd() prefix
|
||||
* 2017/Aug/18 ver 5.52 fix align (thanks to MerryMage)
|
||||
* 2017/Aug/17 ver 5.51 add multi-byte nop and align() uses it(thanks to inolen)
|
||||
* 2017/Aug/08 ver 5.50 add mpx(thanks to magurosan)
|
||||
* 2017/Aug/08 ver 5.45 add sha(thanks to magurosan)
|
||||
* 2017/Aug/08 ver 5.44 add prefetchw(thanks to rsdubtso)
|
||||
* 2017/Jul/12 ver 5.432 reduce warnings of PVS studio
|
||||
* 2017/Jul/09 ver 5.431 fix hasRex() (no affect) (thanks to drillsar)
|
||||
* 2017/May/14 ver 5.43 fix CodeGenerator::resetSize() (thanks to gibbed)
|
||||
* 2017/May/13 ver 5.42 add movs{b,w,d,q}
|
||||
* 2017/Jan/26 ver 5.41 add prefetchwt1 and support for scale == 0(thanks to rsdubtso)
|
||||
* 2016/Dec/14 ver 5.40 add Label::getAddress() method to get the pointer specified by the label
|
||||
* 2016/Dec/09 ver 5.34 fix handling of negative offsets when encoding disp8N(thanks to rsdubtso)
|
||||
* 2016/Dec/08 ver 5.33 fix encoding of vpbroadcast{b,w,d,q}, vpinsr{b,w}, vpextr{b,w} for disp8N
|
||||
* 2016/Dec/01 ver 5.32 rename __xgetbv() to _xgetbv() to support clang for Visual Studio(thanks to freiro)
|
||||
* 2016/Nov/27 ver 5.31 rename AVX512_4VNNI to AVX512_4VNNIW
|
||||
* 2016/Nov/27 ver 5.30 add AVX512_4VNNI, AVX512_4FMAPS instructions(thanks to rsdubtso)
|
||||
* 2016/Nov/26 ver 5.20 add detection of AVX512_4VNNI and AVX512_4FMAPS(thanks to rsdubtso)
|
||||
* 2016/Nov/20 ver 5.11 lost vptest for ymm(thanks to gregory38)
|
||||
* 2016/Nov/20 ver 5.10 add addressing [rip+&var]
|
||||
* 2016/Sep/29 ver 5.03 fix detection ERR_INVALID_OPMASK_WITH_MEMORY(thanks to PVS-Studio)
|
||||
* 2016/Aug/15 ver 5.02 xbyak does not include xbyak_bin2hex.h
|
||||
* 2016/Aug/15 ver 5.011 fix detection of version of gcc 5.4
|
||||
* 2016/Aug/03 ver 5.01 disable omitted operand
|
||||
* 2016/Jun/24 ver 5.00 support avx-512 instruction set
|
||||
* 2016/Jun/13 avx-512 add mask instructions
|
||||
* 2016/May/05 ver 4.91 add detection of AVX-512 to Xbyak::util::Cpu
|
||||
* 2016/Mar/14 ver 4.901 comment to ready() function(thanks to skmp)
|
||||
* 2016/Feb/04 ver 4.90 add jcc(const void *addr);
|
||||
* 2016/Jan/30 ver 4.89 vpblendvb supports ymm reg(thanks to John Funnell)
|
||||
* 2016/Jan/24 ver 4.88 lea, cmov supports 16-bit register(thanks to whyisthisfieldhere)
|
||||
* 2015/Oct/05 ver 4.87 support segment selectors
|
||||
* 2015/Aug/18 ver 4.86 fix [rip + label] addressing with immediate value(thanks to whyisthisfieldhere)
|
||||
* 2015/Aug/10 ver 4.85 Address::operator==() is not correct(thanks to inolen)
|
||||
* 2015/Jun/22 ver 4.84 call() support variadic template if available(thanks to randomstuff)
|
||||
* 2015/Jun/16 ver 4.83 support movbe(thanks to benvanik)
|
||||
* 2015/May/24 ver 4.82 support detection of F16C
|
||||
* 2015/Apr/25 ver 4.81 fix the condition to throw exception for setSize(thanks to whyisthisfieldhere)
|
||||
* 2015/Apr/22 ver 4.80 rip supports label(thanks to whyisthisfieldhere)
|
||||
* 2015/Jar/28 ver 4.71 support adcx, adox, cmpxchg, rdseed, stac
|
||||
* 2014/Oct/14 ver 4.70 support MmapAllocator
|
||||
* 2014/Jun/13 ver 4.62 disable warning of VC2014
|
||||
* 2014/May/30 ver 4.61 support bt, bts, btr, btc
|
||||
* 2014/May/28 ver 4.60 support vcvtph2ps, vcvtps2ph
|
||||
* 2014/Apr/11 ver 4.52 add detection of rdrand
|
||||
* 2014/Mar/25 ver 4.51 remove state information of unreferenced labels
|
||||
* 2014/Mar/16 ver 4.50 support new Label
|
||||
* 2014/Mar/05 ver 4.40 fix wrong detection of BMI/enhanced rep on VirtualBox
|
||||
* 2013/Dec/03 ver 4.30 support Reg::cvt8(), cvt16(), cvt32(), cvt64()
|
||||
* 2013/Oct/16 ver 4.21 label support std::string
|
||||
* 2013/Jul/30 ver 4.20 [break backward compatibility] split Reg32e class into RegExp(base+index*scale+disp) and Reg32e(means Reg32 or Reg64)
|
||||
* 2013/Jul/04 ver 4.10 [break backward compatibility] change the type of Xbyak::Error from enum to a class
|
||||
* 2013/Jun/21 ver 4.02 add putL(LABEL) function to put the address of the label
|
||||
* 2013/Jun/21 ver 4.01 vpsllw, vpslld, vpsllq, vpsraw, vpsrad, vpsrlw, vpsrld, vpsrlq support (ymm, ymm, xmm). support vpbroadcastb, vpbroadcastw, vpbroadcastd, vpbroadcastq(thanks to Gabest).
|
||||
* 2013/May/30 ver 4.00 support AVX2, VEX-encoded GPR-instructions
|
||||
* 2013/Mar/27 ver 3.80 support mov(reg, "label");
|
||||
* 2013/Mar/13 ver 3.76 add cqo(), jcxz(), jecxz(), jrcxz()
|
||||
* 2013/Jan/15 ver 3.75 add setSize() to modify generated code
|
||||
* 2013/Jan/12 ver 3.74 add CodeGenerator::reset() ; add Allocator::useProtect()
|
||||
* 2013/Jan/06 ver 3.73 use unordered_map if possible
|
||||
* 2012/Dec/04 ver 3.72 eax, ebx, ... are member variables of CodeGenerator(revert), Xbyak::util::eax, ... are static const.
|
||||
* 2012/Nov/17 ver 3.71 and_(), or_(), xor_(), not_() are available if XBYAK_NO_OP_NAMES is not defined.
|
||||
* 2012/Nov/17 change eax, ebx, ptr and so on in CodeGenerator as static member and alias of them are defined in Xbyak::util.
|
||||
* 2012/Nov/09 ver 3.70 XBYAK_NO_OP_NAMES macro is added to use and_() instead of and() (thanks to Mattias)
|
||||
* 2012/Nov/01 ver 3.62 add fwait/fnwait/finit/fninit
|
||||
* 2012/Nov/01 ver 3.61 add fldcw/fstcw
|
||||
* 2012/May/03 ver 3.60 change interface of Allocator
|
||||
* 2012/Mar/23 ver 3.51 fix userPtr mode
|
||||
* 2012/Mar/19 ver 3.50 support AutoGrow mode
|
||||
* 2011/Nov/09 ver 3.05 fix bit property of rip addresing / support movsxd
|
||||
* 2011/Aug/15 ver 3.04 fix dealing with imm8 such as add(dword [ebp-8], 0xda); (thanks to lolcat)
|
||||
* 2011/Jun/16 ver 3.03 fix __GNUC_PREREQ macro for Mac gcc(thanks to t_teruya)
|
||||
* 2011/Apr/28 ver 3.02 do not use xgetbv on Mac gcc
|
||||
* 2011/May/24 ver 3.01 fix typo of OSXSAVE
|
||||
* 2011/May/23 ver 3.00 add vcmpeqps and so on
|
||||
* 2011/Feb/16 ver 2.994 beta add vmovq for 32-bit mode(I forgot it)
|
||||
* 2011/Feb/16 ver 2.993 beta remove cvtReg to avoid thread unsafe
|
||||
* 2011/Feb/10 ver 2.992 beta support one argument syntax for fadd like nasm
|
||||
* 2011/Feb/07 ver 2.991 beta fix pextrw reg, xmm, imm(Thanks to Gabest)
|
||||
* 2011/Feb/04 ver 2.99 beta support AVX
|
||||
* 2010/Dec/08 ver 2.31 fix ptr [rip + 32bit offset], support rdtscp
|
||||
* 2010/Oct/19 ver 2.30 support pclmulqdq, aesdec, aesdeclast, aesenc, aesenclast, aesimc, aeskeygenassist
|
||||
* 2010/Jun/07 ver 2.29 fix call(<label>)
|
||||
* 2010/Jun/17 ver 2.28 move some member functions to public
|
||||
* 2010/Jun/01 ver 2.27 support encoding of mov(reg64, imm) like yasm(not nasm)
|
||||
* 2010/May/24 ver 2.26 fix sub(rsp, 1000)
|
||||
* 2010/Apr/26 ver 2.25 add jc/jnc(I forgot to implement them...)
|
||||
* 2010/Apr/16 ver 2.24 change the prototype of rewrite() method
|
||||
* 2010/Apr/15 ver 2.23 fix align() and xbyak_util.h for Mac
|
||||
* 2010/Feb/16 ver 2.22 fix inLocalLabel()/outLocalLabel()
|
||||
* 2009/Dec/09 ver 2.21 support cygwin(gcc 4.3.2)
|
||||
* 2009/Nov/28 support a part of FPU
|
||||
* 2009/Jun/25 fix mov(qword[rax], imm); (thanks to Martin)
|
||||
* 2009/Mar/10 fix redundant REX.W prefix on jmp/call reg64
|
||||
* 2009/Feb/24 add movq reg64, mmx/xmm; movq mmx/xmm, reg64
|
||||
* 2009/Feb/13 movd(xmm7, dword[eax]) drops 0x66 prefix (thanks to Gabest)
|
||||
* 2008/Dec/30 fix call in short relative address(thanks to kato san)
|
||||
* 2008/Sep/18 support @@, @f, @b and localization of label(thanks to nobu-q san)
|
||||
* 2008/Sep/18 support (ptr[rip + 32bit offset]) (thanks to Dango-Chu san)
|
||||
* 2008/Jun/03 fix align(). mov(ptr[eax],1) throws ERR_MEM_SIZE_IS_NOT_SPECIFIED.
|
||||
* 2008/Jun/02 support memory interface allocated by user
|
||||
* 2008/May/26 fix protect() to avoid invalid setting(thanks to shinichiro_h san)
|
||||
* 2008/Apr/30 add cmpxchg16b, cdqe
|
||||
* 2008/Apr/29 support x64
|
||||
* 2008/Apr/14 code refactoring
|
||||
* 2008/Mar/12 add bsr/bsf
|
||||
* 2008/Feb/14 fix output of sub eax, 1234 (thanks to Robert)
|
||||
* 2007/Nov/5 support lock, xadd, xchg
|
||||
* 2007/Nov/2 support SSSE3/SSE4 (thanks to Dango-Chu san)
|
||||
* 2007/Feb/4 fix the bug that exception doesn't occur under the condition which the offset of jmp mnemonic without T_NEAR is over 127.
|
||||
* 2007/Jan/21 fix the bug to create address like [disp] select smaller representation for mov (eax|ax|al, [disp])
|
||||
* 2007/Jan/4 first version
|
||||
[BSD-3-Clause License](http://opensource.org/licenses/BSD-3-Clause)
|
||||
|
||||
## Author
|
||||
MITSUNARI Shigeo(herumi@nifty.com)
|
||||
|
||||
#### 光成滋生 Mitsunari Shigeo
|
||||
[GitHub](https://github.com/herumi) | [Website (Japanese)](http://herumi.in.coocan.jp/) | [herumi@nifty.com](mailto:herumi@nifty.com)
|
||||
|
||||
## Sponsors welcome
|
||||
[GitHub Sponsor](https://github.com/sponsors/herumi)
|
||||
|
||||
<!----------------------------------------------------------------------------->
|
||||
|
||||
[Badge Build]: https://github.com/herumi/xbyak/actions/workflows/main.yml/badge.svg
|
||||
[Build Status]: https://github.com/herumi/xbyak/actions/workflows/main.yml
|
||||
|
||||
[License]: COPYRIGHT
|
||||
|
||||
[Changelog]: doc/changelog.md
|
||||
[Install]: doc/install.md
|
||||
[Usage]: doc/usage.md
|
||||
|
||||
|
|
37
externals/dynarmic/externals/xbyak/readme.txt
vendored
37
externals/dynarmic/externals/xbyak/readme.txt
vendored
|
@ -1,5 +1,5 @@
|
|||
|
||||
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.991
|
||||
C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 6.60.1
|
||||
|
||||
-----------------------------------------------------------------------------
|
||||
◎概要
|
||||
|
@ -277,6 +277,24 @@ L(label);
|
|||
assert(label.getAddress(), getCurr());
|
||||
```
|
||||
|
||||
4. farジャンプ
|
||||
|
||||
`jmp(mem, T_FAR)`, `call(mem, T_FAR)`, `retf()`をサポートします。
|
||||
サイズを明示するために`ptr`の代わりに`word|dword|qword`を利用してください。
|
||||
|
||||
32bit
|
||||
```
|
||||
jmp(word[eax], T_FAR); // jmp m16:16(FF /5)
|
||||
jmp(dword[eax], T_FAR); // jmp m16:32(FF /5)
|
||||
```
|
||||
|
||||
64bit
|
||||
```
|
||||
jmp(word[rax], T_FAR); // jmp m16:16(FF /5)
|
||||
jmp(dword[rax], T_FAR); // jmp m16:32(FF /5)
|
||||
jmp(qword[rax], T_FAR); // jmp m16:64(REX.W FF /5)
|
||||
```
|
||||
|
||||
・Xbyak::CodeGenerator()コンストラクタインタフェース
|
||||
|
||||
@param maxSize [in] コード生成最大サイズ(デフォルト4096byte)
|
||||
|
@ -382,6 +400,23 @@ sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から
|
|||
-----------------------------------------------------------------------------
|
||||
◎履歴
|
||||
|
||||
2022/06/15 ver 6.60.1 Visual Studio /O0でXbyak::util::Cpuがリンクエラーになるのに対応
|
||||
2022/06/06 ver 6.60 バージョンのつけ方を数値が戻らないように変更
|
||||
2022/06/01 ver 6.06 Cpu::TypeクラスのリファクタリングとXBYAK_USE_MEMFDが定義されたときのMmapAllocatorの改善
|
||||
2022/05/20 ver 6.052 Cpu::operator==()を正しく定義
|
||||
2022/05/13 ver 6.051 XYBAK_NO_EXCEPTIONを定義したときのCpuクラスのコンパイルエラー修正
|
||||
2022/05/12 ver 6.05 movdiri, movdir64b, clwb, cldemoteを追加
|
||||
2022/04/05 ver 6.04 tpause, umonitor, umwaitを追加
|
||||
2022/03/08 ver 6.03 MmapAllocatorがmemfd用のユーザ定義文字列をサポート
|
||||
2022/01/28 ver 6.02 dispacementの32bit範囲チェックの厳密化
|
||||
2021/12/14 ver 6.01 T_FAR jump/callとretfをサポート
|
||||
2021/09/14 ver 6.00 AVX512-FP16を完全サポート
|
||||
2021/09/09 ver 5.997 vrndscale*を{sae}をサポートするよう修正
|
||||
2021/09/03 ver 5.996 v{add,sub,mul,div,max,min}{sd,ss}をT_rd_saeなどをサポートするよう修正
|
||||
2021/08/15 ver 5.995 Linux上でXBYAK_USE_MEMFDが定義されたなら/proc/self/mapsにラベル追加
|
||||
2021/06/17 ver 5.994 マスクレジスタ用のvcmpXX{ps,pd,ss,sd}のalias追加
|
||||
2021/06/06 ver 5.993 gather/scatterのレジスタの組み合わせの厳密なチェック
|
||||
2021/05/09 ver 5.992 endbr32とendbr64のサポート
|
||||
2020/11/16 ver 5.991 g++-5のC++14でconstexpr機能の抑制
|
||||
2020/10/19 ver 5.99 VNNI命令サポート(Thanks to akharito)
|
||||
2020/10/17 ver 5.98 [scale * reg]のサポート
|
||||
|
|
|
@ -37,6 +37,7 @@ endif
|
|||
|
||||
ifneq ($(OS),mac)
|
||||
TARGET += static_buf64
|
||||
TARGET += memfd
|
||||
endif
|
||||
|
||||
|
||||
|
@ -51,7 +52,7 @@ all: $(TARGET)
|
|||
|
||||
CFLAGS_WARN=-Wall -Wextra -Wformat=2 -Wcast-qual -Wcast-align -Wwrite-strings -Wfloat-equal -Wpointer-arith #-pedantic
|
||||
|
||||
CFLAGS=-g -O2 -fomit-frame-pointer -Wall -I../ $(CFLAGS_WARN)
|
||||
CFLAGS=-g -O2 -fomit-frame-pointer -Wall -I../ $(CFLAGS_WARN) $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS)
|
||||
|
||||
test:
|
||||
$(CXX) $(CFLAGS) test0.cpp -o $@ -m32
|
||||
|
@ -95,6 +96,8 @@ jmp_table:
|
|||
$(CXX) $(CFLAGS) jmp_table.cpp -o $@ -m32
|
||||
jmp_table64:
|
||||
$(CXX) $(CFLAGS) jmp_table.cpp -o $@ -m64
|
||||
memfd:
|
||||
$(CXX) $(CFLAGS) memfd.cpp -o $@ -m64
|
||||
profiler: profiler.cpp ../xbyak/xbyak_util.h
|
||||
$(CXX) $(CFLAGS) profiler.cpp -o $@
|
||||
profiler-vtune: profiler.cpp ../xbyak/xbyak_util.h
|
||||
|
@ -121,3 +124,4 @@ test_util : test_util.cpp $(XBYAK_INC) ../xbyak/xbyak_util.h
|
|||
test_util2 : test_util.cpp $(XBYAK_INC) ../xbyak/xbyak_util.h
|
||||
jmp_table: jmp_table.cpp $(XBYAK_INC)
|
||||
jmp_table64: jmp_table.cpp $(XBYAK_INC)
|
||||
memfd: memfd.cpp $(XBYAK_INC)
|
||||
|
|
39
externals/dynarmic/externals/xbyak/sample/memfd.cpp
vendored
Executable file
39
externals/dynarmic/externals/xbyak/sample/memfd.cpp
vendored
Executable file
|
@ -0,0 +1,39 @@
|
|||
/*
|
||||
a sample to use MmapAllocator with an user-defined name
|
||||
cat /proc/`psidof ./memfd`/maps
|
||||
|
||||
7fca70b44000-7fca70b4a000 rw-p 00000000 00:00 0
|
||||
7fca70b67000-7fca70b68000 rwxs 00000000 00:05 19960170 /memfd:xyz (deleted)
|
||||
7fca70b68000-7fca70b69000 rwxs 00000000 00:05 19960169 /memfd:abc (deleted)
|
||||
7fca70b69000-7fca70b6a000 r--p 00029000 103:03 19136541 /lib/x86_64-linux-gnu/ld-2.27.so
|
||||
7fca70b6a000-7fca70b6b000 rw-p 0002a000 103:03 19136541 /lib/x86_64-linux-gnu/ld-2.27.so
|
||||
*/
|
||||
#define XBYAK_USE_MEMFD
|
||||
#include <xbyak/xbyak.h>
|
||||
#include <fstream>
|
||||
|
||||
class Code : Xbyak::MmapAllocator, public Xbyak::CodeGenerator {
|
||||
public:
|
||||
Code(const char *name, int v)
|
||||
: Xbyak::MmapAllocator(name)
|
||||
, Xbyak::CodeGenerator(4096, nullptr, this /* specify external MmapAllocator */)
|
||||
{
|
||||
mov(eax, v);
|
||||
ret();
|
||||
}
|
||||
};
|
||||
|
||||
int main()
|
||||
{
|
||||
Code c1("Xbyak::abc", 123);
|
||||
Code c2("Xbyak::xyz", 456);
|
||||
printf("c1 %d\n", c1.getCode<int (*)()>()());
|
||||
printf("c2 %d\n", c2.getCode<int (*)()>()());
|
||||
std::ifstream ifs("/proc/self/maps", std::ios::binary);
|
||||
if (ifs) {
|
||||
std::string line;
|
||||
while (std::getline(ifs, line)) {
|
||||
printf("%s\n", line.c_str());
|
||||
}
|
||||
}
|
||||
}
|
|
@ -7,14 +7,13 @@ struct PopCountTest : public Xbyak::CodeGenerator {
|
|||
PopCountTest(int n)
|
||||
: Xbyak::CodeGenerator(4096, Xbyak::DontSetProtectRWE)
|
||||
{
|
||||
ret();
|
||||
mov(eax, n);
|
||||
popcnt(eax, eax);
|
||||
ret();
|
||||
}
|
||||
};
|
||||
|
||||
void putCPUinfo()
|
||||
void putCPUinfo(bool onlyCpuidFeature)
|
||||
{
|
||||
using namespace Xbyak::util;
|
||||
Cpu cpu;
|
||||
|
@ -35,8 +34,6 @@ void putCPUinfo()
|
|||
{ Cpu::tPOPCNT, "popcnt" },
|
||||
{ Cpu::t3DN, "3dn" },
|
||||
{ Cpu::tE3DN, "e3dn" },
|
||||
{ Cpu::tSSE4a, "sse4a" },
|
||||
{ Cpu::tSSE5, "sse5" },
|
||||
{ Cpu::tAESNI, "aesni" },
|
||||
{ Cpu::tRDTSCP, "rdtscp" },
|
||||
{ Cpu::tOSXSAVE, "osxsave(xgetvb)" },
|
||||
|
@ -85,11 +82,19 @@ void putCPUinfo()
|
|||
{ Cpu::tAMX_INT8, "amx(int8)" },
|
||||
{ Cpu::tAMX_BF16, "amx(bf16)" },
|
||||
{ Cpu::tAVX_VNNI, "avx_vnni" },
|
||||
{ Cpu::tAVX512_FP16, "avx512_fp16" },
|
||||
{ Cpu::tWAITPKG, "waitpkg" },
|
||||
{ Cpu::tCLFLUSHOPT, "clflushopt" },
|
||||
{ Cpu::tCLDEMOTE, "cldemote" },
|
||||
{ Cpu::tMOVDIRI, "movdiri" },
|
||||
{ Cpu::tMOVDIR64B, "movdir64b" },
|
||||
{ Cpu::tCLZERO, "clzero" },
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
if (cpu.has(tbl[i].type)) printf(" %s", tbl[i].str);
|
||||
}
|
||||
printf("\n");
|
||||
if (onlyCpuidFeature) return;
|
||||
if (cpu.has(Cpu::tPOPCNT)) {
|
||||
const int n = 0x12345678; // bitcount = 13
|
||||
const int ok = 13;
|
||||
|
@ -123,12 +128,15 @@ void putCPUinfo()
|
|||
printf("CoreLevel=%u\n", cpu.getNumCores(Xbyak::util::CoreLevel));
|
||||
}
|
||||
|
||||
int main()
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
bool onlyCpuidFeature = argc == 2 && strcmp(argv[1], "-cpuid") == 0;
|
||||
if (!onlyCpuidFeature) {
|
||||
#ifdef XBYAK32
|
||||
puts("32bit");
|
||||
puts("32bit");
|
||||
#else
|
||||
puts("64bit");
|
||||
puts("64bit");
|
||||
#endif
|
||||
putCPUinfo();
|
||||
}
|
||||
putCPUinfo(onlyCpuidFeature);
|
||||
}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
TARGET = make_nm normalize_prefix bad_address misc cvt_test cvt_test32 noexception
|
||||
TARGET = make_nm normalize_prefix bad_address misc cvt_test cvt_test32 noexception misc32
|
||||
XBYAK_INC=../xbyak/xbyak.h
|
||||
UNAME_S=$(shell uname -s)
|
||||
BIT=32
|
||||
|
@ -22,7 +22,7 @@ all: $(TARGET)
|
|||
|
||||
CFLAGS_WARN=-Wall -Wextra -Wformat=2 -Wcast-qual -Wcast-align -Wwrite-strings -Wfloat-equal -Wpointer-arith
|
||||
|
||||
CFLAGS=-O2 -fomit-frame-pointer -Wall -fno-operator-names -I../ -I./ $(CFLAGS_WARN) #-std=c++0x
|
||||
CFLAGS=-O2 -fomit-frame-pointer -Wall -fno-operator-names -I../ -I./ $(CFLAGS_WARN) $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS) #-std=c++0x
|
||||
make_nm:
|
||||
$(CXX) $(CFLAGS) make_nm.cpp -o $@
|
||||
normalize_prefix: normalize_prefix.cpp ../xbyak/xbyak.h
|
||||
|
@ -41,6 +41,8 @@ bad_address: bad_address.cpp ../xbyak/xbyak.h
|
|||
$(CXX) $(CFLAGS) bad_address.cpp -o $@
|
||||
misc: misc.cpp ../xbyak/xbyak.h
|
||||
$(CXX) $(CFLAGS) misc.cpp -o $@
|
||||
misc32: misc.cpp ../xbyak/xbyak.h
|
||||
$(CXX) $(CFLAGS) misc.cpp -o $@ -DXBYAK32
|
||||
cvt_test: cvt_test.cpp ../xbyak/xbyak.h
|
||||
$(CXX) $(CFLAGS) $< -o $@
|
||||
cvt_test32: cvt_test.cpp ../xbyak/xbyak.h
|
||||
|
@ -62,6 +64,7 @@ ifneq ($(ONLY_64BIT),1)
|
|||
endif
|
||||
./bad_address
|
||||
./misc
|
||||
./misc32
|
||||
./cvt_test
|
||||
ifeq ($(BIT),64)
|
||||
./test_address.sh 64
|
||||
|
@ -95,7 +98,7 @@ test:
|
|||
$(MAKE) test_avx512
|
||||
|
||||
clean:
|
||||
rm -rf *.o $(TARGET) lib_run nm.cpp nm_frame make_512
|
||||
$(RM) a.asm *.lst *.obj *.o $(TARGET) lib_run nm.cpp nm_frame make_512
|
||||
|
||||
lib_run: lib_test.cpp lib_run.cpp lib.h
|
||||
$(CXX) $(CFLAGS) lib_run.cpp lib_test.cpp -o lib_run
|
||||
|
|
|
@ -31,7 +31,7 @@ void genVsib(bool isJIT)
|
|||
"xmm3 * 8 + edi + 123",
|
||||
"xmm2 * 2 + 5",
|
||||
"eax + xmm0",
|
||||
"esp + xmm4",
|
||||
"esp + xmm2",
|
||||
};
|
||||
const char *vm32yTbl[] = {
|
||||
"ymm0",
|
||||
|
@ -42,7 +42,7 @@ void genVsib(bool isJIT)
|
|||
"ymm3 * 8 + edi + 123",
|
||||
"ymm2 * 2 + 5",
|
||||
"eax + ymm0",
|
||||
"esp + ymm4",
|
||||
"esp + ymm2",
|
||||
};
|
||||
genVsibSub(isJIT, "vgatherdpd", vm32xTbl, NUM_OF_ARRAY(vm32xTbl));
|
||||
genVsibSub(isJIT, "vgatherqpd", vm32yTbl, NUM_OF_ARRAY(vm32yTbl));
|
||||
|
@ -93,7 +93,7 @@ void genAddress(bool isJIT, const char regTbl[][5], size_t regTblNum)
|
|||
}
|
||||
if (isFirst) {
|
||||
if (isJIT) printf("(void*)");
|
||||
printf("0x%08X", disp);
|
||||
printf("%d", disp);
|
||||
} else {
|
||||
if (disp >= 0) {
|
||||
putchar('+');
|
||||
|
|
15
externals/dynarmic/externals/xbyak/test/jmp.cpp
vendored
15
externals/dynarmic/externals/xbyak/test/jmp.cpp
vendored
|
@ -1383,3 +1383,18 @@ CYBOZU_TEST_AUTO(setDefaultJmpNEAR)
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
CYBOZU_TEST_AUTO(ambiguousFarJmp)
|
||||
{
|
||||
struct Code : Xbyak::CodeGenerator {
|
||||
#ifdef XBYAK32
|
||||
void genJmp() { jmp(ptr[eax], T_FAR); }
|
||||
void genCall() { call(ptr[eax], T_FAR); }
|
||||
#else
|
||||
void genJmp() { jmp(ptr[rax], T_FAR); }
|
||||
void genCall() { call(ptr[rax], T_FAR); }
|
||||
#endif
|
||||
} code;
|
||||
CYBOZU_TEST_EXCEPTION(code.genJmp(), std::exception);
|
||||
CYBOZU_TEST_EXCEPTION(code.genCall(), std::exception);
|
||||
}
|
||||
|
|
|
@ -1366,6 +1366,8 @@ public:
|
|||
put(p, _ZMM, _ZMM, mem | _MEM);
|
||||
}
|
||||
}
|
||||
put("vaddss", XMM, _XMM, XMM_ER);
|
||||
put("vaddsd", XMM, _XMM, XMM_ER);
|
||||
#endif
|
||||
}
|
||||
void putAVX1()
|
||||
|
@ -1949,14 +1951,16 @@ public:
|
|||
put("vrndscalepd", XMM_KZ, _XMM | _MEM | M_1to2, IMM8);
|
||||
put("vrndscalepd", YMM_KZ, _YMM | _MEM | M_1to4, IMM8);
|
||||
put("vrndscalepd", ZMM_KZ, _ZMM | _MEM | M_1to8, IMM8);
|
||||
put("vrndscalepd", ZMM_KZ, _ZMM | ZMM_SAE, IMM8);
|
||||
|
||||
put("vrndscaleps", XMM_KZ, _XMM | _MEM | M_1to4, IMM8);
|
||||
put("vrndscaleps", YMM_KZ, _YMM | _MEM | M_1to8, IMM8);
|
||||
put("vrndscaleps", ZMM_KZ, _ZMM | _MEM | M_1to16, IMM8);
|
||||
put("vrndscaleps", ZMM_KZ, _ZMM | ZMM_SAE, IMM8);
|
||||
|
||||
put("vrndscalesd", XMM_KZ, _XMM, _XMM | _MEM, IMM8);
|
||||
put("vrndscalesd", XMM_KZ, _XMM, _XMM | _MEM | XMM_SAE, IMM8);
|
||||
|
||||
put("vrndscaless", XMM_KZ, _XMM, _XMM | _MEM, IMM8);
|
||||
put("vrndscaless", XMM_KZ, _XMM, _XMM | _MEM | XMM_SAE, IMM8);
|
||||
|
||||
put("vscalefpd", XMM_KZ, _XMM, _XMM | _MEM | M_1to2);
|
||||
put("vscalefpd", YMM_KZ, _YMM, _YMM | _MEM | M_1to4);
|
||||
|
|
|
@ -179,6 +179,19 @@ class Test {
|
|||
printf("\n");
|
||||
}
|
||||
}
|
||||
void put(const char *nm, const char *para1, uint64_t op2, const char *para3) const
|
||||
{
|
||||
for (int j = 0; j < bitEnd; j++) {
|
||||
if ((op2 & (1ULL << j)) == 0) continue;
|
||||
printf("%s ", nm);
|
||||
if (isXbyak_) printf("(");
|
||||
printf("%s", para1);
|
||||
if (!(op2 & NOPARA)) printf(", %s", get(1ULL << j));
|
||||
printf(", %s", para3);
|
||||
if (isXbyak_) printf("); dump();");
|
||||
printf("\n");
|
||||
}
|
||||
}
|
||||
const char *get(uint64_t type) const
|
||||
{
|
||||
int idx = (rand() / 31) & 7;
|
||||
|
@ -499,6 +512,7 @@ class Test {
|
|||
"cmpsb",
|
||||
"cmpsw",
|
||||
"cmpsd",
|
||||
"hlt",
|
||||
"int3",
|
||||
"leave",
|
||||
"lodsb",
|
||||
|
@ -623,6 +637,7 @@ class Test {
|
|||
"fstsw",
|
||||
"fnstsw",
|
||||
"fxrstor",
|
||||
"clwb",
|
||||
};
|
||||
for (size_t i = 0; i < NUM_OF_ARRAY(memTbl); i++) {
|
||||
put(memTbl[i], MEM);
|
||||
|
@ -685,6 +700,24 @@ class Test {
|
|||
puts("pshufb xmm14, [rel label0]");
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
void putFarJmp() const
|
||||
{
|
||||
#ifdef XBYAK64
|
||||
put("jmp", "word[rax],T_FAR", "far word [rax]");
|
||||
put("jmp", "dword[rax],T_FAR", "far dword [rax]");
|
||||
put("jmp", "qword[rax],T_FAR", "far qword [rax]");
|
||||
|
||||
put("call", "word[rax],T_FAR", "far word [rax]");
|
||||
put("call", "dword[rax],T_FAR", "far dword [rax]");
|
||||
put("call", "qword[rax],T_FAR", "far qword [rax]");
|
||||
#else
|
||||
put("jmp", "dword[eax],T_FAR", "far dword [eax]");
|
||||
put("jmp", "word[eax],T_FAR", "far word [eax]");
|
||||
|
||||
put("call", "dword[eax],T_FAR", "far dword [eax]");
|
||||
put("call", "word[eax],T_FAR", "far word [eax]");
|
||||
#endif
|
||||
}
|
||||
void putMMX1() const
|
||||
|
@ -1237,6 +1270,10 @@ class Test {
|
|||
put("mov", REG64, "0x12345678", "0x12345678");
|
||||
put("mov", REG64, "0xffffffff12345678LL", "0xffffffff12345678");
|
||||
put("mov", REG32e|REG16|REG8|RAX|EAX|AX|AL, IMM);
|
||||
|
||||
put("mov", EAX, "ptr[(void*)-1]", "[-1]");
|
||||
put("mov", EAX, "ptr[(void*)0x7fffffff]", "[0x7fffffff]");
|
||||
put("mov", EAX, "ptr[(void*)0xffffffffffffffff]", "[0xffffffffffffffff]");
|
||||
}
|
||||
void putEtc() const
|
||||
{
|
||||
|
@ -1244,6 +1281,9 @@ class Test {
|
|||
const char *p = "ret";
|
||||
put(p);
|
||||
put(p, IMM);
|
||||
p = "retf";
|
||||
put(p);
|
||||
put(p, IMM);
|
||||
p = "mov";
|
||||
put(p, EAX|REG32|MEM|MEM_ONLY_DISP, REG32|EAX);
|
||||
put(p, REG64|MEM|MEM_ONLY_DISP, REG64|RAX);
|
||||
|
@ -1480,6 +1520,7 @@ class Test {
|
|||
put("pextrq", REG64|MEM, XMM, IMM);
|
||||
put("pinsrq", XMM, REG64|MEM, IMM);
|
||||
#endif
|
||||
|
||||
}
|
||||
void putSHA() const
|
||||
{
|
||||
|
@ -2361,16 +2402,16 @@ public:
|
|||
for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) {
|
||||
const Tbl& p = tbl[i];
|
||||
const char *name = p.name;
|
||||
put(name, XMM, VM32X, XMM);
|
||||
put(name, "xmm3", VM32X, "xmm5");
|
||||
switch (p.mode) {
|
||||
case y_vx_y:
|
||||
put(name, YMM, VM32X, YMM);
|
||||
put(name, "ymm3", VM32X, "ymm5");
|
||||
break;
|
||||
case y_vy_y:
|
||||
put(name, YMM, VM32Y, YMM);
|
||||
put(name, "ymm3", VM32Y, "ymm5");
|
||||
break;
|
||||
case x_vy_x:
|
||||
put(name, XMM, VM32Y, XMM);
|
||||
put(name, "xmm3", VM32Y, "xmm5");
|
||||
break;
|
||||
default:
|
||||
printf("ERR mode=%d\n", p.mode);
|
||||
|
@ -2516,6 +2557,7 @@ public:
|
|||
#else // USE_AVX
|
||||
|
||||
putJmp();
|
||||
putFarJmp();
|
||||
|
||||
#ifdef USE_YASM
|
||||
|
||||
|
|
1133
externals/dynarmic/externals/xbyak/test/misc.cpp
vendored
1133
externals/dynarmic/externals/xbyak/test/misc.cpp
vendored
File diff suppressed because it is too large
Load diff
|
@ -218,7 +218,7 @@ void check(int x, int y)
|
|||
}
|
||||
}
|
||||
|
||||
void verify(const Xbyak::uint8_t *f, int pNum)
|
||||
void verify(const uint8_t *f, int pNum)
|
||||
{
|
||||
switch (pNum) {
|
||||
case 0:
|
||||
|
@ -264,7 +264,7 @@ void testAll()
|
|||
}
|
||||
for (int tNum = 0; tNum < maxNum; tNum++) {
|
||||
// printf("pNum=%d, tNum=%d, stackSize=%d\n", pNum, tNum | opt, stackSize);
|
||||
const Xbyak::uint8_t *f = code.getCurr();
|
||||
const uint8_t *f = code.getCurr();
|
||||
code.gen(pNum, tNum | opt, stackSize);
|
||||
verify(f, pNum);
|
||||
/*
|
||||
|
|
163
externals/dynarmic/externals/xbyak/xbyak/xbyak.h
vendored
163
externals/dynarmic/externals/xbyak/xbyak/xbyak.h
vendored
|
@ -95,6 +95,12 @@
|
|||
#include <stdint.h>
|
||||
#endif
|
||||
|
||||
// MFD_CLOEXEC defined only linux 3.17 or later.
|
||||
// Android wraps the memfd_create syscall from API version 30.
|
||||
#if !defined(MFD_CLOEXEC) || (defined(__ANDROID__) && __ANDROID_API__ < 30)
|
||||
#undef XBYAK_USE_MEMFD
|
||||
#endif
|
||||
|
||||
#if defined(_WIN64) || defined(__MINGW64__) || (defined(__CYGWIN__) && defined(__x86_64__))
|
||||
#define XBYAK64_WIN
|
||||
#elif defined(__x86_64__)
|
||||
|
@ -138,7 +144,7 @@ namespace Xbyak {
|
|||
|
||||
enum {
|
||||
DEFAULT_MAX_CODE_SIZE = 4096,
|
||||
VERSION = 0x5991 /* 0xABCD = A.BC(D) */
|
||||
VERSION = 0x6601 /* 0xABCD = A.BC(.D) */
|
||||
};
|
||||
|
||||
#ifndef MIE_INTEGER_TYPE_DEFINED
|
||||
|
@ -209,6 +215,7 @@ enum {
|
|||
ERR_INVALID_MIB_ADDRESS,
|
||||
ERR_X2APIC_IS_NOT_SUPPORTED,
|
||||
ERR_NOT_SUPPORTED,
|
||||
ERR_SAME_REGS_ARE_INVALID,
|
||||
ERR_INTERNAL // Put it at last.
|
||||
};
|
||||
|
||||
|
@ -261,6 +268,7 @@ inline const char *ConvertErrorToString(int err)
|
|||
"invalid mib address",
|
||||
"x2APIC is not supported",
|
||||
"not supported",
|
||||
"same regs are invalid",
|
||||
"internal error"
|
||||
};
|
||||
assert(ERR_INTERNAL + 1 == sizeof(errTbl) / sizeof(*errTbl));
|
||||
|
@ -285,10 +293,10 @@ inline void SetError(int err) {
|
|||
inline void ClearError() {
|
||||
local::GetErrorRef() = 0;
|
||||
}
|
||||
inline int GetError() { return local::GetErrorRef(); }
|
||||
inline int GetError() { return Xbyak::local::GetErrorRef(); }
|
||||
|
||||
#define XBYAK_THROW(err) { local::SetError(err); return; }
|
||||
#define XBYAK_THROW_RET(err, r) { local::SetError(err); return r; }
|
||||
#define XBYAK_THROW(err) { Xbyak::local::SetError(err); return; }
|
||||
#define XBYAK_THROW_RET(err, r) { Xbyak::local::SetError(err); return r; }
|
||||
|
||||
#else
|
||||
class Error : public std::exception {
|
||||
|
@ -377,6 +385,7 @@ enum LabelMode {
|
|||
custom allocator
|
||||
*/
|
||||
struct Allocator {
|
||||
explicit Allocator(const std::string& = "") {} // same interface with MmapAllocator
|
||||
virtual uint8_t *alloc(size_t size) { return reinterpret_cast<uint8_t*>(AlignedMalloc(size, inner::ALIGN_PAGE_SIZE)); }
|
||||
virtual void free(uint8_t *p) { AlignedFree(p); }
|
||||
virtual ~Allocator() {}
|
||||
|
@ -408,10 +417,21 @@ inline int getMacOsVersion()
|
|||
|
||||
} // util
|
||||
#endif
|
||||
class MmapAllocator : Allocator {
|
||||
typedef XBYAK_STD_UNORDERED_MAP<uintptr_t, size_t> SizeList;
|
||||
SizeList sizeList_;
|
||||
class MmapAllocator : public Allocator {
|
||||
struct Allocation {
|
||||
size_t size;
|
||||
#if defined(XBYAK_USE_MEMFD)
|
||||
// fd_ is only used with XBYAK_USE_MEMFD. We keep the file open
|
||||
// during the lifetime of each allocation in order to support
|
||||
// checkpoint/restore by unprivileged users.
|
||||
int fd;
|
||||
#endif
|
||||
};
|
||||
const std::string name_; // only used with XBYAK_USE_MEMFD
|
||||
typedef XBYAK_STD_UNORDERED_MAP<uintptr_t, Allocation> AllocationList;
|
||||
AllocationList allocList_;
|
||||
public:
|
||||
explicit MmapAllocator(const std::string& name = "xbyak") : name_(name) {}
|
||||
uint8_t *alloc(size_t size)
|
||||
{
|
||||
const size_t alignedSizeM1 = inner::ALIGN_PAGE_SIZE - 1;
|
||||
|
@ -427,21 +447,44 @@ public:
|
|||
const int mojaveVersion = 18;
|
||||
if (util::getMacOsVersion() >= mojaveVersion) mode |= MAP_JIT;
|
||||
#endif
|
||||
void *p = mmap(NULL, size, PROT_READ | PROT_WRITE, mode, -1, 0);
|
||||
if (p == MAP_FAILED) XBYAK_THROW_RET(ERR_CANT_ALLOC, 0)
|
||||
int fd = -1;
|
||||
#if defined(XBYAK_USE_MEMFD)
|
||||
fd = memfd_create(name_.c_str(), MFD_CLOEXEC);
|
||||
if (fd != -1) {
|
||||
mode = MAP_SHARED;
|
||||
if (ftruncate(fd, size) != 0) {
|
||||
close(fd);
|
||||
XBYAK_THROW_RET(ERR_CANT_ALLOC, 0)
|
||||
}
|
||||
}
|
||||
#endif
|
||||
void *p = mmap(NULL, size, PROT_READ | PROT_WRITE, mode, fd, 0);
|
||||
if (p == MAP_FAILED) {
|
||||
if (fd != -1) close(fd);
|
||||
XBYAK_THROW_RET(ERR_CANT_ALLOC, 0)
|
||||
}
|
||||
assert(p);
|
||||
sizeList_[(uintptr_t)p] = size;
|
||||
Allocation &alloc = allocList_[(uintptr_t)p];
|
||||
alloc.size = size;
|
||||
#if defined(XBYAK_USE_MEMFD)
|
||||
alloc.fd = fd;
|
||||
#endif
|
||||
return (uint8_t*)p;
|
||||
}
|
||||
void free(uint8_t *p)
|
||||
{
|
||||
if (p == 0) return;
|
||||
SizeList::iterator i = sizeList_.find((uintptr_t)p);
|
||||
if (i == sizeList_.end()) XBYAK_THROW(ERR_BAD_PARAMETER)
|
||||
if (munmap((void*)i->first, i->second) < 0) XBYAK_THROW(ERR_MUNMAP)
|
||||
sizeList_.erase(i);
|
||||
AllocationList::iterator i = allocList_.find((uintptr_t)p);
|
||||
if (i == allocList_.end()) XBYAK_THROW(ERR_BAD_PARAMETER)
|
||||
if (munmap((void*)i->first, i->second.size) < 0) XBYAK_THROW(ERR_MUNMAP)
|
||||
#if defined(XBYAK_USE_MEMFD)
|
||||
if (i->second.fd != -1) close(i->second.fd);
|
||||
#endif
|
||||
allocList_.erase(i);
|
||||
}
|
||||
};
|
||||
#else
|
||||
typedef Allocator MmapAllocator;
|
||||
#endif
|
||||
|
||||
class Address;
|
||||
|
@ -1557,6 +1600,7 @@ public:
|
|||
enum LabelType {
|
||||
T_SHORT,
|
||||
T_NEAR,
|
||||
T_FAR, // far jump
|
||||
T_AUTO // T_SHORT if possible
|
||||
};
|
||||
private:
|
||||
|
@ -1605,6 +1649,11 @@ private:
|
|||
{
|
||||
return op1.isREG(i32e) && ((op2.isREG(i32e) && op1.getBit() == op2.getBit()) || op2.isMEM());
|
||||
}
|
||||
static inline bool isValidSSE(const Operand& op1)
|
||||
{
|
||||
// SSE instructions do not support XMM16 - XMM31
|
||||
return !(op1.isXMM() && op1.getIdx() >= 16);
|
||||
}
|
||||
void rex(const Operand& op1, const Operand& op2 = Operand())
|
||||
{
|
||||
uint8_t rex = 0;
|
||||
|
@ -1635,9 +1684,10 @@ private:
|
|||
//
|
||||
T_N_VL = 1 << 3, // N * (1, 2, 4) for VL
|
||||
T_DUP = 1 << 4, // N = (8, 32, 64)
|
||||
T_66 = 1 << 5,
|
||||
T_F3 = 1 << 6,
|
||||
T_F2 = 1 << 7,
|
||||
T_66 = 1 << 5, // pp = 1
|
||||
T_F3 = 1 << 6, // pp = 2
|
||||
T_F2 = T_66 | T_F3, // pp = 3
|
||||
T_ER_R = 1 << 7, // reg{er}
|
||||
T_0F = 1 << 8,
|
||||
T_0F38 = 1 << 9,
|
||||
T_0F3A = 1 << 10,
|
||||
|
@ -1658,11 +1708,17 @@ private:
|
|||
T_MUST_EVEX = 1 << 25, // contains T_EVEX
|
||||
T_B32 = 1 << 26, // m32bcst
|
||||
T_B64 = 1 << 27, // m64bcst
|
||||
T_B16 = T_B32 | T_B64, // m16bcst (Be careful)
|
||||
T_M_K = 1 << 28, // mem{k}
|
||||
T_VSIB = 1 << 29,
|
||||
T_MEM_EVEX = 1 << 30, // use evex if mem
|
||||
T_FP16 = 1 << 31, // avx512-fp16
|
||||
T_MAP5 = T_FP16 | T_0F,
|
||||
T_MAP6 = T_FP16 | T_0F38,
|
||||
T_XXX
|
||||
};
|
||||
// T_66 = 1, T_F3 = 2, T_F2 = 3
|
||||
uint32_t getPP(int type) const { return (type >> 5) & 3; }
|
||||
void vex(const Reg& reg, const Reg& base, const Operand *v, int type, int code, bool x = false)
|
||||
{
|
||||
int w = (type & T_W1) ? 1 : 0;
|
||||
|
@ -1671,7 +1727,7 @@ private:
|
|||
bool b = base.isExtIdx();
|
||||
int idx = v ? v->getIdx() : 0;
|
||||
if ((idx | reg.getIdx() | base.getIdx()) >= 16) XBYAK_THROW(ERR_BAD_COMBINATION)
|
||||
uint32_t pp = (type & T_66) ? 1 : (type & T_F3) ? 2 : (type & T_F2) ? 3 : 0;
|
||||
uint32_t pp = getPP(type);
|
||||
uint32_t vvvv = (((~idx) & 15) << 3) | (is256 ? 4 : 0) | pp;
|
||||
if (!b && !x && !w && (type & T_0F)) {
|
||||
db(0xC5); db((r ? 0 : 0x80) | vvvv);
|
||||
|
@ -1688,6 +1744,7 @@ private:
|
|||
}
|
||||
void verifyER(const Reg& r, int type) const
|
||||
{
|
||||
if ((type & T_ER_R) && r.isREG(32|64)) return;
|
||||
if (((type & T_ER_X) && r.isXMM()) || ((type & T_ER_Y) && r.isYMM()) || ((type & T_ER_Z) && r.isZMM())) return;
|
||||
XBYAK_THROW(ERR_ER_IS_INVALID)
|
||||
}
|
||||
|
@ -1702,9 +1759,9 @@ private:
|
|||
{
|
||||
if (!(type & (T_EVEX | T_MUST_EVEX))) XBYAK_THROW_RET(ERR_EVEX_IS_INVALID, 0)
|
||||
int w = (type & T_EW1) ? 1 : 0;
|
||||
uint32_t mm = (type & T_0F) ? 1 : (type & T_0F38) ? 2 : (type & T_0F3A) ? 3 : 0;
|
||||
uint32_t pp = (type & T_66) ? 1 : (type & T_F3) ? 2 : (type & T_F2) ? 3 : 0;
|
||||
|
||||
uint32_t mmm = (type & T_0F) ? 1 : (type & T_0F38) ? 2 : (type & T_0F3A) ? 3 : 0;
|
||||
if (type & T_FP16) mmm |= 4;
|
||||
uint32_t pp = getPP(type);
|
||||
int idx = v ? v->getIdx() : 0;
|
||||
uint32_t vvvv = ~idx;
|
||||
|
||||
|
@ -1727,7 +1784,7 @@ private:
|
|||
VL = (std::max)((std::max)(reg.getBit(), base.getBit()), VL);
|
||||
LL = (VL == 512) ? 2 : (VL == 256) ? 1 : 0;
|
||||
if (b) {
|
||||
disp8N = (type & T_B32) ? 4 : 8;
|
||||
disp8N = ((type & T_B16) == T_B16) ? 2 : (type & T_B32) ? 4 : 8;
|
||||
} else if (type & T_DUP) {
|
||||
disp8N = VL == 128 ? 8 : VL == 256 ? 32 : 64;
|
||||
} else {
|
||||
|
@ -1746,7 +1803,7 @@ private:
|
|||
if (aaa == 0) aaa = verifyDuplicate(base.getOpmaskIdx(), reg.getOpmaskIdx(), (v ? v->getOpmaskIdx() : 0), ERR_OPMASK_IS_ALREADY_SET);
|
||||
if (aaa == 0) z = 0; // clear T_z if mask is not set
|
||||
db(0x62);
|
||||
db((R ? 0x80 : 0) | (X ? 0x40 : 0) | (B ? 0x20 : 0) | (Rp ? 0x10 : 0) | (mm & 3));
|
||||
db((R ? 0x80 : 0) | (X ? 0x40 : 0) | (B ? 0x20 : 0) | (Rp ? 0x10 : 0) | mmm);
|
||||
db((w == 1 ? 0x80 : 0) | ((vvvv & 15) << 3) | 4 | (pp & 3));
|
||||
db((z ? 0x80 : 0) | ((LL & 3) << 5) | (b ? 0x10 : 0) | (Vp ? 8 : 0) | (aaa & 7));
|
||||
db(code);
|
||||
|
@ -1760,8 +1817,15 @@ private:
|
|||
{
|
||||
uint64_t disp64 = e.getDisp();
|
||||
#ifdef XBYAK64
|
||||
#ifdef XBYAK_OLD_DISP_CHECK
|
||||
// treat 0xffffffff as 0xffffffffffffffff
|
||||
uint64_t high = disp64 >> 32;
|
||||
if (high != 0 && high != 0xFFFFFFFF) XBYAK_THROW(ERR_OFFSET_IS_TOO_BIG)
|
||||
#else
|
||||
// displacement should be a signed 32-bit value, so also check sign bit
|
||||
uint64_t high = disp64 >> 31;
|
||||
if (high != 0 && high != 0x1FFFFFFFF) XBYAK_THROW(ERR_OFFSET_IS_TOO_BIG)
|
||||
#endif
|
||||
#endif
|
||||
uint32_t disp = static_cast<uint32_t>(disp64);
|
||||
const Reg& base = e.getBase();
|
||||
|
@ -1862,6 +1926,7 @@ private:
|
|||
template<class T>
|
||||
void opJmp(T& label, LabelType type, uint8_t shortCode, uint8_t longCode, uint8_t longPref)
|
||||
{
|
||||
if (type == T_FAR) XBYAK_THROW(ERR_NOT_SUPPORTED)
|
||||
if (isAutoGrow() && size_ + 16 >= maxSize_) growMemory(); /* avoid splitting code of jmp */
|
||||
size_t offset = 0;
|
||||
if (labelMgr_.getOffset(&offset, label)) { /* label exists */
|
||||
|
@ -1882,6 +1947,7 @@ private:
|
|||
}
|
||||
void opJmpAbs(const void *addr, LabelType type, uint8_t shortCode, uint8_t longCode, uint8_t longPref = 0)
|
||||
{
|
||||
if (type == T_FAR) XBYAK_THROW(ERR_NOT_SUPPORTED)
|
||||
if (isAutoGrow()) {
|
||||
if (!isNEAR(type)) XBYAK_THROW(ERR_ONLY_T_NEAR_IS_SUPPORTED_IN_AUTO_GROW)
|
||||
if (size_ + 16 >= maxSize_) growMemory();
|
||||
|
@ -1894,6 +1960,16 @@ private:
|
|||
}
|
||||
|
||||
}
|
||||
void opJmpOp(const Operand& op, LabelType type, int ext)
|
||||
{
|
||||
const int bit = 16|i32e;
|
||||
if (type == T_FAR) {
|
||||
if (!op.isMEM(bit)) XBYAK_THROW(ERR_NOT_SUPPORTED)
|
||||
opR_ModM(op, bit, ext + 1, 0xFF, NONE, NONE, false);
|
||||
} else {
|
||||
opR_ModM(op, bit, ext, 0xFF, NONE, NONE, true);
|
||||
}
|
||||
}
|
||||
// reg is reg field of ModRM
|
||||
// immSize is the size for immediate value
|
||||
// disp8N = 0(normal), disp8N = 1(force disp32), disp8N = {2, 4, 8} ; compressed displacement
|
||||
|
@ -1920,6 +1996,7 @@ private:
|
|||
void opGen(const Operand& reg, const Operand& op, int code, int pref, bool isValid(const Operand&, const Operand&), int imm8 = NONE, int preCode = NONE)
|
||||
{
|
||||
if (isValid && !isValid(reg, op)) XBYAK_THROW(ERR_BAD_COMBINATION)
|
||||
if (!isValidSSE(reg) || !isValidSSE(op)) XBYAK_THROW(ERR_NOT_SUPPORTED)
|
||||
if (pref != NONE) db(pref);
|
||||
if (op.isMEM()) {
|
||||
opModM(op.getAddress(), reg.getReg(), 0x0F, preCode, code, (imm8 != NONE) ? 1 : 0);
|
||||
|
@ -1930,6 +2007,7 @@ private:
|
|||
}
|
||||
void opMMX_IMM(const Mmx& mmx, int imm8, int code, int ext)
|
||||
{
|
||||
if (!isValidSSE(mmx)) XBYAK_THROW(ERR_NOT_SUPPORTED)
|
||||
if (mmx.isXMM()) db(0x66);
|
||||
opModR(Reg32(ext), mmx, 0x0F, code);
|
||||
db(imm8);
|
||||
|
@ -1940,6 +2018,7 @@ private:
|
|||
}
|
||||
void opMovXMM(const Operand& op1, const Operand& op2, int code, int pref)
|
||||
{
|
||||
if (!isValidSSE(op1) || !isValidSSE(op2)) XBYAK_THROW(ERR_NOT_SUPPORTED)
|
||||
if (pref != NONE) db(pref);
|
||||
if (op1.isXMM() && op2.isMEM()) {
|
||||
opModM(op2.getAddress(), op1.getReg(), 0x0F, code);
|
||||
|
@ -1951,6 +2030,7 @@ private:
|
|||
}
|
||||
void opExt(const Operand& op, const Mmx& mmx, int code, int imm, bool hasMMX2 = false)
|
||||
{
|
||||
if (!isValidSSE(op) || !isValidSSE(mmx)) XBYAK_THROW(ERR_NOT_SUPPORTED)
|
||||
if (hasMMX2 && op.isREG(i32e)) { /* pextrw is special */
|
||||
if (mmx.isXMM()) db(0x66);
|
||||
opModR(op.getReg(), mmx, 0x0F, 0xC5); db(imm);
|
||||
|
@ -2211,11 +2291,15 @@ private:
|
|||
{
|
||||
if (!(x.isXMM() && op.is(Operand::XMM | Operand::YMM | Operand::MEM)) && !(x.isYMM() && op.is(Operand::ZMM | Operand::MEM))) XBYAK_THROW(ERR_BAD_COMBINATION)
|
||||
}
|
||||
void opCvt(const Xmm& x, const Operand& op, int type, int code)
|
||||
{
|
||||
Operand::Kind kind = x.isXMM() ? (op.isBit(256) ? Operand::YMM : Operand::XMM) : Operand::ZMM;
|
||||
opVex(x.copyAndSetKind(kind), &xm0, op, type, code);
|
||||
}
|
||||
void opCvt2(const Xmm& x, const Operand& op, int type, int code)
|
||||
{
|
||||
checkCvt2(x, op);
|
||||
Operand::Kind kind = x.isXMM() ? (op.isBit(256) ? Operand::YMM : Operand::XMM) : Operand::ZMM;
|
||||
opVex(x.copyAndSetKind(kind), &xm0, op, type, code);
|
||||
opCvt(x, op, type, code);
|
||||
}
|
||||
void opCvt3(const Xmm& x1, const Xmm& x2, const Operand& op, int type, int type64, int type32, uint8_t code)
|
||||
{
|
||||
|
@ -2224,6 +2308,18 @@ private:
|
|||
const Operand *p = op.isREG() ? &x : &op;
|
||||
opVex(x1, &x2, *p, type | (op.isBit(64) ? type64 : type32), code);
|
||||
}
|
||||
// (x, x/y/xword/yword), (y, z/m)
|
||||
void checkCvt4(const Xmm& x, const Operand& op) const
|
||||
{
|
||||
if (!(x.isXMM() && op.is(Operand::XMM | Operand::YMM | Operand::MEM) && op.isBit(128|256)) && !(x.isYMM() && op.is(Operand::ZMM | Operand::MEM))) XBYAK_THROW(ERR_BAD_COMBINATION)
|
||||
}
|
||||
// (x, x/y/z/xword/yword/zword)
|
||||
void opCvt5(const Xmm& x, const Operand& op, int type, int code)
|
||||
{
|
||||
if (!(x.isXMM() && op.isBit(128|256|512))) XBYAK_THROW(ERR_BAD_COMBINATION)
|
||||
Operand::Kind kind = op.isBit(128) ? Operand::XMM : op.isBit(256) ? Operand::YMM : Operand::ZMM;
|
||||
opVex(x.copyAndSetKind(kind), &xm0, op, type, code);
|
||||
}
|
||||
const Xmm& cvtIdx0(const Operand& x) const
|
||||
{
|
||||
return x.isZMM() ? zm0 : x.isYMM() ? ym0 : xm0;
|
||||
|
@ -2261,7 +2357,11 @@ private:
|
|||
}
|
||||
if (!isOK) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING)
|
||||
}
|
||||
opAVX_X_X_XM(isAddrYMM ? Ymm(x1.getIdx()) : x1, isAddrYMM ? Ymm(x2.getIdx()) : x2, addr, type, code);
|
||||
int i1 = x1.getIdx();
|
||||
int i2 = regExp.getIndex().getIdx();
|
||||
int i3 = x2.getIdx();
|
||||
if (i1 == i2 || i1 == i3 || i2 == i3) XBYAK_THROW(ERR_SAME_REGS_ARE_INVALID);
|
||||
opAVX_X_X_XM(isAddrYMM ? Ymm(i1) : x1, isAddrYMM ? Ymm(i3) : x2, addr, type, code);
|
||||
}
|
||||
enum {
|
||||
xx_yy_zz = 0,
|
||||
|
@ -2284,7 +2384,12 @@ private:
|
|||
void opGather2(const Xmm& x, const Address& addr, int type, uint8_t code, int mode)
|
||||
{
|
||||
if (x.hasZero()) XBYAK_THROW(ERR_INVALID_ZERO)
|
||||
checkGather2(x, addr.getRegExp().getIndex(), mode);
|
||||
const RegExp& regExp = addr.getRegExp();
|
||||
checkGather2(x, regExp.getIndex(), mode);
|
||||
int maskIdx = x.getOpmaskIdx();
|
||||
if ((type & T_M_K) && addr.getOpmaskIdx()) maskIdx = addr.getOpmaskIdx();
|
||||
if (maskIdx == 0) XBYAK_THROW(ERR_K0_IS_INVALID);
|
||||
if (!(type & T_M_K) && x.getIdx() == regExp.getIndex().getIdx()) XBYAK_THROW(ERR_SAME_REGS_ARE_INVALID);
|
||||
opVex(x, 0, addr, type, code);
|
||||
}
|
||||
/*
|
||||
|
@ -2424,13 +2529,13 @@ public:
|
|||
|
||||
// set default type of `jmp` of undefined label to T_NEAR
|
||||
void setDefaultJmpNEAR(bool isNear) { isDefaultJmpNEAR_ = isNear; }
|
||||
void jmp(const Operand& op) { opR_ModM(op, BIT, 4, 0xFF, NONE, NONE, true); }
|
||||
void jmp(const Operand& op, LabelType type = T_AUTO) { opJmpOp(op, type, 4); }
|
||||
void jmp(std::string label, LabelType type = T_AUTO) { opJmp(label, type, 0xEB, 0xE9, 0); }
|
||||
void jmp(const char *label, LabelType type = T_AUTO) { jmp(std::string(label), type); }
|
||||
void jmp(const Label& label, LabelType type = T_AUTO) { opJmp(label, type, 0xEB, 0xE9, 0); }
|
||||
void jmp(const void *addr, LabelType type = T_AUTO) { opJmpAbs(addr, type, 0xEB, 0xE9); }
|
||||
|
||||
void call(const Operand& op) { opR_ModM(op, 16 | i32e, 2, 0xFF, NONE, NONE, true); }
|
||||
void call(const Operand& op, LabelType type = T_AUTO) { opJmpOp(op, type, 2); }
|
||||
// call(string label), not const std::string&
|
||||
void call(std::string label) { opJmp(label, T_NEAR, 0, 0xE8, 0); }
|
||||
void call(const char *label) { call(std::string(label)); }
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
const char *getVersionString() const { return "5.991"; }
|
||||
const char *getVersionString() const { return "6.60.1"; }
|
||||
void adc(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x10, 2); }
|
||||
void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); }
|
||||
void adcx(const Reg32e& reg, const Operand& op) { opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); }
|
||||
|
@ -57,9 +57,11 @@ void cbw() { db(0x66); db(0x98); }
|
|||
void cdq() { db(0x99); }
|
||||
void clc() { db(0xF8); }
|
||||
void cld() { db(0xFC); }
|
||||
void cldemote(const Address& addr) { opMIB(addr, eax, 0x0F, 0x1C); }
|
||||
void clflush(const Address& addr) { opModM(addr, Reg32(7), 0x0F, 0xAE); }
|
||||
void clflushopt(const Address& addr) { db(0x66); opModM(addr, Reg32(7), 0x0F, 0xAE); }
|
||||
void cli() { db(0xFA); }
|
||||
void clwb(const Address& addr) { db(0x66); opMIB(addr, esi, 0x0F, 0xAE); }
|
||||
void clzero() { db(0x0F); db(0x01); db(0xFC); }
|
||||
void cmc() { db(0xF5); }
|
||||
void cmova(const Reg& reg, const Operand& op) { opModRM(reg, op, op.isREG(16 | i32e), op.isMEM(), 0x0F, 0x40 | 7); }//-V524
|
||||
|
@ -172,6 +174,8 @@ void divss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5E, 0xF3, isXMM
|
|||
void dppd(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x41, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); }
|
||||
void dpps(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op, 0x40, 0x66, isXMM_XMMorMEM, static_cast<uint8_t>(imm), 0x3A); }
|
||||
void emms() { db(0x0F); db(0x77); }
|
||||
void endbr32() { db(0xF3); db(0x0F); db(0x1E); db(0xFB); }
|
||||
void endbr64() { db(0xF3); db(0x0F); db(0x1E); db(0xFA); }
|
||||
void enter(uint16_t x, uint8_t y) { db(0xC8); dw(x); db(y); }
|
||||
void extractps(const Operand& op, const Xmm& xmm, uint8_t imm) { opExt(op, xmm, 0x17, imm); }
|
||||
void f2xm1() { db(0xD9); db(0xF0); }
|
||||
|
@ -321,6 +325,7 @@ void gf2p8affineqb(const Xmm& xmm, const Operand& op, int imm) { opGen(xmm, op,
|
|||
void gf2p8mulb(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0xCF, 0x66, isXMM_XMMorMEM, NONE, 0x38); }
|
||||
void haddpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7C, 0x66, isXMM_XMMorMEM); }
|
||||
void haddps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7C, 0xF2, isXMM_XMMorMEM); }
|
||||
void hlt() { db(0xF4); }
|
||||
void hsubpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7D, 0x66, isXMM_XMMorMEM); }
|
||||
void hsubps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x7D, 0xF2, isXMM_XMMorMEM); }
|
||||
void idiv(const Operand& op) { opR_ModM(op, 0, 7, 0xF6); }
|
||||
|
@ -498,6 +503,8 @@ void movd(const Mmx& mmx, const Address& addr) { if (mmx.isXMM()) db(0x66); opMo
|
|||
void movd(const Mmx& mmx, const Reg32& reg) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x6E); }
|
||||
void movd(const Reg32& reg, const Mmx& mmx) { if (mmx.isXMM()) db(0x66); opModR(mmx, reg, 0x0F, 0x7E); }
|
||||
void movddup(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x12, 0xF2, isXMM_XMMorMEM, NONE, NONE); }
|
||||
void movdir64b(const Reg& reg, const Address& addr) { db(0x66); opModM(addr, reg.cvt32(), 0x0F, 0x38, 0xF8); }
|
||||
void movdiri(const Address& addr, const Reg32e& reg) { opModM(addr, reg, 0x0F, 0x38, 0xF9); }
|
||||
void movdq2q(const Mmx& mmx, const Xmm& xmm) { db(0xF2); opModR(mmx, xmm, 0x0F, 0xD6); }
|
||||
void movdqa(const Address& addr, const Xmm& xmm) { db(0x66); opModM(addr, xmm, 0x0F, 0x7F); }
|
||||
void movdqa(const Xmm& xmm, const Operand& op) { opMMX(xmm, op, 0x6F, 0x66); }
|
||||
|
@ -717,6 +724,7 @@ void repne() { db(0xF2); }
|
|||
void repnz() { db(0xF2); }
|
||||
void repz() { db(0xF3); }
|
||||
void ret(int imm = 0) { if (imm) { db(0xC2); dw(imm); } else { db(0xC3); } }
|
||||
void retf(int imm = 0) { if (imm) { db(0xCA); dw(imm); } else { db(0xCB); } }
|
||||
void rol(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 0); }
|
||||
void rol(const Operand& op, int imm) { opShift(op, imm, 0); }
|
||||
void ror(const Operand& op, const Reg8& _cl) { opShift(op, _cl, 1); }
|
||||
|
@ -809,18 +817,21 @@ void subsd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5C, 0xF2, isXMM
|
|||
void subss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x5C, 0xF3, isXMM_XMMorMEM); }
|
||||
void sysenter() { db(0x0F); db(0x34); }
|
||||
void sysexit() { db(0x0F); db(0x35); }
|
||||
void tpause(const Reg32& r) { int idx = r.getIdx(); if (idx > 7) XBYAK_THROW(ERR_BAD_PARAMETER) db(0x66); db(0x0F); db(0xAE); setModRM(3, 6, idx); }
|
||||
void tzcnt(const Reg®, const Operand& op) { opSp1(reg, op, 0xF3, 0x0F, 0xBC); }
|
||||
void ucomisd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x2E, 0x66, isXMM_XMMorMEM); }
|
||||
void ucomiss(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x2E, 0x100, isXMM_XMMorMEM); }
|
||||
void ud2() { db(0x0F); db(0x0B); }
|
||||
void umonitor(const Reg& r) { int idx = r.getIdx(); if (idx > 7) XBYAK_THROW(ERR_BAD_PARAMETER) int bit = r.getBit(); if (BIT != bit) { if ((BIT == 32 && bit == 16) || (BIT == 64 && bit == 32)) { db(0x67); } else { XBYAK_THROW(ERR_BAD_SIZE_OF_REGISTER) } } db(0xF3); db(0x0F); db(0xAE); setModRM(3, 6, idx); }
|
||||
void umwait(const Reg32& r) { int idx = r.getIdx(); if (idx > 7) XBYAK_THROW(ERR_BAD_PARAMETER) db(0xF2); db(0x0F); db(0xAE); setModRM(3, 6, idx); }
|
||||
void unpckhpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x15, 0x66, isXMM_XMMorMEM); }
|
||||
void unpckhps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x15, 0x100, isXMM_XMMorMEM); }
|
||||
void unpcklpd(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x14, 0x66, isXMM_XMMorMEM); }
|
||||
void unpcklps(const Xmm& xmm, const Operand& op) { opGen(xmm, op, 0x14, 0x100, isXMM_XMMorMEM); }
|
||||
void vaddpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x58); }
|
||||
void vaddps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x58); }
|
||||
void vaddsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z | T_N8, 0x58); }
|
||||
void vaddss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z | T_N4, 0x58); }
|
||||
void vaddsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_X | T_N8, 0x58); }
|
||||
void vaddss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_X | T_N4, 0x58); }
|
||||
void vaddsubpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F | T_YMM, 0xD0); }
|
||||
void vaddsubps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_F2 | T_0F | T_YMM, 0xD0); }
|
||||
void vaesdec(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_66 | T_0F38 | T_YMM | T_EVEX, 0xDE); }
|
||||
|
@ -982,7 +993,7 @@ void vcvtpd2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_66 | T_
|
|||
void vcvtph2ps(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F38 | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x13); }
|
||||
void vcvtps2dq(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5B); }
|
||||
void vcvtps2pd(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_0F | T_YMM | T_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_SAE_Y, 0x5A); }
|
||||
void vcvtps2ph(const Operand& op, const Xmm& x, uint8_t imm) { checkCvt1(x, op); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y, 0x1D, imm); }
|
||||
void vcvtps2ph(const Operand& op, const Xmm& x, uint8_t imm) { checkCvt1(x, op); opVex(x, 0, op, T_0F3A | T_66 | T_W0 | T_EVEX | T_EW0 | T_N8 | T_N_VL | T_SAE_Y | T_M_K, 0x1D, imm); }
|
||||
void vcvtsd2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F2 | T_W0 | T_EVEX | T_EW0 | T_N4 | T_ER_X, 0x2D); }
|
||||
void vcvtsd2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_F2 | T_0F | T_EW1 | T_EVEX | T_ER_X, 0x5A); }
|
||||
void vcvtsi2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_0F | T_F2 | T_EVEX, T_W1 | T_EW1 | T_ER_X | T_N8, T_W0 | T_EW0 | T_N4, 0x2A); }
|
||||
|
@ -995,8 +1006,8 @@ void vcvttsd2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()
|
|||
void vcvttss2si(const Reg32& r, const Operand& op) { opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, T_0F | T_F3 | T_W0 | T_EVEX | T_EW0 | T_SAE_X | T_N8, 0x2C); }
|
||||
void vdivpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x5E); }
|
||||
void vdivps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5E); }
|
||||
void vdivsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z | T_N8, 0x5E); }
|
||||
void vdivss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z | T_N4, 0x5E); }
|
||||
void vdivsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_X | T_N8, 0x5E); }
|
||||
void vdivss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_X | T_N4, 0x5E); }
|
||||
void vdppd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0, 0x41, imm); }
|
||||
void vdpps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x40, imm); }
|
||||
void vextractf128(const Operand& op, const Ymm& y, uint8_t imm) { if (!(op.isXMEM() && y.isYMM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y, 0, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x19, imm); }
|
||||
|
@ -1085,12 +1096,12 @@ void vmaskmovps(const Address& addr, const Xmm& x1, const Xmm& x2) { opAVX_X_X_X
|
|||
void vmaskmovps(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_66 | T_W0 | T_YMM, 0x2C); }
|
||||
void vmaxpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x5F); }
|
||||
void vmaxps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5F); }
|
||||
void vmaxsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z | T_N8, 0x5F); }
|
||||
void vmaxss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z | T_N4, 0x5F); }
|
||||
void vmaxsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_X | T_N8, 0x5F); }
|
||||
void vmaxss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_X | T_N4, 0x5F); }
|
||||
void vminpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x5D); }
|
||||
void vminps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5D); }
|
||||
void vminsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z | T_N8, 0x5D); }
|
||||
void vminss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z | T_N4, 0x5D); }
|
||||
void vminsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_X | T_N8, 0x5D); }
|
||||
void vminss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_X | T_N4, 0x5D); }
|
||||
void vmovapd(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX | T_M_K, 0x29); }
|
||||
void vmovapd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F | T_EW1 | T_YMM | T_EVEX, 0x28); }
|
||||
void vmovaps(const Address& addr, const Xmm& xmm) { opAVX_X_XM_IMM(xmm, addr, T_0F | T_EW0 | T_YMM | T_EVEX | T_M_K, 0x29); }
|
||||
|
@ -1136,8 +1147,8 @@ void vmovups(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_0F | T
|
|||
void vmpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_W0 | T_YMM, 0x42, imm); }
|
||||
void vmulpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x59); }
|
||||
void vmulps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x59); }
|
||||
void vmulsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z | T_N8, 0x59); }
|
||||
void vmulss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z | T_N4, 0x59); }
|
||||
void vmulsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_X | T_N8, 0x59); }
|
||||
void vmulss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_X | T_N4, 0x59); }
|
||||
void vorpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x56); }
|
||||
void vorps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x56); }
|
||||
void vpabsb(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_YMM | T_EVEX, 0x1C); }
|
||||
|
@ -1320,8 +1331,8 @@ void vsqrtss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1,
|
|||
void vstmxcsr(const Address& addr) { opAVX_X_X_XM(xm3, xm0, addr, T_0F, 0xAE); }
|
||||
void vsubpd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_66 | T_EW1 | T_YMM | T_EVEX | T_ER_Z | T_B64, 0x5C); }
|
||||
void vsubps(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_EW0 | T_YMM | T_EVEX | T_ER_Z | T_B32, 0x5C); }
|
||||
void vsubsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_Z | T_N8, 0x5C); }
|
||||
void vsubss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_Z | T_N4, 0x5C); }
|
||||
void vsubsd(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F2 | T_EW1 | T_EVEX | T_ER_X | T_N8, 0x5C); }
|
||||
void vsubss(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_0F | T_F3 | T_EW0 | T_EVEX | T_ER_X | T_N4, 0x5C); }
|
||||
void vtestpd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_YMM, 0x0F); }
|
||||
void vtestps(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F38 | T_YMM, 0x0E); }
|
||||
void vucomisd(const Xmm& xm, const Operand& op) { opAVX_X_XM_IMM(xm, op, T_N8 | T_66 | T_0F | T_EW1 | T_EVEX | T_SAE_X, 0x2E); }
|
||||
|
@ -1739,6 +1750,8 @@ void v4fmaddps(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM
|
|||
void v4fmaddss(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_F2 | T_EW0 | T_MUST_EVEX | T_N16, 0x9B); }
|
||||
void v4fnmaddps(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0xAA); }
|
||||
void v4fnmaddss(const Xmm& x1, const Xmm& x2, const Address& addr) { opAVX_X_X_XM(x1, x2, addr, T_0F38 | T_F2 | T_EW0 | T_MUST_EVEX | T_N16, 0xAB); }
|
||||
void vaddph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x58); }
|
||||
void vaddsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_X | T_N2, 0x58); }
|
||||
void valignd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x03, imm); }
|
||||
void valignq(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x03, imm); }
|
||||
void vblendmpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x65); }
|
||||
|
@ -1753,41 +1766,206 @@ void vbroadcasti32x4(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_
|
|||
void vbroadcasti32x8(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_N32, 0x5B); }
|
||||
void vbroadcasti64x2(const Ymm& y, const Operand& op) { opAVX_X_XM_IMM(y, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N16, 0x5A); }
|
||||
void vbroadcasti64x4(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_N32, 0x5B); }
|
||||
void vcmpeq_ospd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 16); }
|
||||
void vcmpeq_osps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 16); }
|
||||
void vcmpeq_ossd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 16); }
|
||||
void vcmpeq_osss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 16); }
|
||||
void vcmpeq_uqpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 8); }
|
||||
void vcmpeq_uqps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 8); }
|
||||
void vcmpeq_uqsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 8); }
|
||||
void vcmpeq_uqss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 8); }
|
||||
void vcmpeq_uspd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 24); }
|
||||
void vcmpeq_usps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 24); }
|
||||
void vcmpeq_ussd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 24); }
|
||||
void vcmpeq_usss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 24); }
|
||||
void vcmpeqpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 0); }
|
||||
void vcmpeqps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 0); }
|
||||
void vcmpeqsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 0); }
|
||||
void vcmpeqss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 0); }
|
||||
void vcmpfalse_ospd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 27); }
|
||||
void vcmpfalse_osps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 27); }
|
||||
void vcmpfalse_ossd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 27); }
|
||||
void vcmpfalse_osss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 27); }
|
||||
void vcmpfalsepd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 11); }
|
||||
void vcmpfalseps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 11); }
|
||||
void vcmpfalsesd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 11); }
|
||||
void vcmpfalsess(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 11); }
|
||||
void vcmpge_oqpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 29); }
|
||||
void vcmpge_oqps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 29); }
|
||||
void vcmpge_oqsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 29); }
|
||||
void vcmpge_oqss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 29); }
|
||||
void vcmpgepd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 13); }
|
||||
void vcmpgeps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 13); }
|
||||
void vcmpgesd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 13); }
|
||||
void vcmpgess(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 13); }
|
||||
void vcmpgt_oqpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 30); }
|
||||
void vcmpgt_oqps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 30); }
|
||||
void vcmpgt_oqsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 30); }
|
||||
void vcmpgt_oqss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 30); }
|
||||
void vcmpgtpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 14); }
|
||||
void vcmpgtps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 14); }
|
||||
void vcmpgtsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 14); }
|
||||
void vcmpgtss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 14); }
|
||||
void vcmple_oqpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 18); }
|
||||
void vcmple_oqps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 18); }
|
||||
void vcmple_oqsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 18); }
|
||||
void vcmple_oqss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 18); }
|
||||
void vcmplepd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 2); }
|
||||
void vcmpleps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 2); }
|
||||
void vcmplesd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 2); }
|
||||
void vcmpless(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 2); }
|
||||
void vcmplt_oqpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 17); }
|
||||
void vcmplt_oqps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 17); }
|
||||
void vcmplt_oqsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 17); }
|
||||
void vcmplt_oqss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 17); }
|
||||
void vcmpltpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 1); }
|
||||
void vcmpltps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 1); }
|
||||
void vcmpltsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 1); }
|
||||
void vcmpltss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 1); }
|
||||
void vcmpneq_oqpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 12); }
|
||||
void vcmpneq_oqps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 12); }
|
||||
void vcmpneq_oqsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 12); }
|
||||
void vcmpneq_oqss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 12); }
|
||||
void vcmpneq_ospd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 28); }
|
||||
void vcmpneq_osps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 28); }
|
||||
void vcmpneq_ossd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 28); }
|
||||
void vcmpneq_osss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 28); }
|
||||
void vcmpneq_uspd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 20); }
|
||||
void vcmpneq_usps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 20); }
|
||||
void vcmpneq_ussd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 20); }
|
||||
void vcmpneq_usss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 20); }
|
||||
void vcmpneqpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 4); }
|
||||
void vcmpneqps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 4); }
|
||||
void vcmpneqsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 4); }
|
||||
void vcmpneqss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 4); }
|
||||
void vcmpnge_uqpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 25); }
|
||||
void vcmpnge_uqps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 25); }
|
||||
void vcmpnge_uqsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 25); }
|
||||
void vcmpnge_uqss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 25); }
|
||||
void vcmpngepd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 9); }
|
||||
void vcmpngeps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 9); }
|
||||
void vcmpngesd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 9); }
|
||||
void vcmpngess(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 9); }
|
||||
void vcmpngt_uqpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 26); }
|
||||
void vcmpngt_uqps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 26); }
|
||||
void vcmpngt_uqsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 26); }
|
||||
void vcmpngt_uqss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 26); }
|
||||
void vcmpngtpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 10); }
|
||||
void vcmpngtps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 10); }
|
||||
void vcmpngtsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 10); }
|
||||
void vcmpngtss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 10); }
|
||||
void vcmpnle_uqpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 22); }
|
||||
void vcmpnle_uqps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 22); }
|
||||
void vcmpnle_uqsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 22); }
|
||||
void vcmpnle_uqss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 22); }
|
||||
void vcmpnlepd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 6); }
|
||||
void vcmpnleps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 6); }
|
||||
void vcmpnlesd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 6); }
|
||||
void vcmpnless(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 6); }
|
||||
void vcmpnlt_uqpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 21); }
|
||||
void vcmpnlt_uqps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 21); }
|
||||
void vcmpnlt_uqsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 21); }
|
||||
void vcmpnlt_uqss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 21); }
|
||||
void vcmpnltpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 5); }
|
||||
void vcmpnltps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 5); }
|
||||
void vcmpnltsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 5); }
|
||||
void vcmpnltss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 5); }
|
||||
void vcmpord_spd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 23); }
|
||||
void vcmpord_sps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 23); }
|
||||
void vcmpord_ssd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 23); }
|
||||
void vcmpord_sss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 23); }
|
||||
void vcmpordpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 7); }
|
||||
void vcmpordps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 7); }
|
||||
void vcmpordsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 7); }
|
||||
void vcmpordss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 7); }
|
||||
void vcmppd(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_66 | T_0F | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0xC2, imm); }
|
||||
void vcmpph(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B16, 0xC2, imm); }
|
||||
void vcmpps(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_0F | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0xC2, imm); }
|
||||
void vcmpsd(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_N8 | T_F2 | T_0F | T_EW1 | T_SAE_Z | T_MUST_EVEX, 0xC2, imm); }
|
||||
void vcmpsh(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_N2 | T_F3 | T_0F3A | T_EW0 | T_SAE_X | T_MUST_EVEX, 0xC2, imm); }
|
||||
void vcmpss(const Opmask& k, const Xmm& x, const Operand& op, uint8_t imm) { opAVX_K_X_XM(k, x, op, T_N4 | T_F3 | T_0F | T_EW0 | T_SAE_Z | T_MUST_EVEX, 0xC2, imm); }
|
||||
void vcmptrue_uspd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 31); }
|
||||
void vcmptrue_usps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 31); }
|
||||
void vcmptrue_ussd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 31); }
|
||||
void vcmptrue_usss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 31); }
|
||||
void vcmptruepd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 15); }
|
||||
void vcmptrueps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 15); }
|
||||
void vcmptruesd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 15); }
|
||||
void vcmptruess(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 15); }
|
||||
void vcmpunord_spd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 19); }
|
||||
void vcmpunord_sps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 19); }
|
||||
void vcmpunord_ssd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 19); }
|
||||
void vcmpunord_sss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 19); }
|
||||
void vcmpunordpd(const Opmask& k, const Xmm& x, const Operand& op) { vcmppd(k, x, op, 3); }
|
||||
void vcmpunordps(const Opmask& k, const Xmm& x, const Operand& op) { vcmpps(k, x, op, 3); }
|
||||
void vcmpunordsd(const Opmask& k, const Xmm& x, const Operand& op) { vcmpsd(k, x, op, 3); }
|
||||
void vcmpunordss(const Opmask& k, const Xmm& x, const Operand& op) { vcmpss(k, x, op, 3); }
|
||||
void vcomish(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, 0x2F); }
|
||||
void vcompressb(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N1 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x63); }
|
||||
void vcompresspd(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x8A); }
|
||||
void vcompressps(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8A); }
|
||||
void vcompressw(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N2 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x63); }
|
||||
void vcvtdq2ph(const Xmm& x, const Operand& op) { checkCvt4(x, op); opCvt(x, op, T_N16 | T_N_VL | T_MAP5 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B32, 0x5B); }
|
||||
void vcvtne2ps2bf16(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x72); }
|
||||
void vcvtneps2bf16(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x72); }
|
||||
void vcvtpd2ph(const Xmm& x, const Operand& op) { opCvt5(x, op, T_N16 | T_N_VL | T_66 | T_MAP5 | T_EW1 | T_ER_Z | T_MUST_EVEX | T_B64, 0x5A); }
|
||||
void vcvtpd2qq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x7B); }
|
||||
void vcvtpd2udq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x79); }
|
||||
void vcvtpd2udq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x79); }
|
||||
void vcvtpd2uqq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x79); }
|
||||
void vcvtps2qq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_ER_Y, 0x7B); }
|
||||
void vcvtph2dq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8 | T_N_VL | T_66 | T_MAP5 | T_EW0 | T_YMM | T_ER_Y | T_MUST_EVEX | T_B16, 0x5B); }
|
||||
void vcvtph2pd(const Xmm& x, const Operand& op) { if (!op.isXMM() && !op.isMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(x, 0, op, T_N4 | T_N_VL | T_MAP5 | T_EW0 | T_YMM | T_SAE_X | T_MUST_EVEX | T_B16, 0x5A); }
|
||||
void vcvtph2psx(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8 | T_N_VL | T_66 | T_MAP6 | T_EW0 | T_YMM | T_SAE_Y | T_MUST_EVEX | T_B16, 0x13); }
|
||||
void vcvtph2qq(const Xmm& x, const Operand& op) { if (!op.isXMM() && !op.isMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(x, 0, op, T_N4 | T_N_VL | T_66 | T_MAP5 | T_EW0 | T_YMM | T_ER_X | T_MUST_EVEX | T_B16, 0x7B); }
|
||||
void vcvtph2udq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8 | T_N_VL | T_MAP5 | T_EW0 | T_YMM | T_ER_Y | T_MUST_EVEX | T_B16, 0x79); }
|
||||
void vcvtph2uqq(const Xmm& x, const Operand& op) { if (!op.isXMM() && !op.isMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(x, 0, op, T_N4 | T_N_VL | T_66 | T_MAP5 | T_EW0 | T_YMM | T_ER_X | T_MUST_EVEX | T_B16, 0x79); }
|
||||
void vcvtph2uw(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0x7D); }
|
||||
void vcvtph2w(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_MAP5 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0x7D); }
|
||||
void vcvtps2phx(const Xmm& x, const Operand& op) { checkCvt4(x, op); opCvt(x, op, T_N16 | T_N_VL | T_66 | T_MAP5 | T_EW0 | T_ER_Z | T_MUST_EVEX | T_B32, 0x1D); }
|
||||
void vcvtps2qq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8 | T_N_VL | T_66 | T_0F | T_EW0 | T_YMM | T_ER_Y | T_MUST_EVEX | T_B32, 0x7B); }
|
||||
void vcvtps2udq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_0F | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B32, 0x79); }
|
||||
void vcvtps2uqq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_ER_Y, 0x79); }
|
||||
void vcvtps2uqq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8 | T_N_VL | T_66 | T_0F | T_EW0 | T_YMM | T_ER_Y | T_MUST_EVEX | T_B32, 0x79); }
|
||||
void vcvtqq2pd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0xE6); }
|
||||
void vcvtqq2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x5B); }
|
||||
void vcvtsd2usi(const Reg32e& r, const Operand& op) { int type = (T_F2 | T_0F | T_MUST_EVEX | T_N8 | T_ER_X) | (r.isREG(64) ? T_EW1 : T_EW0); opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, type, 0x79); }
|
||||
void vcvtss2usi(const Reg32e& r, const Operand& op) { int type = (T_F3 | T_0F | T_MUST_EVEX | T_N4 | T_ER_X) | (r.isREG(64) ? T_EW1 : T_EW0); opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, type, 0x79); }
|
||||
void vcvtqq2ph(const Xmm& x, const Operand& op) { opCvt5(x, op, T_N16 | T_N_VL | T_MAP5 | T_EW1 | T_ER_Z | T_MUST_EVEX | T_B64, 0x5B); }
|
||||
void vcvtqq2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x5B); }
|
||||
void vcvtsd2sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_F2 | T_MAP5 | T_EW1 | T_ER_X | T_MUST_EVEX, 0x5A); }
|
||||
void vcvtsd2usi(const Reg32e& r, const Operand& op) { int type = (T_N8 | T_F2 | T_0F | T_ER_X | T_MUST_EVEX) | (r.isREG(64) ? T_EW1 : T_EW0); opVex(r, &xm0, op, type, 0x79); }
|
||||
void vcvtsh2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_F3 | T_MAP5 | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x5A); }
|
||||
void vcvtsh2si(const Reg32e& r, const Operand& op) { int type = (T_N2 | T_F3 | T_MAP5 | T_ER_X | T_MUST_EVEX) | (r.isREG(64) ? T_EW1 : T_EW0); opVex(r, &xm0, op, type, 0x2D); }
|
||||
void vcvtsh2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_MAP6 | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x13); }
|
||||
void vcvtsh2usi(const Reg32e& r, const Operand& op) { int type = (T_N2 | T_F3 | T_MAP5 | T_ER_X | T_MUST_EVEX) | (r.isREG(64) ? T_EW1 : T_EW0); opVex(r, &xm0, op, type, 0x79); }
|
||||
void vcvtsi2sh(const Xmm& x1, const Xmm& x2, const Operand& op) { if (!(x1.isXMM() && x2.isXMM() && op.isBit(32|64))) XBYAK_THROW(ERR_BAD_COMBINATION) int type = (T_F3 | T_MAP5 | T_ER_R | T_MUST_EVEX | T_M_K) | (op.isBit(32) ? (T_EW0 | T_N4) : (T_EW1 | T_N8)); opVex(x1, &x2, op, type, 0x2A); }
|
||||
void vcvtss2sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_MAP5 | T_EW0 | T_ER_X | T_MUST_EVEX, 0x1D); }
|
||||
void vcvtss2usi(const Reg32e& r, const Operand& op) { int type = (T_N4 | T_F3 | T_0F | T_ER_X | T_MUST_EVEX) | (r.isREG(64) ? T_EW1 : T_EW0); opVex(r, &xm0, op, type, 0x79); }
|
||||
void vcvttpd2qq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x7A); }
|
||||
void vcvttpd2udq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_SAE_Z, 0x78); }
|
||||
void vcvttpd2udq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x78); }
|
||||
void vcvttpd2uqq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x78); }
|
||||
void vcvttps2qq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_SAE_Y, 0x7A); }
|
||||
void vcvttph2dq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8 | T_N_VL | T_F3 | T_MAP5 | T_EW0 | T_YMM | T_SAE_Y | T_MUST_EVEX | T_B16, 0x5B); }
|
||||
void vcvttph2qq(const Xmm& x, const Operand& op) { if (!op.isXMM() && !op.isMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(x, 0, op, T_N4 | T_N_VL | T_66 | T_MAP5 | T_EW0 | T_YMM | T_SAE_X | T_MUST_EVEX | T_B16, 0x7A); }
|
||||
void vcvttph2udq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8 | T_N_VL | T_MAP5 | T_EW0 | T_YMM | T_SAE_Y | T_MUST_EVEX | T_B16, 0x78); }
|
||||
void vcvttph2uqq(const Xmm& x, const Operand& op) { if (!op.isXMM() && !op.isMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(x, 0, op, T_N4 | T_N_VL | T_66 | T_MAP5 | T_EW0 | T_YMM | T_SAE_X | T_MUST_EVEX | T_B16, 0x78); }
|
||||
void vcvttph2uw(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B16, 0x7C); }
|
||||
void vcvttph2w(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_MAP5 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B16, 0x7C); }
|
||||
void vcvttps2qq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8 | T_N_VL | T_66 | T_0F | T_EW0 | T_YMM | T_SAE_Y | T_MUST_EVEX | T_B32, 0x7A); }
|
||||
void vcvttps2udq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_0F | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x78); }
|
||||
void vcvttps2uqq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_66 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL | T_SAE_Y, 0x78); }
|
||||
void vcvttsd2usi(const Reg32e& r, const Operand& op) { int type = (T_F2 | T_0F | T_MUST_EVEX | T_N8 | T_SAE_X) | (r.isREG(64) ? T_EW1 : T_EW0); opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, type, 0x78); }
|
||||
void vcvttss2usi(const Reg32e& r, const Operand& op) { int type = (T_F3 | T_0F | T_MUST_EVEX | T_N4 | T_SAE_X) | (r.isREG(64) ? T_EW1 : T_EW0); opAVX_X_X_XM(Xmm(r.getIdx()), xm0, op, type, 0x78); }
|
||||
void vcvtudq2pd(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_F3 | T_0F | T_YMM | T_MUST_EVEX | T_EW0 | T_B32 | T_N8 | T_N_VL, 0x7A); }
|
||||
void vcvttps2uqq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8 | T_N_VL | T_66 | T_0F | T_EW0 | T_YMM | T_SAE_Y | T_MUST_EVEX | T_B32, 0x78); }
|
||||
void vcvttsd2usi(const Reg32e& r, const Operand& op) { int type = (T_N8 | T_F2 | T_0F | T_SAE_X | T_MUST_EVEX) | (r.isREG(64) ? T_EW1 : T_EW0); opVex(r, &xm0, op, type, 0x78); }
|
||||
void vcvttsh2si(const Reg32e& r, const Operand& op) { int type = (T_N2 | T_F3 | T_MAP5 | T_EW0 | T_SAE_X | T_MUST_EVEX) | (r.isREG(64) ? T_EW1 : T_EW0); opVex(r, &xm0, op, type, 0x2C); }
|
||||
void vcvttsh2usi(const Reg32e& r, const Operand& op) { int type = (T_N2 | T_F3 | T_MAP5 | T_EW0 | T_SAE_X | T_MUST_EVEX) | (r.isREG(64) ? T_EW1 : T_EW0); opVex(r, &xm0, op, type, 0x78); }
|
||||
void vcvttss2usi(const Reg32e& r, const Operand& op) { int type = (T_N4 | T_F3 | T_0F | T_SAE_X | T_MUST_EVEX) | (r.isREG(64) ? T_EW1 : T_EW0); opVex(r, &xm0, op, type, 0x78); }
|
||||
void vcvtudq2pd(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8 | T_N_VL | T_F3 | T_0F | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x7A); }
|
||||
void vcvtudq2ph(const Xmm& x, const Operand& op) { checkCvt4(x, op); opCvt(x, op, T_N16 | T_N_VL | T_F2 | T_MAP5 | T_EW0 | T_ER_Z | T_MUST_EVEX | T_B32, 0x7A); }
|
||||
void vcvtudq2ps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2 | T_0F | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B32, 0x7A); }
|
||||
void vcvtuqq2pd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x7A); }
|
||||
void vcvtuqq2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F2 | T_0F | T_YMM | T_MUST_EVEX | T_EW1 | T_B64 | T_ER_Z, 0x7A); }
|
||||
void vcvtuqq2ph(const Xmm& x, const Operand& op) { opCvt5(x, op, T_N16 | T_N_VL | T_F2 | T_MAP5 | T_EW1 | T_ER_Z | T_MUST_EVEX | T_B64, 0x7A); }
|
||||
void vcvtuqq2ps(const Xmm& x, const Operand& op) { opCvt2(x, op, T_F2 | T_0F | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x7A); }
|
||||
void vcvtusi2sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F2 | T_0F | T_MUST_EVEX, T_W1 | T_EW1 | T_ER_X | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); }
|
||||
void vcvtusi2sh(const Xmm& x1, const Xmm& x2, const Operand& op) { if (!(x1.isXMM() && x2.isXMM() && op.isBit(32|64))) XBYAK_THROW(ERR_BAD_COMBINATION) int type = (T_F3 | T_MAP5 | T_ER_R | T_MUST_EVEX | T_M_K) | (op.isBit(32) ? (T_EW0 | T_N4) : (T_EW1 | T_N8)); opVex(x1, &x2, op, type, 0x7B); }
|
||||
void vcvtusi2ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opCvt3(x1, x2, op, T_F3 | T_0F | T_MUST_EVEX | T_ER_X, T_W1 | T_EW1 | T_N8, T_W0 | T_EW0 | T_N4, 0x7B); }
|
||||
void vcvtuw2ph(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2 | T_MAP5 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0x7D); }
|
||||
void vcvtw2ph(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3 | T_MAP5 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0x7D); }
|
||||
void vdbpsadbw(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x42, imm); }
|
||||
void vdivph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x5E); }
|
||||
void vdivsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_X | T_N2, 0x5E); }
|
||||
void vdpbf16ps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x52); }
|
||||
void vexp2pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW1 | T_B64 | T_SAE_Z, 0xC8); }
|
||||
void vexp2ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xC8); }
|
||||
|
@ -1801,13 +1979,49 @@ void vextracti32x4(const Operand& op, const Ymm& r, uint8_t imm) { if (!op.is(Op
|
|||
void vextracti32x8(const Operand& op, const Zmm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x3B, imm); }
|
||||
void vextracti64x2(const Operand& op, const Ymm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::XMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N16 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x39, imm); }
|
||||
void vextracti64x4(const Operand& op, const Zmm& r, uint8_t imm) { if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r, 0, op, T_N32 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x3B, imm); }
|
||||
void vfcmaddcph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B32, 0x56); }
|
||||
void vfcmulcph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F2 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B32, 0xD6); }
|
||||
void vfixupimmpd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x54, imm); }
|
||||
void vfixupimmps(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x54, imm); }
|
||||
void vfixupimmsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_Z | T_MUST_EVEX, 0x55, imm); }
|
||||
void vfixupimmss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_SAE_Z | T_MUST_EVEX, 0x55, imm); }
|
||||
void vfmadd132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0x98); }
|
||||
void vfmadd132sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_66 | T_MAP6 | T_EW0 | T_ER_X | T_MUST_EVEX, 0x99); }
|
||||
void vfmadd213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0xA8); }
|
||||
void vfmadd213sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_66 | T_MAP6 | T_EW0 | T_ER_X | T_MUST_EVEX, 0xA9); }
|
||||
void vfmadd231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0xB8); }
|
||||
void vfmadd231sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_66 | T_MAP6 | T_EW0 | T_ER_X | T_MUST_EVEX, 0xB9); }
|
||||
void vfmaddcph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B32, 0x56); }
|
||||
void vfmaddsub132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0x96); }
|
||||
void vfmaddsub213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0xA6); }
|
||||
void vfmaddsub231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0xB6); }
|
||||
void vfmsub132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0x9A); }
|
||||
void vfmsub132sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_66 | T_MAP6 | T_EW0 | T_ER_X | T_MUST_EVEX, 0x9B); }
|
||||
void vfmsub213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0xAA); }
|
||||
void vfmsub213sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_66 | T_MAP6 | T_EW0 | T_ER_X | T_MUST_EVEX, 0xAB); }
|
||||
void vfmsub231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0xBA); }
|
||||
void vfmsub231sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_66 | T_MAP6 | T_EW0 | T_ER_X | T_MUST_EVEX, 0xBB); }
|
||||
void vfmsubadd132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0x97); }
|
||||
void vfmsubadd213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0xA7); }
|
||||
void vfmsubadd231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0xB7); }
|
||||
void vfmulcph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_F3 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B32, 0xD6); }
|
||||
void vfnmadd132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0x9C); }
|
||||
void vfnmadd132sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_66 | T_MAP6 | T_EW0 | T_ER_X | T_MUST_EVEX, 0x9D); }
|
||||
void vfnmadd213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0xAC); }
|
||||
void vfnmadd213sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_66 | T_MAP6 | T_EW0 | T_ER_X | T_MUST_EVEX, 0xAD); }
|
||||
void vfnmadd231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0xBC); }
|
||||
void vfnmadd231sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_66 | T_MAP6 | T_EW0 | T_ER_X | T_MUST_EVEX, 0xBD); }
|
||||
void vfnmsub132ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0x9E); }
|
||||
void vfnmsub132sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_66 | T_MAP6 | T_EW0 | T_ER_X | T_MUST_EVEX, 0x9F); }
|
||||
void vfnmsub213ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0xAE); }
|
||||
void vfnmsub213sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_66 | T_MAP6 | T_EW0 | T_ER_X | T_MUST_EVEX, 0xAF); }
|
||||
void vfnmsub231ph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0xBE); }
|
||||
void vfnmsub231sh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_66 | T_MAP6 | T_EW0 | T_ER_X | T_MUST_EVEX, 0xBF); }
|
||||
void vfpclasspd(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW1 | T_B64, 0x66, imm); }
|
||||
void vfpclassph(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B16, 0x66, imm); }
|
||||
void vfpclassps(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isBit(128|256|512)) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k.changeBit(op.getBit()), 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_YMM | T_EW0 | T_B32, 0x66, imm); }
|
||||
void vfpclasssd(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW1 | T_N8, 0x67, imm); }
|
||||
void vfpclasssh(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_0F3A | T_MUST_EVEX | T_EW0 | T_N2, 0x67, imm); }
|
||||
void vfpclassss(const Opmask& k, const Operand& op, uint8_t imm) { if (!op.isXMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(k, 0, op, T_66 | T_0F3A | T_MUST_EVEX | T_EW0 | T_N4, 0x67, imm); }
|
||||
void vgatherdpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_VSIB, 0x92, 1); }
|
||||
void vgatherdps(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_VSIB, 0x92, 0); }
|
||||
|
@ -1822,12 +2036,16 @@ void vgatherpf1qps(const Address& addr) { opGatherFetch(addr, zm2, T_N4 | T_66 |
|
|||
void vgatherqpd(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_VSIB, 0x93, 0); }
|
||||
void vgatherqps(const Xmm& x, const Address& addr) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_VSIB, 0x93, 2); }
|
||||
void vgetexppd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x42); }
|
||||
void vgetexpph(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B16, 0x42); }
|
||||
void vgetexpps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x42); }
|
||||
void vgetexpsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_SAE_X | T_MUST_EVEX, 0x43); }
|
||||
void vgetexpsh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_66 | T_MAP6 | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x43); }
|
||||
void vgetexpss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x43); }
|
||||
void vgetmantpd(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x26, imm); }
|
||||
void vgetmantph(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B16, 0x26, imm); }
|
||||
void vgetmantps(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x26, imm); }
|
||||
void vgetmantsd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_X | T_MUST_EVEX, 0x27, imm); }
|
||||
void vgetmantsh(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_0F3A | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x27, imm); }
|
||||
void vgetmantss(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x27, imm); }
|
||||
void vinsertf32x4(const Ymm& r1, const Ymm& r2, const Operand& op, uint8_t imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N16 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x18, imm); }
|
||||
void vinsertf32x8(const Zmm& r1, const Zmm& r2, const Operand& op, uint8_t imm) {if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x1A, imm); }
|
||||
|
@ -1837,6 +2055,10 @@ void vinserti32x4(const Ymm& r1, const Ymm& r2, const Operand& op, uint8_t imm)
|
|||
void vinserti32x8(const Zmm& r1, const Zmm& r2, const Operand& op, uint8_t imm) {if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N32 | T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX, 0x3A, imm); }
|
||||
void vinserti64x2(const Ymm& r1, const Ymm& r2, const Operand& op, uint8_t imm) {if (!(r1.getKind() == r2.getKind() && op.is(Operand::MEM | Operand::XMM))) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N16 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x38, imm); }
|
||||
void vinserti64x4(const Zmm& r1, const Zmm& r2, const Operand& op, uint8_t imm) {if (!op.is(Operand::MEM | Operand::YMM)) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(r1, &r2, op, T_N32 | T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX, 0x3A, imm); }
|
||||
void vmaxph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x5F); }
|
||||
void vmaxsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_X | T_N2, 0x5F); }
|
||||
void vminph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x5D); }
|
||||
void vminsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_X | T_N2, 0x5D); }
|
||||
void vmovdqa32(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_66 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_M_K, 0x7F); }
|
||||
void vmovdqa32(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
|
||||
void vmovdqa64(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_66 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_M_K, 0x7F); }
|
||||
|
@ -1849,6 +2071,14 @@ void vmovdqu64(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F3
|
|||
void vmovdqu64(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F3 | T_0F | T_EW1 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
|
||||
void vmovdqu8(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_F2 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX | T_M_K, 0x7F); }
|
||||
void vmovdqu8(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_F2 | T_0F | T_EW0 | T_YMM | T_ER_X | T_ER_Y | T_ER_Z | T_MUST_EVEX, 0x6F); }
|
||||
void vmovsh(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_N2 | T_F3 | T_MAP5 | T_EW0 | T_MUST_EVEX | T_M_K, 0x11); }
|
||||
void vmovsh(const Xmm& x, const Address& addr) { opAVX_X_X_XM(x, xm0, addr, T_N2 | T_F3 | T_MAP5 | T_EW0 | T_MUST_EVEX, 0x10); }
|
||||
void vmovsh(const Xmm& x1, const Xmm& x2, const Xmm& x3) { opAVX_X_X_XM(x1, x2, x3, T_N2 | T_F3 | T_MAP5 | T_EW0 | T_MUST_EVEX, 0x10); }
|
||||
void vmovw(const Address& addr, const Xmm& x) { opAVX_X_XM_IMM(x, addr, T_N2 | T_66 | T_MAP5 | T_MUST_EVEX, 0x7E); }
|
||||
void vmovw(const Reg32e& r, const Xmm& x) { opAVX_X_X_XM(x, xm0, r, T_N2 | T_66 | T_MAP5 | T_MUST_EVEX, 0x7E); }
|
||||
void vmovw(const Xmm& x, const Operand& op) { if (!op.isREG(32|64) && !op.isMEM()) XBYAK_THROW(ERR_BAD_COMBINATION) opAVX_X_X_XM(x, xm0, op, T_N2 | T_66 | T_MAP5 | T_MUST_EVEX, 0x6E); }
|
||||
void vmulph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x59); }
|
||||
void vmulsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_X | T_N2, 0x59); }
|
||||
void vp2intersectd(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET) opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW0 | T_B32, 0x68); }
|
||||
void vp2intersectq(const Opmask& k, const Xmm& x, const Operand& op) { if (k.getOpmaskIdx() != 0) XBYAK_THROW(ERR_OPMASK_IS_ALREADY_SET) opAVX_K_X_XM(k, x, op, T_F2 | T_0F38 | T_YMM | T_EVEX | T_EW1 | T_B64, 0x68); }
|
||||
void vp4dpwssd(const Zmm& z1, const Zmm& z2, const Address& addr) { opAVX_X_X_XM(z1, z2, addr, T_0F38 | T_F2 | T_EW0 | T_YMM | T_MUST_EVEX | T_N16, 0x52); }
|
||||
|
@ -2006,14 +2236,20 @@ void vrcp28pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_
|
|||
void vrcp28ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xCA); }
|
||||
void vrcp28sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_SAE_X | T_MUST_EVEX, 0xCB); }
|
||||
void vrcp28ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_SAE_X | T_MUST_EVEX, 0xCB); }
|
||||
void vrcpph(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_MUST_EVEX | T_B16, 0x4C); }
|
||||
void vrcpsh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_66 | T_MAP6 | T_EW0 | T_MUST_EVEX, 0x4D); }
|
||||
void vreducepd(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x56, imm); }
|
||||
void vreduceph(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B16, 0x56, imm); }
|
||||
void vreduceps(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x56, imm); }
|
||||
void vreducesd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_X | T_MUST_EVEX, 0x57, imm); }
|
||||
void vreducesh(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_0F3A | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x57, imm); }
|
||||
void vreducess(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x57, imm); }
|
||||
void vrndscalepd(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x09, imm); }
|
||||
void vrndscaleps(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x08, imm); }
|
||||
void vrndscalesd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_MUST_EVEX, 0x0B, imm); }
|
||||
void vrndscaless(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_MUST_EVEX, 0x0A, imm); }
|
||||
void vrndscalepd(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x09, imm); }
|
||||
void vrndscaleph(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B16, 0x08, imm); }
|
||||
void vrndscaleps(const Xmm& x, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(x, op, T_66 | T_0F3A | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x08, imm); }
|
||||
void vrndscalesd(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F3A | T_EW1 | T_SAE_X | T_MUST_EVEX, 0x0B, imm); }
|
||||
void vrndscalesh(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_0F3A | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x0A, imm); }
|
||||
void vrndscaless(const Xmm& x1, const Xmm& x2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F3A | T_EW0 | T_SAE_X | T_MUST_EVEX, 0x0A, imm); }
|
||||
void vrsqrt14pd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0x4E); }
|
||||
void vrsqrt14ps(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x4E); }
|
||||
void vrsqrt14sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x4F); }
|
||||
|
@ -2022,9 +2258,13 @@ void vrsqrt28pd(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 |
|
|||
void vrsqrt28ps(const Zmm& z, const Operand& op) { opAVX_X_XM_IMM(z, op, T_66 | T_0F38 | T_MUST_EVEX | T_YMM | T_EW0 | T_B32 | T_SAE_Z, 0xCC); }
|
||||
void vrsqrt28sd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_SAE_X | T_MUST_EVEX, 0xCD); }
|
||||
void vrsqrt28ss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_SAE_X | T_MUST_EVEX, 0xCD); }
|
||||
void vrsqrtph(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_MUST_EVEX | T_B16, 0x4E); }
|
||||
void vrsqrtsh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_66 | T_MAP6 | T_EW0 | T_MUST_EVEX, 0x4F); }
|
||||
void vscalefpd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B64, 0x2C); }
|
||||
void vscalefph(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_MAP6 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0x2C); }
|
||||
void vscalefps(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B32, 0x2C); }
|
||||
void vscalefsd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_ER_X | T_MUST_EVEX, 0x2D); }
|
||||
void vscalefsh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_66 | T_MAP6 | T_EW0 | T_ER_X | T_MUST_EVEX, 0x2D); }
|
||||
void vscalefss(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N4 | T_66 | T_0F38 | T_EW0 | T_ER_X | T_MUST_EVEX, 0x2D); }
|
||||
void vscatterdpd(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA2, 1); }
|
||||
void vscatterdps(const Address& addr, const Xmm& x) { opGather2(x, addr, T_N4 | T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_M_K | T_VSIB, 0xA2, 0); }
|
||||
|
@ -2042,6 +2282,11 @@ void vshuff32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) {
|
|||
void vshuff64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x23, imm); }
|
||||
void vshufi32x4(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW0 | T_B32, 0x43, imm); }
|
||||
void vshufi64x2(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F3A | T_YMM | T_MUST_EVEX | T_EW1 | T_B64, 0x43, imm); }
|
||||
void vsqrtph(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5 | T_EW0 | T_YMM | T_ER_Z | T_MUST_EVEX | T_B16, 0x51); }
|
||||
void vsqrtsh(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_N2 | T_F3 | T_MAP5 | T_EW0 | T_ER_X | T_MUST_EVEX, 0x51); }
|
||||
void vsubph(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_EW0 | T_YMM | T_MUST_EVEX | T_ER_Z | T_B16, 0x5C); }
|
||||
void vsubsh(const Xmm& xmm, const Operand& op1, const Operand& op2 = Operand()) { opAVX_X_X_XM(xmm, op1, op2, T_MAP5 | T_F3 | T_EW0 | T_MUST_EVEX | T_ER_X | T_N2, 0x5C); }
|
||||
void vucomish(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_MAP5 | T_MUST_EVEX | T_EW0 | T_SAE_X | T_N2, 0x2E); }
|
||||
#ifdef XBYAK64
|
||||
void kmovq(const Opmask& k, const Reg64& r) { opVex(k, 0, r, T_L0 | T_0F | T_F2 | T_W1, 0x92); }
|
||||
void kmovq(const Reg64& r, const Opmask& k) { opVex(r, 0, k, T_L0 | T_0F | T_F2 | T_W1, 0x93); }
|
||||
|
|
|
@ -17,7 +17,6 @@
|
|||
utility class and functions for Xbyak
|
||||
Xbyak::util::Clock ; rdtsc timer
|
||||
Xbyak::util::Cpu ; detect CPU
|
||||
@note this header is UNDER CONSTRUCTION!
|
||||
*/
|
||||
#include "xbyak.h"
|
||||
#endif // XBYAK_ONLY_CLASS_CPU
|
||||
|
@ -27,8 +26,8 @@
|
|||
#endif
|
||||
|
||||
#ifdef XBYAK_INTEL_CPU_SPECIFIC
|
||||
#ifdef _MSC_VER
|
||||
#if (_MSC_VER < 1400) && defined(XBYAK32)
|
||||
#ifdef _WIN32
|
||||
#if defined(_MSC_VER) && (_MSC_VER < 1400) && defined(XBYAK32)
|
||||
static inline __declspec(naked) void __cpuid(int[4], int)
|
||||
{
|
||||
__asm {
|
||||
|
@ -88,32 +87,64 @@ typedef enum {
|
|||
CoreLevel = 2
|
||||
} IntelCpuTopologyLevel;
|
||||
|
||||
namespace local {
|
||||
|
||||
template<uint64_t L, uint64_t H = 0>
|
||||
struct TypeT {
|
||||
};
|
||||
|
||||
template<uint64_t L1, uint64_t H1, uint64_t L2, uint64_t H2>
|
||||
TypeT<L1 | L2, H1 | H2> operator|(TypeT<L1, H1>, TypeT<L2, H2>) { return TypeT<L1 | L2, H1 | H2>(); }
|
||||
|
||||
} // local
|
||||
|
||||
/**
|
||||
CPU detection class
|
||||
@note static inline const member is supported by c++17 or later, so use template hack
|
||||
*/
|
||||
class Cpu {
|
||||
uint64_t type_;
|
||||
public:
|
||||
class Type {
|
||||
uint64_t L;
|
||||
uint64_t H;
|
||||
public:
|
||||
Type(uint64_t L = 0, uint64_t H = 0) : L(L), H(H) { }
|
||||
template<uint64_t L_, uint64_t H_>
|
||||
Type(local::TypeT<L_, H_>) : L(L_), H(H_) {}
|
||||
Type& operator&=(const Type& rhs) { L &= rhs.L; H &= rhs.H; return *this; }
|
||||
Type& operator|=(const Type& rhs) { L |= rhs.L; H |= rhs.H; return *this; }
|
||||
Type operator&(const Type& rhs) const { Type t = *this; t &= rhs; return t; }
|
||||
Type operator|(const Type& rhs) const { Type t = *this; t |= rhs; return t; }
|
||||
bool operator==(const Type& rhs) const { return H == rhs.H && L == rhs.L; }
|
||||
bool operator!=(const Type& rhs) const { return !operator==(rhs); }
|
||||
// without explicit because backward compatilibity
|
||||
operator bool() const { return (H | L) != 0; }
|
||||
uint64_t getL() const { return L; }
|
||||
uint64_t getH() const { return H; }
|
||||
};
|
||||
private:
|
||||
Type type_;
|
||||
//system topology
|
||||
bool x2APIC_supported_;
|
||||
static const size_t maxTopologyLevels = 2;
|
||||
unsigned int numCores_[maxTopologyLevels];
|
||||
uint32_t numCores_[maxTopologyLevels];
|
||||
|
||||
static const unsigned int maxNumberCacheLevels = 10;
|
||||
unsigned int dataCacheSize_[maxNumberCacheLevels];
|
||||
unsigned int coresSharignDataCache_[maxNumberCacheLevels];
|
||||
unsigned int dataCacheLevels_;
|
||||
static const uint32_t maxNumberCacheLevels = 10;
|
||||
uint32_t dataCacheSize_[maxNumberCacheLevels];
|
||||
uint32_t coresSharignDataCache_[maxNumberCacheLevels];
|
||||
uint32_t dataCacheLevels_;
|
||||
|
||||
unsigned int get32bitAsBE(const char *x) const
|
||||
uint32_t get32bitAsBE(const char *x) const
|
||||
{
|
||||
return x[0] | (x[1] << 8) | (x[2] << 16) | (x[3] << 24);
|
||||
}
|
||||
unsigned int mask(int n) const
|
||||
uint32_t mask(int n) const
|
||||
{
|
||||
return (1U << n) - 1;
|
||||
}
|
||||
void setFamily()
|
||||
{
|
||||
unsigned int data[4] = {};
|
||||
uint32_t data[4] = {};
|
||||
getCpuid(1, data);
|
||||
stepping = data[0] & mask(4);
|
||||
model = (data[0] >> 4) & mask(4);
|
||||
|
@ -132,15 +163,15 @@ class Cpu {
|
|||
displayModel = model;
|
||||
}
|
||||
}
|
||||
unsigned int extractBit(unsigned int val, unsigned int base, unsigned int end)
|
||||
uint32_t extractBit(uint32_t val, uint32_t base, uint32_t end)
|
||||
{
|
||||
return (val >> base) & ((1u << (end - base)) - 1);
|
||||
}
|
||||
void setNumCores()
|
||||
{
|
||||
if ((type_ & tINTEL) == 0) return;
|
||||
if (!has(tINTEL)) return;
|
||||
|
||||
unsigned int data[4] = {};
|
||||
uint32_t data[4] = {};
|
||||
|
||||
/* CAUTION: These numbers are configuration as shipped by Intel. */
|
||||
getCpuidEx(0x0, 0, data);
|
||||
|
@ -152,7 +183,7 @@ class Cpu {
|
|||
leaf 0xB can be zeroed-out by a hypervisor
|
||||
*/
|
||||
x2APIC_supported_ = true;
|
||||
for (unsigned int i = 0; i < maxTopologyLevels; i++) {
|
||||
for (uint32_t i = 0; i < maxTopologyLevels; i++) {
|
||||
getCpuidEx(0xB, i, data);
|
||||
IntelCpuTopologyLevel level = (IntelCpuTopologyLevel)extractBit(data[2], 8, 15);
|
||||
if (level == SmtLevel || level == CoreLevel) {
|
||||
|
@ -176,14 +207,14 @@ class Cpu {
|
|||
}
|
||||
void setCacheHierarchy()
|
||||
{
|
||||
if ((type_ & tINTEL) == 0) return;
|
||||
const unsigned int NO_CACHE = 0;
|
||||
const unsigned int DATA_CACHE = 1;
|
||||
// const unsigned int INSTRUCTION_CACHE = 2;
|
||||
const unsigned int UNIFIED_CACHE = 3;
|
||||
unsigned int smt_width = 0;
|
||||
unsigned int logical_cores = 0;
|
||||
unsigned int data[4] = {};
|
||||
if (!has(tINTEL)) return;
|
||||
const uint32_t NO_CACHE = 0;
|
||||
const uint32_t DATA_CACHE = 1;
|
||||
// const uint32_t INSTRUCTION_CACHE = 2;
|
||||
const uint32_t UNIFIED_CACHE = 3;
|
||||
uint32_t smt_width = 0;
|
||||
uint32_t logical_cores = 0;
|
||||
uint32_t data[4] = {};
|
||||
|
||||
if (x2APIC_supported_) {
|
||||
smt_width = numCores_[0];
|
||||
|
@ -201,10 +232,10 @@ class Cpu {
|
|||
*/
|
||||
for (int i = 0; dataCacheLevels_ < maxNumberCacheLevels; i++) {
|
||||
getCpuidEx(0x4, i, data);
|
||||
unsigned int cacheType = extractBit(data[0], 0, 4);
|
||||
uint32_t cacheType = extractBit(data[0], 0, 4);
|
||||
if (cacheType == NO_CACHE) break;
|
||||
if (cacheType == DATA_CACHE || cacheType == UNIFIED_CACHE) {
|
||||
unsigned int actual_logical_cores = extractBit(data[0], 14, 25) + 1;
|
||||
uint32_t actual_logical_cores = extractBit(data[0], 14, 25) + 1;
|
||||
if (logical_cores != 0) { // true only if leaf 0xB is supported and valid
|
||||
actual_logical_cores = (std::min)(actual_logical_cores, logical_cores);
|
||||
}
|
||||
|
@ -231,7 +262,7 @@ public:
|
|||
int displayFamily; // family + extFamily
|
||||
int displayModel; // model + extModel
|
||||
|
||||
unsigned int getNumCores(IntelCpuTopologyLevel level) const {
|
||||
uint32_t getNumCores(IntelCpuTopologyLevel level) const {
|
||||
if (!x2APIC_supported_) XBYAK_THROW_RET(ERR_X2APIC_IS_NOT_SUPPORTED, 0)
|
||||
switch (level) {
|
||||
case SmtLevel: return numCores_[level - 1];
|
||||
|
@ -240,13 +271,13 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
unsigned int getDataCacheLevels() const { return dataCacheLevels_; }
|
||||
unsigned int getCoresSharingDataCache(unsigned int i) const
|
||||
uint32_t getDataCacheLevels() const { return dataCacheLevels_; }
|
||||
uint32_t getCoresSharingDataCache(uint32_t i) const
|
||||
{
|
||||
if (i >= dataCacheLevels_) XBYAK_THROW_RET(ERR_BAD_PARAMETER, 0)
|
||||
return coresSharignDataCache_[i];
|
||||
}
|
||||
unsigned int getDataCacheSize(unsigned int i) const
|
||||
uint32_t getDataCacheSize(uint32_t i) const
|
||||
{
|
||||
if (i >= dataCacheLevels_) XBYAK_THROW_RET(ERR_BAD_PARAMETER, 0)
|
||||
return dataCacheSize_[i];
|
||||
|
@ -255,10 +286,10 @@ public:
|
|||
/*
|
||||
data[] = { eax, ebx, ecx, edx }
|
||||
*/
|
||||
static inline void getCpuid(unsigned int eaxIn, unsigned int data[4])
|
||||
static inline void getCpuid(uint32_t eaxIn, uint32_t data[4])
|
||||
{
|
||||
#ifdef XBYAK_INTEL_CPU_SPECIFIC
|
||||
#ifdef _MSC_VER
|
||||
#ifdef _WIN32
|
||||
__cpuid(reinterpret_cast<int*>(data), eaxIn);
|
||||
#else
|
||||
__cpuid(eaxIn, data[0], data[1], data[2], data[3]);
|
||||
|
@ -268,7 +299,7 @@ public:
|
|||
(void)data;
|
||||
#endif
|
||||
}
|
||||
static inline void getCpuidEx(unsigned int eaxIn, unsigned int ecxIn, unsigned int data[4])
|
||||
static inline void getCpuidEx(uint32_t eaxIn, uint32_t ecxIn, uint32_t data[4])
|
||||
{
|
||||
#ifdef XBYAK_INTEL_CPU_SPECIFIC
|
||||
#ifdef _MSC_VER
|
||||
|
@ -288,7 +319,7 @@ public:
|
|||
#ifdef _MSC_VER
|
||||
return _xgetbv(0);
|
||||
#else
|
||||
unsigned int eax, edx;
|
||||
uint32_t eax, edx;
|
||||
// xgetvb is not support on gcc 4.2
|
||||
// __asm__ volatile("xgetbv" : "=a"(eax), "=d"(edx) : "c"(0));
|
||||
__asm__ volatile(".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c"(0));
|
||||
|
@ -298,93 +329,102 @@ public:
|
|||
return 0;
|
||||
#endif
|
||||
}
|
||||
typedef uint64_t Type;
|
||||
|
||||
static const Type NONE = 0;
|
||||
static const Type tMMX = 1 << 0;
|
||||
static const Type tMMX2 = 1 << 1;
|
||||
static const Type tCMOV = 1 << 2;
|
||||
static const Type tSSE = 1 << 3;
|
||||
static const Type tSSE2 = 1 << 4;
|
||||
static const Type tSSE3 = 1 << 5;
|
||||
static const Type tSSSE3 = 1 << 6;
|
||||
static const Type tSSE41 = 1 << 7;
|
||||
static const Type tSSE42 = 1 << 8;
|
||||
static const Type tPOPCNT = 1 << 9;
|
||||
static const Type tAESNI = 1 << 10;
|
||||
static const Type tSSE5 = 1 << 11;
|
||||
static const Type tOSXSAVE = 1 << 12;
|
||||
static const Type tPCLMULQDQ = 1 << 13;
|
||||
static const Type tAVX = 1 << 14;
|
||||
static const Type tFMA = 1 << 15;
|
||||
#define XBYAK_SPLIT_ID(id) ((0 <= id && id < 64) ? (1ull << (id % 64)) : 0), (id >= 64 ? (1ull << (id % 64)) : 0)
|
||||
#if (__cplusplus >= 201103) || (defined(_MSC_VER) && (_MSC_VER >= 1700)) /* VS2012 */
|
||||
#define XBYAK_DEFINE_TYPE(id, NAME) static const constexpr local::TypeT<XBYAK_SPLIT_ID(id)> NAME{}
|
||||
#else
|
||||
#define XBYAK_DEFINE_TYPE(id, NAME) static const local::TypeT<XBYAK_SPLIT_ID(id)> NAME
|
||||
#endif
|
||||
XBYAK_DEFINE_TYPE(0, tMMX);
|
||||
XBYAK_DEFINE_TYPE(1, tMMX2);
|
||||
XBYAK_DEFINE_TYPE(2, tCMOV);
|
||||
XBYAK_DEFINE_TYPE(3, tSSE);
|
||||
XBYAK_DEFINE_TYPE(4, tSSE2);
|
||||
XBYAK_DEFINE_TYPE(5, tSSE3);
|
||||
XBYAK_DEFINE_TYPE(6, tSSSE3);
|
||||
XBYAK_DEFINE_TYPE(7, tSSE41);
|
||||
XBYAK_DEFINE_TYPE(8, tSSE42);
|
||||
XBYAK_DEFINE_TYPE(9, tPOPCNT);
|
||||
XBYAK_DEFINE_TYPE(10, tAESNI);
|
||||
XBYAK_DEFINE_TYPE(11, tAVX512_FP16);
|
||||
XBYAK_DEFINE_TYPE(12, tOSXSAVE);
|
||||
XBYAK_DEFINE_TYPE(13, tPCLMULQDQ);
|
||||
XBYAK_DEFINE_TYPE(14, tAVX);
|
||||
XBYAK_DEFINE_TYPE(15, tFMA);
|
||||
XBYAK_DEFINE_TYPE(16, t3DN);
|
||||
XBYAK_DEFINE_TYPE(17, tE3DN);
|
||||
XBYAK_DEFINE_TYPE(18, tWAITPKG);
|
||||
XBYAK_DEFINE_TYPE(19, tRDTSCP);
|
||||
XBYAK_DEFINE_TYPE(20, tAVX2);
|
||||
XBYAK_DEFINE_TYPE(21, tBMI1); // andn, bextr, blsi, blsmsk, blsr, tzcnt
|
||||
XBYAK_DEFINE_TYPE(22, tBMI2); // bzhi, mulx, pdep, pext, rorx, sarx, shlx, shrx
|
||||
XBYAK_DEFINE_TYPE(23, tLZCNT);
|
||||
XBYAK_DEFINE_TYPE(24, tINTEL);
|
||||
XBYAK_DEFINE_TYPE(25, tAMD);
|
||||
XBYAK_DEFINE_TYPE(26, tENHANCED_REP); // enhanced rep movsb/stosb
|
||||
XBYAK_DEFINE_TYPE(27, tRDRAND);
|
||||
XBYAK_DEFINE_TYPE(28, tADX); // adcx, adox
|
||||
XBYAK_DEFINE_TYPE(29, tRDSEED); // rdseed
|
||||
XBYAK_DEFINE_TYPE(30, tSMAP); // stac
|
||||
XBYAK_DEFINE_TYPE(31, tHLE); // xacquire, xrelease, xtest
|
||||
XBYAK_DEFINE_TYPE(32, tRTM); // xbegin, xend, xabort
|
||||
XBYAK_DEFINE_TYPE(33, tF16C); // vcvtph2ps, vcvtps2ph
|
||||
XBYAK_DEFINE_TYPE(34, tMOVBE); // mobve
|
||||
XBYAK_DEFINE_TYPE(35, tAVX512F);
|
||||
XBYAK_DEFINE_TYPE(36, tAVX512DQ);
|
||||
XBYAK_DEFINE_TYPE(37, tAVX512_IFMA);
|
||||
XBYAK_DEFINE_TYPE(37, tAVX512IFMA);// = tAVX512_IFMA;
|
||||
XBYAK_DEFINE_TYPE(38, tAVX512PF);
|
||||
XBYAK_DEFINE_TYPE(39, tAVX512ER);
|
||||
XBYAK_DEFINE_TYPE(40, tAVX512CD);
|
||||
XBYAK_DEFINE_TYPE(41, tAVX512BW);
|
||||
XBYAK_DEFINE_TYPE(42, tAVX512VL);
|
||||
XBYAK_DEFINE_TYPE(43, tAVX512_VBMI);
|
||||
XBYAK_DEFINE_TYPE(43, tAVX512VBMI); // = tAVX512_VBMI; // changed by Intel's manual
|
||||
XBYAK_DEFINE_TYPE(44, tAVX512_4VNNIW);
|
||||
XBYAK_DEFINE_TYPE(45, tAVX512_4FMAPS);
|
||||
XBYAK_DEFINE_TYPE(46, tPREFETCHWT1);
|
||||
XBYAK_DEFINE_TYPE(47, tPREFETCHW);
|
||||
XBYAK_DEFINE_TYPE(48, tSHA);
|
||||
XBYAK_DEFINE_TYPE(49, tMPX);
|
||||
XBYAK_DEFINE_TYPE(50, tAVX512_VBMI2);
|
||||
XBYAK_DEFINE_TYPE(51, tGFNI);
|
||||
XBYAK_DEFINE_TYPE(52, tVAES);
|
||||
XBYAK_DEFINE_TYPE(53, tVPCLMULQDQ);
|
||||
XBYAK_DEFINE_TYPE(54, tAVX512_VNNI);
|
||||
XBYAK_DEFINE_TYPE(55, tAVX512_BITALG);
|
||||
XBYAK_DEFINE_TYPE(56, tAVX512_VPOPCNTDQ);
|
||||
XBYAK_DEFINE_TYPE(57, tAVX512_BF16);
|
||||
XBYAK_DEFINE_TYPE(58, tAVX512_VP2INTERSECT);
|
||||
XBYAK_DEFINE_TYPE(59, tAMX_TILE);
|
||||
XBYAK_DEFINE_TYPE(60, tAMX_INT8);
|
||||
XBYAK_DEFINE_TYPE(61, tAMX_BF16);
|
||||
XBYAK_DEFINE_TYPE(62, tAVX_VNNI);
|
||||
XBYAK_DEFINE_TYPE(63, tCLFLUSHOPT);
|
||||
XBYAK_DEFINE_TYPE(64, tCLDEMOTE);
|
||||
XBYAK_DEFINE_TYPE(65, tMOVDIRI);
|
||||
XBYAK_DEFINE_TYPE(66, tMOVDIR64B);
|
||||
XBYAK_DEFINE_TYPE(67, tCLZERO); // AMD Zen
|
||||
|
||||
static const Type t3DN = 1 << 16;
|
||||
static const Type tE3DN = 1 << 17;
|
||||
static const Type tSSE4a = 1 << 18;
|
||||
static const Type tRDTSCP = 1 << 19;
|
||||
static const Type tAVX2 = 1 << 20;
|
||||
static const Type tBMI1 = 1 << 21; // andn, bextr, blsi, blsmsk, blsr, tzcnt
|
||||
static const Type tBMI2 = 1 << 22; // bzhi, mulx, pdep, pext, rorx, sarx, shlx, shrx
|
||||
static const Type tLZCNT = 1 << 23;
|
||||
|
||||
static const Type tINTEL = 1 << 24;
|
||||
static const Type tAMD = 1 << 25;
|
||||
|
||||
static const Type tENHANCED_REP = 1 << 26; // enhanced rep movsb/stosb
|
||||
static const Type tRDRAND = 1 << 27;
|
||||
static const Type tADX = 1 << 28; // adcx, adox
|
||||
static const Type tRDSEED = 1 << 29; // rdseed
|
||||
static const Type tSMAP = 1 << 30; // stac
|
||||
static const Type tHLE = uint64_t(1) << 31; // xacquire, xrelease, xtest
|
||||
static const Type tRTM = uint64_t(1) << 32; // xbegin, xend, xabort
|
||||
static const Type tF16C = uint64_t(1) << 33; // vcvtph2ps, vcvtps2ph
|
||||
static const Type tMOVBE = uint64_t(1) << 34; // mobve
|
||||
static const Type tAVX512F = uint64_t(1) << 35;
|
||||
static const Type tAVX512DQ = uint64_t(1) << 36;
|
||||
static const Type tAVX512_IFMA = uint64_t(1) << 37;
|
||||
static const Type tAVX512IFMA = tAVX512_IFMA;
|
||||
static const Type tAVX512PF = uint64_t(1) << 38;
|
||||
static const Type tAVX512ER = uint64_t(1) << 39;
|
||||
static const Type tAVX512CD = uint64_t(1) << 40;
|
||||
static const Type tAVX512BW = uint64_t(1) << 41;
|
||||
static const Type tAVX512VL = uint64_t(1) << 42;
|
||||
static const Type tAVX512_VBMI = uint64_t(1) << 43;
|
||||
static const Type tAVX512VBMI = tAVX512_VBMI; // changed by Intel's manual
|
||||
static const Type tAVX512_4VNNIW = uint64_t(1) << 44;
|
||||
static const Type tAVX512_4FMAPS = uint64_t(1) << 45;
|
||||
static const Type tPREFETCHWT1 = uint64_t(1) << 46;
|
||||
static const Type tPREFETCHW = uint64_t(1) << 47;
|
||||
static const Type tSHA = uint64_t(1) << 48;
|
||||
static const Type tMPX = uint64_t(1) << 49;
|
||||
static const Type tAVX512_VBMI2 = uint64_t(1) << 50;
|
||||
static const Type tGFNI = uint64_t(1) << 51;
|
||||
static const Type tVAES = uint64_t(1) << 52;
|
||||
static const Type tVPCLMULQDQ = uint64_t(1) << 53;
|
||||
static const Type tAVX512_VNNI = uint64_t(1) << 54;
|
||||
static const Type tAVX512_BITALG = uint64_t(1) << 55;
|
||||
static const Type tAVX512_VPOPCNTDQ = uint64_t(1) << 56;
|
||||
static const Type tAVX512_BF16 = uint64_t(1) << 57;
|
||||
static const Type tAVX512_VP2INTERSECT = uint64_t(1) << 58;
|
||||
static const Type tAMX_TILE = uint64_t(1) << 59;
|
||||
static const Type tAMX_INT8 = uint64_t(1) << 60;
|
||||
static const Type tAMX_BF16 = uint64_t(1) << 61;
|
||||
static const Type tAVX_VNNI = uint64_t(1) << 62;
|
||||
#undef XBYAK_SPLIT_ID
|
||||
#undef XBYAK_DEFINE_TYPE
|
||||
|
||||
Cpu()
|
||||
: type_(NONE)
|
||||
: type_()
|
||||
, x2APIC_supported_(false)
|
||||
, numCores_()
|
||||
, dataCacheSize_()
|
||||
, coresSharignDataCache_()
|
||||
, dataCacheLevels_(0)
|
||||
{
|
||||
unsigned int data[4] = {};
|
||||
const unsigned int& EAX = data[0];
|
||||
const unsigned int& EBX = data[1];
|
||||
const unsigned int& ECX = data[2];
|
||||
const unsigned int& EDX = data[3];
|
||||
uint32_t data[4] = {};
|
||||
const uint32_t& EAX = data[0];
|
||||
const uint32_t& EBX = data[1];
|
||||
const uint32_t& ECX = data[2];
|
||||
const uint32_t& EDX = data[3];
|
||||
getCpuid(0, data);
|
||||
const unsigned int maxNum = EAX;
|
||||
const uint32_t maxNum = EAX;
|
||||
static const char intel[] = "ntel";
|
||||
static const char amd[] = "cAMD";
|
||||
if (ECX == get32bitAsBE(amd)) {
|
||||
|
@ -407,7 +447,8 @@ public:
|
|||
|
||||
// Extended flags information
|
||||
getCpuid(0x80000000, data);
|
||||
if (EAX >= 0x80000001) {
|
||||
const uint32_t maxExtendedNum = EAX;
|
||||
if (maxExtendedNum >= 0x80000001) {
|
||||
getCpuid(0x80000001, data);
|
||||
|
||||
if (EDX & (1U << 31)) type_ |= t3DN;
|
||||
|
@ -419,6 +460,11 @@ public:
|
|||
if (ECX & (1U << 8)) type_ |= tPREFETCHW;
|
||||
}
|
||||
|
||||
if (maxExtendedNum >= 0x80000008) {
|
||||
getCpuid(0x80000008, data);
|
||||
if (EBX & (1U << 0)) type_ |= tCLZERO;
|
||||
}
|
||||
|
||||
getCpuid(1, data);
|
||||
if (ECX & (1U << 0)) type_ |= tSSE3;
|
||||
if (ECX & (1U << 9)) type_ |= tSSSE3;
|
||||
|
@ -469,6 +515,7 @@ public:
|
|||
if (EDX & (1U << 2)) type_ |= tAVX512_4VNNIW;
|
||||
if (EDX & (1U << 3)) type_ |= tAVX512_4FMAPS;
|
||||
if (EDX & (1U << 8)) type_ |= tAVX512_VP2INTERSECT;
|
||||
if ((type_ & tAVX512BW) && (EDX & (1U << 23))) type_ |= tAVX512_FP16;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -483,11 +530,16 @@ public:
|
|||
if (EBX & (1U << 18)) type_ |= tRDSEED;
|
||||
if (EBX & (1U << 19)) type_ |= tADX;
|
||||
if (EBX & (1U << 20)) type_ |= tSMAP;
|
||||
if (EBX & (1U << 23)) type_ |= tCLFLUSHOPT;
|
||||
if (EBX & (1U << 4)) type_ |= tHLE;
|
||||
if (EBX & (1U << 11)) type_ |= tRTM;
|
||||
if (EBX & (1U << 14)) type_ |= tMPX;
|
||||
if (EBX & (1U << 29)) type_ |= tSHA;
|
||||
if (ECX & (1U << 0)) type_ |= tPREFETCHWT1;
|
||||
if (ECX & (1U << 5)) type_ |= tWAITPKG;
|
||||
if (ECX & (1U << 25)) type_ |= tCLDEMOTE;
|
||||
if (ECX & (1U << 27)) type_ |= tMOVDIRI;
|
||||
if (ECX & (1U << 28)) type_ |= tMOVDIR64B;
|
||||
if (EDX & (1U << 24)) type_ |= tAMX_TILE;
|
||||
if (EDX & (1U << 25)) type_ |= tAMX_INT8;
|
||||
if (EDX & (1U << 22)) type_ |= tAMX_BF16;
|
||||
|
@ -511,9 +563,9 @@ public:
|
|||
printf("display:family=%X, model=%X\n", displayFamily, displayModel);
|
||||
#endif
|
||||
}
|
||||
bool has(Type type) const
|
||||
bool has(const Type& type) const
|
||||
{
|
||||
return (type & type_) != 0;
|
||||
return (type & type_) == type;
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -526,7 +578,7 @@ public:
|
|||
#ifdef _MSC_VER
|
||||
return __rdtsc();
|
||||
#else
|
||||
unsigned int eax, edx;
|
||||
uint32_t eax, edx;
|
||||
__asm__ volatile("rdtsc" : "=a"(eax), "=d"(edx));
|
||||
return ((uint64_t)edx << 32) | eax;
|
||||
#endif
|
||||
|
@ -563,7 +615,7 @@ const int UseRDX = 1 << 7;
|
|||
|
||||
class Pack {
|
||||
static const size_t maxTblNum = 15;
|
||||
const Xbyak::Reg64 *tbl_[maxTblNum];
|
||||
Xbyak::Reg64 tbl_[maxTblNum];
|
||||
size_t n_;
|
||||
public:
|
||||
Pack() : tbl_(), n_(0) {}
|
||||
|
@ -580,32 +632,36 @@ public:
|
|||
return *this;
|
||||
}
|
||||
Pack(const Xbyak::Reg64& t0)
|
||||
{ n_ = 1; tbl_[0] = &t0; }
|
||||
{ n_ = 1; tbl_[0] = t0; }
|
||||
Pack(const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
|
||||
{ n_ = 2; tbl_[0] = &t0; tbl_[1] = &t1; }
|
||||
{ n_ = 2; tbl_[0] = t0; tbl_[1] = t1; }
|
||||
Pack(const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
|
||||
{ n_ = 3; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; }
|
||||
{ n_ = 3; tbl_[0] = t0; tbl_[1] = t1; tbl_[2] = t2; }
|
||||
Pack(const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
|
||||
{ n_ = 4; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; }
|
||||
{ n_ = 4; tbl_[0] = t0; tbl_[1] = t1; tbl_[2] = t2; tbl_[3] = t3; }
|
||||
Pack(const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
|
||||
{ n_ = 5; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; }
|
||||
{ n_ = 5; tbl_[0] = t0; tbl_[1] = t1; tbl_[2] = t2; tbl_[3] = t3; tbl_[4] = t4; }
|
||||
Pack(const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
|
||||
{ n_ = 6; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; tbl_[5] = &t5; }
|
||||
{ n_ = 6; tbl_[0] = t0; tbl_[1] = t1; tbl_[2] = t2; tbl_[3] = t3; tbl_[4] = t4; tbl_[5] = t5; }
|
||||
Pack(const Xbyak::Reg64& t6, const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
|
||||
{ n_ = 7; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; tbl_[5] = &t5; tbl_[6] = &t6; }
|
||||
{ n_ = 7; tbl_[0] = t0; tbl_[1] = t1; tbl_[2] = t2; tbl_[3] = t3; tbl_[4] = t4; tbl_[5] = t5; tbl_[6] = t6; }
|
||||
Pack(const Xbyak::Reg64& t7, const Xbyak::Reg64& t6, const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
|
||||
{ n_ = 8; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; tbl_[5] = &t5; tbl_[6] = &t6; tbl_[7] = &t7; }
|
||||
{ n_ = 8; tbl_[0] = t0; tbl_[1] = t1; tbl_[2] = t2; tbl_[3] = t3; tbl_[4] = t4; tbl_[5] = t5; tbl_[6] = t6; tbl_[7] = t7; }
|
||||
Pack(const Xbyak::Reg64& t8, const Xbyak::Reg64& t7, const Xbyak::Reg64& t6, const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
|
||||
{ n_ = 9; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; tbl_[5] = &t5; tbl_[6] = &t6; tbl_[7] = &t7; tbl_[8] = &t8; }
|
||||
{ n_ = 9; tbl_[0] = t0; tbl_[1] = t1; tbl_[2] = t2; tbl_[3] = t3; tbl_[4] = t4; tbl_[5] = t5; tbl_[6] = t6; tbl_[7] = t7; tbl_[8] = t8; }
|
||||
Pack(const Xbyak::Reg64& t9, const Xbyak::Reg64& t8, const Xbyak::Reg64& t7, const Xbyak::Reg64& t6, const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
|
||||
{ n_ = 10; tbl_[0] = &t0; tbl_[1] = &t1; tbl_[2] = &t2; tbl_[3] = &t3; tbl_[4] = &t4; tbl_[5] = &t5; tbl_[6] = &t6; tbl_[7] = &t7; tbl_[8] = &t8; tbl_[9] = &t9; }
|
||||
{ n_ = 10; tbl_[0] = t0; tbl_[1] = t1; tbl_[2] = t2; tbl_[3] = t3; tbl_[4] = t4; tbl_[5] = t5; tbl_[6] = t6; tbl_[7] = t7; tbl_[8] = t8; tbl_[9] = t9; }
|
||||
Pack(const Xbyak::Reg64& ta, const Xbyak::Reg64& t9, const Xbyak::Reg64& t8, const Xbyak::Reg64& t7, const Xbyak::Reg64& t6, const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
|
||||
{ n_ = 11; tbl_[0] = t0; tbl_[1] = t1; tbl_[2] = t2; tbl_[3] = t3; tbl_[4] = t4; tbl_[5] = t5; tbl_[6] = t6; tbl_[7] = t7; tbl_[8] = t8; tbl_[9] = t9; tbl_[10] = ta; }
|
||||
Pack(const Xbyak::Reg64& tb, const Xbyak::Reg64& ta, const Xbyak::Reg64& t9, const Xbyak::Reg64& t8, const Xbyak::Reg64& t7, const Xbyak::Reg64& t6, const Xbyak::Reg64& t5, const Xbyak::Reg64& t4, const Xbyak::Reg64& t3, const Xbyak::Reg64& t2, const Xbyak::Reg64& t1, const Xbyak::Reg64& t0)
|
||||
{ n_ = 12; tbl_[0] = t0; tbl_[1] = t1; tbl_[2] = t2; tbl_[3] = t3; tbl_[4] = t4; tbl_[5] = t5; tbl_[6] = t6; tbl_[7] = t7; tbl_[8] = t8; tbl_[9] = t9; tbl_[10] = ta; tbl_[11] = tb; }
|
||||
Pack& append(const Xbyak::Reg64& t)
|
||||
{
|
||||
if (n_ == maxTblNum) {
|
||||
fprintf(stderr, "ERR Pack::can't append\n");
|
||||
XBYAK_THROW_RET(ERR_BAD_PARAMETER, *this)
|
||||
}
|
||||
tbl_[n_++] = &t;
|
||||
tbl_[n_++] = t;
|
||||
return *this;
|
||||
}
|
||||
void init(const Xbyak::Reg64 *tbl, size_t n)
|
||||
|
@ -616,7 +672,7 @@ public:
|
|||
}
|
||||
n_ = n;
|
||||
for (size_t i = 0; i < n; i++) {
|
||||
tbl_[i] = &tbl[i];
|
||||
tbl_[i] = tbl[i];
|
||||
}
|
||||
}
|
||||
const Xbyak::Reg64& operator[](size_t n) const
|
||||
|
@ -625,7 +681,7 @@ public:
|
|||
fprintf(stderr, "ERR Pack bad n=%d(%d)\n", (int)n, (int)n_);
|
||||
XBYAK_THROW_RET(ERR_BAD_PARAMETER, rax)
|
||||
}
|
||||
return *tbl_[n];
|
||||
return tbl_[n];
|
||||
}
|
||||
size_t size() const { return n_; }
|
||||
/*
|
||||
|
@ -648,7 +704,7 @@ public:
|
|||
void put() const
|
||||
{
|
||||
for (size_t i = 0; i < n_; i++) {
|
||||
printf("%s ", tbl_[i]->toString());
|
||||
printf("%s ", tbl_[i].toString());
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
|
12
externals/dynarmic/src/dynarmic/CMakeLists.txt
vendored
12
externals/dynarmic/src/dynarmic/CMakeLists.txt
vendored
|
@ -55,8 +55,6 @@ add_library(dynarmic
|
|||
common/u128.cpp
|
||||
common/u128.h
|
||||
common/variant_util.h
|
||||
common/x64_disassemble.cpp
|
||||
common/x64_disassemble.h
|
||||
frontend/A32/a32_types.cpp
|
||||
frontend/A32/a32_types.h
|
||||
frontend/A64/a64_types.cpp
|
||||
|
@ -257,6 +255,12 @@ if ("A64" IN_LIST DYNARMIC_FRONTENDS)
|
|||
endif()
|
||||
|
||||
if (ARCHITECTURE STREQUAL "x86_64")
|
||||
target_link_libraries(dynarmic
|
||||
PRIVATE
|
||||
$<BUILD_INTERFACE:xbyak>
|
||||
$<BUILD_INTERFACE:Zydis>
|
||||
)
|
||||
|
||||
target_sources(dynarmic PRIVATE
|
||||
backend/x64/abi.cpp
|
||||
backend/x64/abi.h
|
||||
|
@ -300,6 +304,8 @@ if (ARCHITECTURE STREQUAL "x86_64")
|
|||
backend/x64/stack_layout.h
|
||||
common/spin_lock_x64.cpp
|
||||
common/spin_lock_x64.h
|
||||
common/x64_disassemble.cpp
|
||||
common/x64_disassemble.h
|
||||
)
|
||||
|
||||
if ("A32" IN_LIST DYNARMIC_FRONTENDS)
|
||||
|
@ -383,8 +389,6 @@ target_link_libraries(dynarmic
|
|||
$<BUILD_INTERFACE:boost>
|
||||
$<BUILD_INTERFACE:fmt::fmt>
|
||||
tsl::robin_map
|
||||
$<BUILD_INTERFACE:xbyak>
|
||||
$<BUILD_INTERFACE:Zydis>
|
||||
"$<$<BOOL:DYNARMIC_USE_LLVM>:${llvm_libs}>"
|
||||
)
|
||||
if (DYNARMIC_ENABLE_CPU_FEATURE_DETECTION)
|
||||
|
|
|
@ -167,7 +167,8 @@ private:
|
|||
PerformCacheInvalidation();
|
||||
}
|
||||
|
||||
IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks, {conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions});
|
||||
IR::Block ir_block = A32::Translate(A32::LocationDescriptor{descriptor}, conf.callbacks,
|
||||
{conf.arch_version, conf.define_unpredictable_behaviour, conf.hook_hint_instructions, conf.check_halt_on_memory_access});
|
||||
Optimization::PolyfillPass(ir_block, polyfill_options);
|
||||
if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) {
|
||||
Optimization::A32GetSetElimination(ir_block);
|
||||
|
|
|
@ -265,7 +265,7 @@ private:
|
|||
// JIT Compile
|
||||
const auto get_code = [this](u64 vaddr) { return conf.callbacks->MemoryReadCode(vaddr); };
|
||||
IR::Block ir_block = A64::Translate(A64::LocationDescriptor{current_location}, get_code,
|
||||
{conf.define_unpredictable_behaviour, conf.wall_clock_cntpct});
|
||||
{conf.define_unpredictable_behaviour, conf.wall_clock_cntpct, conf.hook_hint_instructions, conf.check_halt_on_memory_access});
|
||||
Optimization::PolyfillPass(ir_block, polyfill_options);
|
||||
Optimization::A64CallbackConfigPass(ir_block, conf);
|
||||
if (conf.HasOptimization(OptimizationFlag::GetSetElimination)) {
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
#include <mcl/assert.hpp>
|
||||
#include <mcl/mp/metavalue/lift_value.hpp>
|
||||
#include <mcl/mp/typelist/cartesian_product.hpp>
|
||||
#include <mcl/mp/typelist/get.hpp>
|
||||
#include <mcl/mp/typelist/lift_sequence.hpp>
|
||||
#include <mcl/mp/typelist/list.hpp>
|
||||
#include <mcl/mp/typelist/lower_to_tuple.hpp>
|
||||
|
@ -941,15 +942,14 @@ static void EmitFPRound(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, siz
|
|||
using exact_list = mp::list<std::true_type, std::false_type>;
|
||||
|
||||
static const auto lut = Common::GenerateLookupTableFromList(
|
||||
[](auto args) {
|
||||
[]<typename I>(I) {
|
||||
return std::pair{
|
||||
mp::lower_to_tuple_v<decltype(args)>,
|
||||
mp::lower_to_tuple_v<I>,
|
||||
Common::FptrCast(
|
||||
[](u64 input, FP::FPSR& fpsr, FP::FPCR fpcr) {
|
||||
constexpr auto t = mp::lower_to_tuple_v<decltype(args)>;
|
||||
constexpr size_t fsize = std::get<0>(t);
|
||||
constexpr FP::RoundingMode rounding_mode = std::get<1>(t);
|
||||
constexpr bool exact = std::get<2>(t);
|
||||
constexpr size_t fsize = mp::get<0, I>::value;
|
||||
constexpr FP::RoundingMode rounding_mode = mp::get<1, I>::value;
|
||||
constexpr bool exact = mp::get<2, I>::value;
|
||||
using InputSize = mcl::unsigned_integer_of_size<fsize>;
|
||||
|
||||
return FP::FPRoundInt<InputSize>(static_cast<InputSize>(input), fpcr, rounding_mode, exact, fpsr);
|
||||
|
@ -1582,14 +1582,13 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
|||
mp::lift_value<FP::RoundingMode::ToNearest_TieAwayFromZero>>;
|
||||
|
||||
static const auto lut = Common::GenerateLookupTableFromList(
|
||||
[](auto args) {
|
||||
[]<typename I>(I) {
|
||||
return std::pair{
|
||||
mp::lower_to_tuple_v<decltype(args)>,
|
||||
mp::lower_to_tuple_v<I>,
|
||||
Common::FptrCast(
|
||||
[](u64 input, FP::FPSR& fpsr, FP::FPCR fpcr) {
|
||||
constexpr auto t = mp::lower_to_tuple_v<decltype(args)>;
|
||||
constexpr size_t fbits = std::get<0>(t);
|
||||
constexpr FP::RoundingMode rounding_mode = std::get<1>(t);
|
||||
constexpr size_t fbits = mp::get<0, I>::value;
|
||||
constexpr FP::RoundingMode rounding_mode = mp::get<1, I>::value;
|
||||
using FPT = mcl::unsigned_integer_of_size<fsize>;
|
||||
|
||||
return FP::FPToFixed<FPT>(isize, static_cast<FPT>(input), fbits, unsigned_, fpcr, rounding_mode, fpsr);
|
||||
|
|
|
@ -47,15 +47,13 @@ void EmitX64::EmitPackedAddS8(EmitContext& ctx, IR::Inst* inst) {
|
|||
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
|
||||
|
||||
if (ge_inst) {
|
||||
const Xbyak::Xmm saturated_sum = ctx.reg_alloc.ScratchXmm();
|
||||
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
|
||||
|
||||
code.pxor(xmm_ge, xmm_ge);
|
||||
code.movdqa(saturated_sum, xmm_a);
|
||||
code.paddsb(saturated_sum, xmm_b);
|
||||
code.pcmpgtb(xmm_ge, saturated_sum);
|
||||
code.pcmpeqb(saturated_sum, saturated_sum);
|
||||
code.pxor(xmm_ge, saturated_sum);
|
||||
code.pcmpeqb(xmm0, xmm0);
|
||||
|
||||
code.movdqa(xmm_ge, xmm_a);
|
||||
code.paddsb(xmm_ge, xmm_b);
|
||||
code.pcmpgtb(xmm_ge, xmm0);
|
||||
|
||||
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
|
||||
ctx.EraseInstruction(ge_inst);
|
||||
|
@ -116,15 +114,13 @@ void EmitX64::EmitPackedAddS16(EmitContext& ctx, IR::Inst* inst) {
|
|||
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
|
||||
|
||||
if (ge_inst) {
|
||||
const Xbyak::Xmm saturated_sum = ctx.reg_alloc.ScratchXmm();
|
||||
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
|
||||
|
||||
code.pxor(xmm_ge, xmm_ge);
|
||||
code.movdqa(saturated_sum, xmm_a);
|
||||
code.paddsw(saturated_sum, xmm_b);
|
||||
code.pcmpgtw(xmm_ge, saturated_sum);
|
||||
code.pcmpeqw(saturated_sum, saturated_sum);
|
||||
code.pxor(xmm_ge, saturated_sum);
|
||||
code.pcmpeqw(xmm0, xmm0);
|
||||
|
||||
code.movdqa(xmm_ge, xmm_a);
|
||||
code.paddsw(xmm_ge, xmm_b);
|
||||
code.pcmpgtw(xmm_ge, xmm0);
|
||||
|
||||
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
|
||||
ctx.EraseInstruction(ge_inst);
|
||||
|
@ -166,15 +162,13 @@ void EmitX64::EmitPackedSubS8(EmitContext& ctx, IR::Inst* inst) {
|
|||
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
|
||||
|
||||
if (ge_inst) {
|
||||
const Xbyak::Xmm saturated_sum = ctx.reg_alloc.ScratchXmm();
|
||||
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
|
||||
|
||||
code.pxor(xmm_ge, xmm_ge);
|
||||
code.movdqa(saturated_sum, xmm_a);
|
||||
code.psubsb(saturated_sum, xmm_b);
|
||||
code.pcmpgtb(xmm_ge, saturated_sum);
|
||||
code.pcmpeqb(saturated_sum, saturated_sum);
|
||||
code.pxor(xmm_ge, saturated_sum);
|
||||
code.pcmpeqb(xmm0, xmm0);
|
||||
|
||||
code.movdqa(xmm_ge, xmm_a);
|
||||
code.psubsb(xmm_ge, xmm_b);
|
||||
code.pcmpgtb(xmm_ge, xmm0);
|
||||
|
||||
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
|
||||
ctx.EraseInstruction(ge_inst);
|
||||
|
@ -244,15 +238,13 @@ void EmitX64::EmitPackedSubS16(EmitContext& ctx, IR::Inst* inst) {
|
|||
const Xbyak::Xmm xmm_b = ctx.reg_alloc.UseXmm(args[1]);
|
||||
|
||||
if (ge_inst) {
|
||||
const Xbyak::Xmm saturated_diff = ctx.reg_alloc.ScratchXmm();
|
||||
const Xbyak::Xmm xmm_ge = ctx.reg_alloc.ScratchXmm();
|
||||
|
||||
code.pxor(xmm_ge, xmm_ge);
|
||||
code.movdqa(saturated_diff, xmm_a);
|
||||
code.psubsw(saturated_diff, xmm_b);
|
||||
code.pcmpgtw(xmm_ge, saturated_diff);
|
||||
code.pcmpeqw(saturated_diff, saturated_diff);
|
||||
code.pxor(xmm_ge, saturated_diff);
|
||||
code.pcmpeqw(xmm0, xmm0);
|
||||
|
||||
code.movdqa(xmm_ge, xmm_a);
|
||||
code.psubsw(xmm_ge, xmm_b);
|
||||
code.pcmpgtw(xmm_ge, xmm0);
|
||||
|
||||
ctx.reg_alloc.DefineValue(ge_inst, xmm_ge);
|
||||
ctx.EraseInstruction(ge_inst);
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
#include <mcl/assert.hpp>
|
||||
#include <mcl/mp/metavalue/lift_value.hpp>
|
||||
#include <mcl/mp/typelist/cartesian_product.hpp>
|
||||
#include <mcl/mp/typelist/get.hpp>
|
||||
#include <mcl/mp/typelist/lift_sequence.hpp>
|
||||
#include <mcl/mp/typelist/list.hpp>
|
||||
#include <mcl/mp/typelist/lower_to_tuple.hpp>
|
||||
|
@ -663,13 +664,12 @@ void EmitX64::EmitFPVectorFromHalf32(EmitContext& ctx, IR::Inst* inst) {
|
|||
mp::lift_value<FP::RoundingMode::ToNearest_TieAwayFromZero>>;
|
||||
|
||||
static const auto lut = Common::GenerateLookupTableFromList(
|
||||
[](auto arg) {
|
||||
[]<typename I>(I) {
|
||||
return std::pair{
|
||||
mp::lower_to_tuple_v<decltype(arg)>,
|
||||
mp::lower_to_tuple_v<I>,
|
||||
Common::FptrCast(
|
||||
[](VectorArray<u32>& output, const VectorArray<u16>& input, FP::FPCR fpcr, FP::FPSR& fpsr) {
|
||||
constexpr auto t = mp::lower_to_tuple_v<decltype(arg)>;
|
||||
constexpr FP::RoundingMode rounding_mode = std::get<0>(t);
|
||||
constexpr FP::RoundingMode rounding_mode = mp::get<0, I>::value;
|
||||
|
||||
for (size_t i = 0; i < output.size(); ++i) {
|
||||
output[i] = FP::FPConvert<u32, u16>(input[i], fpcr, rounding_mode, fpsr);
|
||||
|
@ -1421,8 +1421,6 @@ void EmitX64::EmitFPVectorRecipStepFused64(EmitContext& ctx, IR::Inst* inst) {
|
|||
|
||||
template<size_t fsize>
|
||||
void EmitFPVectorRoundInt(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
using FPT = mcl::unsigned_integer_of_size<fsize>;
|
||||
|
||||
const auto rounding = static_cast<FP::RoundingMode>(inst->GetArg(1).GetU8());
|
||||
const bool exact = inst->GetArg(2).GetU1();
|
||||
|
||||
|
@ -1460,14 +1458,14 @@ void EmitFPVectorRoundInt(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
|||
using exact_list = mp::list<std::true_type, std::false_type>;
|
||||
|
||||
static const auto lut = Common::GenerateLookupTableFromList(
|
||||
[](auto arg) {
|
||||
[]<typename I>(I) {
|
||||
using FPT = mcl::unsigned_integer_of_size<fsize>; // WORKAROUND: For issue 678 on MSVC
|
||||
return std::pair{
|
||||
mp::lower_to_tuple_v<decltype(arg)>,
|
||||
mp::lower_to_tuple_v<I>,
|
||||
Common::FptrCast(
|
||||
[](VectorArray<FPT>& output, const VectorArray<FPT>& input, FP::FPCR fpcr, FP::FPSR& fpsr) {
|
||||
constexpr auto t = mp::lower_to_tuple_v<decltype(arg)>;
|
||||
constexpr FP::RoundingMode rounding_mode = std::get<0>(t);
|
||||
constexpr bool exact = std::get<1>(t);
|
||||
constexpr FP::RoundingMode rounding_mode = mp::get<0, I>::value;
|
||||
constexpr bool exact = mp::get<1, I>::value;
|
||||
|
||||
for (size_t i = 0; i < output.size(); ++i) {
|
||||
output[i] = static_cast<FPT>(FP::FPRoundInt<FPT>(input[i], fpcr, rounding_mode, exact, fpsr));
|
||||
|
@ -1686,13 +1684,12 @@ void EmitX64::EmitFPVectorToHalf32(EmitContext& ctx, IR::Inst* inst) {
|
|||
mp::lift_value<FP::RoundingMode::ToNearest_TieAwayFromZero>>;
|
||||
|
||||
static const auto lut = Common::GenerateLookupTableFromList(
|
||||
[](auto arg) {
|
||||
[]<typename I>(I) {
|
||||
return std::pair{
|
||||
mp::lower_to_tuple_v<decltype(arg)>,
|
||||
mp::lower_to_tuple_v<I>,
|
||||
Common::FptrCast(
|
||||
[](VectorArray<u16>& output, const VectorArray<u32>& input, FP::FPCR fpcr, FP::FPSR& fpsr) {
|
||||
constexpr auto t = mp::lower_to_tuple_v<decltype(arg)>;
|
||||
constexpr FP::RoundingMode rounding_mode = std::get<0>(t);
|
||||
constexpr FP::RoundingMode rounding_mode = mp::get<0, I>::value;
|
||||
|
||||
for (size_t i = 0; i < output.size(); ++i) {
|
||||
if (i < input.size()) {
|
||||
|
@ -1710,8 +1707,6 @@ void EmitX64::EmitFPVectorToHalf32(EmitContext& ctx, IR::Inst* inst) {
|
|||
|
||||
template<size_t fsize, bool unsigned_>
|
||||
void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
||||
using FPT = mcl::unsigned_integer_of_size<fsize>;
|
||||
|
||||
const size_t fbits = inst->GetArg(1).GetU8();
|
||||
const auto rounding = static_cast<FP::RoundingMode>(inst->GetArg(2).GetU8());
|
||||
[[maybe_unused]] const bool fpcr_controlled = inst->GetArg(3).GetU1();
|
||||
|
@ -1814,6 +1809,7 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
|||
FCODE(orp)(src, exceed_unsigned);
|
||||
}
|
||||
} else {
|
||||
using FPT = mcl::unsigned_integer_of_size<fsize>; // WORKAROUND: For issue 678 on MSVC
|
||||
constexpr u64 integer_max = static_cast<FPT>(std::numeric_limits<std::conditional_t<unsigned_, FPT, std::make_signed_t<FPT>>>::max());
|
||||
|
||||
code.movaps(xmm0, GetVectorOf<fsize, float_upper_limit_signed>(code));
|
||||
|
@ -1837,14 +1833,14 @@ void EmitFPVectorToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
|||
mp::lift_value<FP::RoundingMode::ToNearest_TieAwayFromZero>>;
|
||||
|
||||
static const auto lut = Common::GenerateLookupTableFromList(
|
||||
[](auto arg) {
|
||||
[]<typename I>(I) {
|
||||
using FPT = mcl::unsigned_integer_of_size<fsize>; // WORKAROUND: For issue 678 on MSVC
|
||||
return std::pair{
|
||||
mp::lower_to_tuple_v<decltype(arg)>,
|
||||
mp::lower_to_tuple_v<I>,
|
||||
Common::FptrCast(
|
||||
[](VectorArray<FPT>& output, const VectorArray<FPT>& input, FP::FPCR fpcr, FP::FPSR& fpsr) {
|
||||
constexpr auto t = mp::lower_to_tuple_v<decltype(arg)>;
|
||||
constexpr size_t fbits = std::get<0>(t);
|
||||
constexpr FP::RoundingMode rounding_mode = std::get<1>(t);
|
||||
constexpr size_t fbits = mp::get<0, I>::value;
|
||||
constexpr FP::RoundingMode rounding_mode = mp::get<1, I>::value;
|
||||
|
||||
for (size_t i = 0; i < output.size(); ++i) {
|
||||
output[i] = static_cast<FPT>(FP::FPToFixed<FPT>(fsize, input[i], fbits, unsigned_, fpcr, rounding_mode, fpsr));
|
||||
|
|
|
@ -76,11 +76,11 @@ MachHandler::MachHandler() {
|
|||
#undef KCHECK
|
||||
|
||||
thread = std::thread(&MachHandler::MessagePump, this);
|
||||
thread.detach();
|
||||
}
|
||||
|
||||
MachHandler::~MachHandler() {
|
||||
mach_port_destroy(mach_task_self(), server_port);
|
||||
thread.join();
|
||||
mach_port_deallocate(mach_task_self(), server_port);
|
||||
}
|
||||
|
||||
void MachHandler::MessagePump() {
|
||||
|
|
|
@ -29,6 +29,12 @@ struct TranslationOptions {
|
|||
/// If this is false, we treat the instruction as a NOP.
|
||||
/// If this is true, we emit an ExceptionRaised instruction.
|
||||
bool hook_hint_instructions = true;
|
||||
|
||||
/// This changes what IR we emit when we translate a memory instruction.
|
||||
/// If this is false, memory accesses are not considered terminal.
|
||||
/// If this is true, memory access are considered terminal. This allows
|
||||
/// accurately emulating protection fault handlers.
|
||||
bool check_halt_on_memory_access = false;
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
|
@ -53,6 +53,15 @@ bool TranslatorVisitor::RaiseException(Exception exception) {
|
|||
return false;
|
||||
}
|
||||
|
||||
bool TranslatorVisitor::MemoryInstructionContinues() {
|
||||
if (options.check_halt_on_memory_access) {
|
||||
ir.SetTerm(IR::Term::LinkBlock{ir.current_location.AdvancePC(static_cast<s32>(current_instruction_size))});
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
IR::UAny TranslatorVisitor::I(size_t bitsize, u64 value) {
|
||||
switch (bitsize) {
|
||||
case 8:
|
||||
|
|
|
@ -41,6 +41,7 @@ struct TranslatorVisitor final {
|
|||
bool UndefinedInstruction();
|
||||
bool DecodeError();
|
||||
bool RaiseException(Exception exception);
|
||||
bool MemoryInstructionContinues();
|
||||
|
||||
struct ImmAndCarry {
|
||||
u32 imm32;
|
||||
|
|
|
@ -119,7 +119,7 @@ bool TranslatorVisitor::v8_VST_multiple(bool D, Reg n, size_t Vd, Imm<4> type, s
|
|||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
bool TranslatorVisitor::v8_VLD_multiple(bool D, Reg n, size_t Vd, Imm<4> type, size_t size, size_t align, Reg m) {
|
||||
|
@ -176,7 +176,7 @@ bool TranslatorVisitor::v8_VLD_multiple(bool D, Reg n, size_t Vd, Imm<4> type, s
|
|||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
bool TranslatorVisitor::v8_VLD_all_lanes(bool D, Reg n, size_t Vd, size_t nn, size_t sz, bool T, bool a, Reg m) {
|
||||
|
@ -241,7 +241,7 @@ bool TranslatorVisitor::v8_VLD_all_lanes(bool D, Reg n, size_t Vd, size_t nn, si
|
|||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
bool TranslatorVisitor::v8_VST_single(bool D, Reg n, size_t Vd, size_t sz, size_t nn, size_t index_align, Reg m) {
|
||||
|
@ -305,7 +305,7 @@ bool TranslatorVisitor::v8_VST_single(bool D, Reg n, size_t Vd, size_t sz, size_
|
|||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
bool TranslatorVisitor::v8_VLD_single(bool D, Reg n, size_t Vd, size_t sz, size_t nn, size_t index_align, Reg m) {
|
||||
|
@ -370,6 +370,6 @@ bool TranslatorVisitor::v8_VLD_single(bool D, Reg n, size_t Vd, size_t sz, size_
|
|||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
} // namespace Dynarmic::A32
|
||||
|
|
|
@ -30,7 +30,8 @@ bool TranslatorVisitor::asimd_VMOV_imm(Imm<1> a, bool D, Imm<1> b, Imm<1> c, Imm
|
|||
};
|
||||
|
||||
// VMVN
|
||||
const auto mvn = [&] {
|
||||
// mvn is a predefined macro in arm64 MSVC
|
||||
const auto mvn_ = [&] {
|
||||
const auto imm64 = ir.Imm64(~imm);
|
||||
if (Q) {
|
||||
ir.SetVector(d_reg, ir.VectorBroadcast(64, imm64));
|
||||
|
@ -89,7 +90,7 @@ bool TranslatorVisitor::asimd_VMOV_imm(Imm<1> a, bool D, Imm<1> b, Imm<1> c, Imm
|
|||
case 0b10101:
|
||||
case 0b11001:
|
||||
case 0b11011:
|
||||
return mvn();
|
||||
return mvn_();
|
||||
case 0b00010:
|
||||
case 0b00110:
|
||||
case 0b01010:
|
||||
|
|
|
@ -83,7 +83,7 @@ bool TranslatorVisitor::arm_LDR_lit(Cond cond, bool U, Reg t, Imm<12> imm12) {
|
|||
}
|
||||
|
||||
ir.SetRegister(t, data);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// LDR <Rt>, [<Rn>, #+/-<imm>]{!}
|
||||
|
@ -120,7 +120,7 @@ bool TranslatorVisitor::arm_LDR_imm(Cond cond, bool P, bool U, bool W, Reg n, Re
|
|||
}
|
||||
|
||||
ir.SetRegister(t, data);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// LDR <Rt>, [<Rn>, #+/-<Rm>]{!}
|
||||
|
@ -150,7 +150,7 @@ bool TranslatorVisitor::arm_LDR_reg(Cond cond, bool P, bool U, bool W, Reg n, Re
|
|||
}
|
||||
|
||||
ir.SetRegister(t, data);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// LDRB <Rt>, [PC, #+/-<imm>]
|
||||
|
@ -170,7 +170,7 @@ bool TranslatorVisitor::arm_LDRB_lit(Cond cond, bool U, Reg t, Imm<12> imm12) {
|
|||
const auto data = ir.ZeroExtendByteToWord(ir.ReadMemory8(ir.Imm32(address), IR::AccType::NORMAL));
|
||||
|
||||
ir.SetRegister(t, data);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// LDRB <Rt>, [<Rn>, #+/-<imm>]{!}
|
||||
|
@ -199,7 +199,7 @@ bool TranslatorVisitor::arm_LDRB_imm(Cond cond, bool P, bool U, bool W, Reg n, R
|
|||
const auto data = ir.ZeroExtendByteToWord(ir.ReadMemory8(address, IR::AccType::NORMAL));
|
||||
|
||||
ir.SetRegister(t, data);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// LDRB <Rt>, [<Rn>, #+/-<Rm>]{!}
|
||||
|
@ -223,7 +223,7 @@ bool TranslatorVisitor::arm_LDRB_reg(Cond cond, bool P, bool U, bool W, Reg n, R
|
|||
const auto data = ir.ZeroExtendByteToWord(ir.ReadMemory8(address, IR::AccType::NORMAL));
|
||||
|
||||
ir.SetRegister(t, data);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// LDRD <Rt>, <Rt2>, [PC, #+/-<imm>]
|
||||
|
@ -257,7 +257,7 @@ bool TranslatorVisitor::arm_LDRD_lit(Cond cond, bool U, Reg t, Imm<4> imm8a, Imm
|
|||
ir.SetRegister(t, ir.LeastSignificantWord(data));
|
||||
ir.SetRegister(t2, ir.MostSignificantWord(data).result);
|
||||
}
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// LDRD <Rt>, [<Rn>, #+/-<imm>]{!}
|
||||
|
@ -303,7 +303,7 @@ bool TranslatorVisitor::arm_LDRD_imm(Cond cond, bool P, bool U, bool W, Reg n, R
|
|||
ir.SetRegister(t, ir.LeastSignificantWord(data));
|
||||
ir.SetRegister(t2, ir.MostSignificantWord(data).result);
|
||||
}
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// LDRD <Rt>, [<Rn>, #+/-<Rm>]{!}
|
||||
|
@ -343,7 +343,7 @@ bool TranslatorVisitor::arm_LDRD_reg(Cond cond, bool P, bool U, bool W, Reg n, R
|
|||
ir.SetRegister(t, ir.LeastSignificantWord(data));
|
||||
ir.SetRegister(t2, ir.MostSignificantWord(data).result);
|
||||
}
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// LDRH <Rt>, [PC, #-/+<imm>]
|
||||
|
@ -368,7 +368,7 @@ bool TranslatorVisitor::arm_LDRH_lit(Cond cond, bool P, bool U, bool W, Reg t, I
|
|||
const auto data = ir.ZeroExtendHalfToWord(ir.ReadMemory16(ir.Imm32(address), IR::AccType::NORMAL));
|
||||
|
||||
ir.SetRegister(t, data);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// LDRH <Rt>, [<Rn>, #+/-<imm>]{!}
|
||||
|
@ -397,7 +397,7 @@ bool TranslatorVisitor::arm_LDRH_imm(Cond cond, bool P, bool U, bool W, Reg n, R
|
|||
const auto data = ir.ZeroExtendHalfToWord(ir.ReadMemory16(address, IR::AccType::NORMAL));
|
||||
|
||||
ir.SetRegister(t, data);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// LDRH <Rt>, [<Rn>, #+/-<Rm>]{!}
|
||||
|
@ -421,7 +421,7 @@ bool TranslatorVisitor::arm_LDRH_reg(Cond cond, bool P, bool U, bool W, Reg n, R
|
|||
const auto data = ir.ZeroExtendHalfToWord(ir.ReadMemory16(address, IR::AccType::NORMAL));
|
||||
|
||||
ir.SetRegister(t, data);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// LDRSB <Rt>, [PC, #+/-<imm>]
|
||||
|
@ -442,7 +442,7 @@ bool TranslatorVisitor::arm_LDRSB_lit(Cond cond, bool U, Reg t, Imm<4> imm8a, Im
|
|||
const auto data = ir.SignExtendByteToWord(ir.ReadMemory8(ir.Imm32(address), IR::AccType::NORMAL));
|
||||
|
||||
ir.SetRegister(t, data);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// LDRSB <Rt>, [<Rn>, #+/-<imm>]{!}
|
||||
|
@ -471,7 +471,7 @@ bool TranslatorVisitor::arm_LDRSB_imm(Cond cond, bool P, bool U, bool W, Reg n,
|
|||
const auto data = ir.SignExtendByteToWord(ir.ReadMemory8(address, IR::AccType::NORMAL));
|
||||
|
||||
ir.SetRegister(t, data);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// LDRSB <Rt>, [<Rn>, #+/-<Rm>]{!}
|
||||
|
@ -495,7 +495,7 @@ bool TranslatorVisitor::arm_LDRSB_reg(Cond cond, bool P, bool U, bool W, Reg n,
|
|||
const auto data = ir.SignExtendByteToWord(ir.ReadMemory8(address, IR::AccType::NORMAL));
|
||||
|
||||
ir.SetRegister(t, data);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// LDRSH <Rt>, [PC, #-/+<imm>]
|
||||
|
@ -515,7 +515,7 @@ bool TranslatorVisitor::arm_LDRSH_lit(Cond cond, bool U, Reg t, Imm<4> imm8a, Im
|
|||
const auto data = ir.SignExtendHalfToWord(ir.ReadMemory16(ir.Imm32(address), IR::AccType::NORMAL));
|
||||
|
||||
ir.SetRegister(t, data);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// LDRSH <Rt>, [<Rn>, #+/-<imm>]{!}
|
||||
|
@ -544,7 +544,7 @@ bool TranslatorVisitor::arm_LDRSH_imm(Cond cond, bool P, bool U, bool W, Reg n,
|
|||
const auto data = ir.SignExtendHalfToWord(ir.ReadMemory16(address, IR::AccType::NORMAL));
|
||||
|
||||
ir.SetRegister(t, data);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// LDRSH <Rt>, [<Rn>, #+/-<Rm>]{!}
|
||||
|
@ -568,7 +568,7 @@ bool TranslatorVisitor::arm_LDRSH_reg(Cond cond, bool P, bool U, bool W, Reg n,
|
|||
const auto data = ir.SignExtendHalfToWord(ir.ReadMemory16(address, IR::AccType::NORMAL));
|
||||
|
||||
ir.SetRegister(t, data);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// STR <Rt>, [<Rn>, #+/-<imm>]{!}
|
||||
|
@ -585,7 +585,7 @@ bool TranslatorVisitor::arm_STR_imm(Cond cond, bool P, bool U, bool W, Reg n, Re
|
|||
const auto offset = ir.Imm32(imm12.ZeroExtend());
|
||||
const auto address = GetAddress(ir, P, U, W, n, offset);
|
||||
ir.WriteMemory32(address, ir.GetRegister(t), IR::AccType::NORMAL);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// STR <Rt>, [<Rn>, #+/-<Rm>]{!}
|
||||
|
@ -606,7 +606,7 @@ bool TranslatorVisitor::arm_STR_reg(Cond cond, bool P, bool U, bool W, Reg n, Re
|
|||
const auto offset = EmitImmShift(ir.GetRegister(m), shift, imm5, ir.GetCFlag()).result;
|
||||
const auto address = GetAddress(ir, P, U, W, n, offset);
|
||||
ir.WriteMemory32(address, ir.GetRegister(t), IR::AccType::NORMAL);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// STRB <Rt>, [<Rn>, #+/-<imm>]{!}
|
||||
|
@ -627,7 +627,7 @@ bool TranslatorVisitor::arm_STRB_imm(Cond cond, bool P, bool U, bool W, Reg n, R
|
|||
const auto offset = ir.Imm32(imm12.ZeroExtend());
|
||||
const auto address = GetAddress(ir, P, U, W, n, offset);
|
||||
ir.WriteMemory8(address, ir.LeastSignificantByte(ir.GetRegister(t)), IR::AccType::NORMAL);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// STRB <Rt>, [<Rn>, #+/-<Rm>]{!}
|
||||
|
@ -648,7 +648,7 @@ bool TranslatorVisitor::arm_STRB_reg(Cond cond, bool P, bool U, bool W, Reg n, R
|
|||
const auto offset = EmitImmShift(ir.GetRegister(m), shift, imm5, ir.GetCFlag()).result;
|
||||
const auto address = GetAddress(ir, P, U, W, n, offset);
|
||||
ir.WriteMemory8(address, ir.LeastSignificantByte(ir.GetRegister(t)), IR::AccType::NORMAL);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// STRD <Rt>, [<Rn>, #+/-<imm>]{!}
|
||||
|
@ -686,7 +686,7 @@ bool TranslatorVisitor::arm_STRD_imm(Cond cond, bool P, bool U, bool W, Reg n, R
|
|||
|
||||
// NOTE: If alignment is exactly off by 4, each word is an atomic access.
|
||||
ir.WriteMemory64(address, data, IR::AccType::ATOMIC);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// STRD <Rt>, [<Rn>, #+/-<Rm>]{!}
|
||||
|
@ -723,7 +723,7 @@ bool TranslatorVisitor::arm_STRD_reg(Cond cond, bool P, bool U, bool W, Reg n, R
|
|||
|
||||
// NOTE: If alignment is exactly off by 4, each word is an atomic access.
|
||||
ir.WriteMemory64(address, data, IR::AccType::ATOMIC);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// STRH <Rt>, [<Rn>, #+/-<imm>]{!}
|
||||
|
@ -746,7 +746,7 @@ bool TranslatorVisitor::arm_STRH_imm(Cond cond, bool P, bool U, bool W, Reg n, R
|
|||
const auto address = GetAddress(ir, P, U, W, n, offset);
|
||||
|
||||
ir.WriteMemory16(address, ir.LeastSignificantHalf(ir.GetRegister(t)), IR::AccType::NORMAL);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// STRH <Rt>, [<Rn>, #+/-<Rm>]{!}
|
||||
|
@ -768,29 +768,31 @@ bool TranslatorVisitor::arm_STRH_reg(Cond cond, bool P, bool U, bool W, Reg n, R
|
|||
const auto address = GetAddress(ir, P, U, W, n, offset);
|
||||
|
||||
ir.WriteMemory16(address, ir.LeastSignificantHalf(ir.GetRegister(t)), IR::AccType::NORMAL);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
static bool LDMHelper(A32::IREmitter& ir, bool W, Reg n, RegList list, IR::U32 start_address, IR::U32 writeback_address) {
|
||||
static bool LDMHelper(TranslatorVisitor& v, bool W, Reg n, RegList list, IR::U32 start_address, IR::U32 writeback_address) {
|
||||
auto address = start_address;
|
||||
for (size_t i = 0; i <= 14; i++) {
|
||||
if (mcl::bit::get_bit(i, list)) {
|
||||
ir.SetRegister(static_cast<Reg>(i), ir.ReadMemory32(address, IR::AccType::ATOMIC));
|
||||
address = ir.Add(address, ir.Imm32(4));
|
||||
v.ir.SetRegister(static_cast<Reg>(i), v.ir.ReadMemory32(address, IR::AccType::ATOMIC));
|
||||
address = v.ir.Add(address, v.ir.Imm32(4));
|
||||
}
|
||||
}
|
||||
if (W && !mcl::bit::get_bit(RegNumber(n), list)) {
|
||||
ir.SetRegister(n, writeback_address);
|
||||
v.ir.SetRegister(n, writeback_address);
|
||||
}
|
||||
if (mcl::bit::get_bit<15>(list)) {
|
||||
ir.LoadWritePC(ir.ReadMemory32(address, IR::AccType::ATOMIC));
|
||||
if (n == Reg::R13)
|
||||
ir.SetTerm(IR::Term::PopRSBHint{});
|
||||
v.ir.LoadWritePC(v.ir.ReadMemory32(address, IR::AccType::ATOMIC));
|
||||
if (v.options.check_halt_on_memory_access)
|
||||
v.ir.SetTerm(IR::Term::CheckHalt{IR::Term::ReturnToDispatch{}});
|
||||
else if (n == Reg::R13)
|
||||
v.ir.SetTerm(IR::Term::PopRSBHint{});
|
||||
else
|
||||
ir.SetTerm(IR::Term::FastDispatchHint{});
|
||||
v.ir.SetTerm(IR::Term::FastDispatchHint{});
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
return v.MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// LDM <Rn>{!}, <reg_list>
|
||||
|
@ -808,7 +810,7 @@ bool TranslatorVisitor::arm_LDM(Cond cond, bool W, Reg n, RegList list) {
|
|||
|
||||
const auto start_address = ir.GetRegister(n);
|
||||
const auto writeback_address = ir.Add(start_address, ir.Imm32(u32(mcl::bit::count_ones(list) * 4)));
|
||||
return LDMHelper(ir, W, n, list, start_address, writeback_address);
|
||||
return LDMHelper(*this, W, n, list, start_address, writeback_address);
|
||||
}
|
||||
|
||||
// LDMDA <Rn>{!}, <reg_list>
|
||||
|
@ -826,7 +828,7 @@ bool TranslatorVisitor::arm_LDMDA(Cond cond, bool W, Reg n, RegList list) {
|
|||
|
||||
const auto start_address = ir.Sub(ir.GetRegister(n), ir.Imm32(u32(4 * mcl::bit::count_ones(list) - 4)));
|
||||
const auto writeback_address = ir.Sub(start_address, ir.Imm32(4));
|
||||
return LDMHelper(ir, W, n, list, start_address, writeback_address);
|
||||
return LDMHelper(*this, W, n, list, start_address, writeback_address);
|
||||
}
|
||||
|
||||
// LDMDB <Rn>{!}, <reg_list>
|
||||
|
@ -844,7 +846,7 @@ bool TranslatorVisitor::arm_LDMDB(Cond cond, bool W, Reg n, RegList list) {
|
|||
|
||||
const auto start_address = ir.Sub(ir.GetRegister(n), ir.Imm32(u32(4 * mcl::bit::count_ones(list))));
|
||||
const auto writeback_address = start_address;
|
||||
return LDMHelper(ir, W, n, list, start_address, writeback_address);
|
||||
return LDMHelper(*this, W, n, list, start_address, writeback_address);
|
||||
}
|
||||
|
||||
// LDMIB <Rn>{!}, <reg_list>
|
||||
|
@ -862,7 +864,7 @@ bool TranslatorVisitor::arm_LDMIB(Cond cond, bool W, Reg n, RegList list) {
|
|||
|
||||
const auto start_address = ir.Add(ir.GetRegister(n), ir.Imm32(4));
|
||||
const auto writeback_address = ir.Add(ir.GetRegister(n), ir.Imm32(u32(4 * mcl::bit::count_ones(list))));
|
||||
return LDMHelper(ir, W, n, list, start_address, writeback_address);
|
||||
return LDMHelper(*this, W, n, list, start_address, writeback_address);
|
||||
}
|
||||
|
||||
bool TranslatorVisitor::arm_LDM_usr() {
|
||||
|
@ -873,21 +875,21 @@ bool TranslatorVisitor::arm_LDM_eret() {
|
|||
return InterpretThisInstruction();
|
||||
}
|
||||
|
||||
static bool STMHelper(A32::IREmitter& ir, bool W, Reg n, RegList list, IR::U32 start_address, IR::U32 writeback_address) {
|
||||
static bool STMHelper(TranslatorVisitor& v, bool W, Reg n, RegList list, IR::U32 start_address, IR::U32 writeback_address) {
|
||||
auto address = start_address;
|
||||
for (size_t i = 0; i <= 14; i++) {
|
||||
if (mcl::bit::get_bit(i, list)) {
|
||||
ir.WriteMemory32(address, ir.GetRegister(static_cast<Reg>(i)), IR::AccType::ATOMIC);
|
||||
address = ir.Add(address, ir.Imm32(4));
|
||||
v.ir.WriteMemory32(address, v.ir.GetRegister(static_cast<Reg>(i)), IR::AccType::ATOMIC);
|
||||
address = v.ir.Add(address, v.ir.Imm32(4));
|
||||
}
|
||||
}
|
||||
if (W) {
|
||||
ir.SetRegister(n, writeback_address);
|
||||
v.ir.SetRegister(n, writeback_address);
|
||||
}
|
||||
if (mcl::bit::get_bit<15>(list)) {
|
||||
ir.WriteMemory32(address, ir.Imm32(ir.PC()), IR::AccType::ATOMIC);
|
||||
v.ir.WriteMemory32(address, v.ir.Imm32(v.ir.PC()), IR::AccType::ATOMIC);
|
||||
}
|
||||
return true;
|
||||
return v.MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// STM <Rn>{!}, <reg_list>
|
||||
|
@ -902,7 +904,7 @@ bool TranslatorVisitor::arm_STM(Cond cond, bool W, Reg n, RegList list) {
|
|||
|
||||
const auto start_address = ir.GetRegister(n);
|
||||
const auto writeback_address = ir.Add(start_address, ir.Imm32(u32(mcl::bit::count_ones(list) * 4)));
|
||||
return STMHelper(ir, W, n, list, start_address, writeback_address);
|
||||
return STMHelper(*this, W, n, list, start_address, writeback_address);
|
||||
}
|
||||
|
||||
// STMDA <Rn>{!}, <reg_list>
|
||||
|
@ -917,7 +919,7 @@ bool TranslatorVisitor::arm_STMDA(Cond cond, bool W, Reg n, RegList list) {
|
|||
|
||||
const auto start_address = ir.Sub(ir.GetRegister(n), ir.Imm32(u32(4 * mcl::bit::count_ones(list) - 4)));
|
||||
const auto writeback_address = ir.Sub(start_address, ir.Imm32(4));
|
||||
return STMHelper(ir, W, n, list, start_address, writeback_address);
|
||||
return STMHelper(*this, W, n, list, start_address, writeback_address);
|
||||
}
|
||||
|
||||
// STMDB <Rn>{!}, <reg_list>
|
||||
|
@ -932,7 +934,7 @@ bool TranslatorVisitor::arm_STMDB(Cond cond, bool W, Reg n, RegList list) {
|
|||
|
||||
const auto start_address = ir.Sub(ir.GetRegister(n), ir.Imm32(u32(4 * mcl::bit::count_ones(list))));
|
||||
const auto writeback_address = start_address;
|
||||
return STMHelper(ir, W, n, list, start_address, writeback_address);
|
||||
return STMHelper(*this, W, n, list, start_address, writeback_address);
|
||||
}
|
||||
|
||||
// STMIB <Rn>{!}, <reg_list>
|
||||
|
@ -947,7 +949,7 @@ bool TranslatorVisitor::arm_STMIB(Cond cond, bool W, Reg n, RegList list) {
|
|||
|
||||
const auto start_address = ir.Add(ir.GetRegister(n), ir.Imm32(4));
|
||||
const auto writeback_address = ir.Add(ir.GetRegister(n), ir.Imm32(u32(4 * mcl::bit::count_ones(list))));
|
||||
return STMHelper(ir, W, n, list, start_address, writeback_address);
|
||||
return STMHelper(*this, W, n, list, start_address, writeback_address);
|
||||
}
|
||||
|
||||
bool TranslatorVisitor::arm_STM_usr() {
|
||||
|
|
|
@ -29,7 +29,7 @@ bool TranslatorVisitor::arm_SWP(Cond cond, Reg n, Reg t, Reg t2) {
|
|||
ir.WriteMemory32(ir.GetRegister(n), ir.GetRegister(t2), IR::AccType::SWAP);
|
||||
// TODO: Alignment check
|
||||
ir.SetRegister(t, data);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// SWPB<c> <Rt>, <Rt2>, [<Rn>]
|
||||
|
@ -48,7 +48,7 @@ bool TranslatorVisitor::arm_SWPB(Cond cond, Reg n, Reg t, Reg t2) {
|
|||
ir.WriteMemory8(ir.GetRegister(n), ir.LeastSignificantByte(ir.GetRegister(t2)), IR::AccType::SWAP);
|
||||
// TODO: Alignment check
|
||||
ir.SetRegister(t, ir.ZeroExtendByteToWord(data));
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// LDA<c> <Rt>, [<Rn>]
|
||||
|
@ -63,7 +63,7 @@ bool TranslatorVisitor::arm_LDA(Cond cond, Reg n, Reg t) {
|
|||
|
||||
const auto address = ir.GetRegister(n);
|
||||
ir.SetRegister(t, ir.ReadMemory32(address, IR::AccType::ORDERED));
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
// LDAB<c> <Rt>, [<Rn>]
|
||||
bool TranslatorVisitor::arm_LDAB(Cond cond, Reg n, Reg t) {
|
||||
|
@ -77,7 +77,7 @@ bool TranslatorVisitor::arm_LDAB(Cond cond, Reg n, Reg t) {
|
|||
|
||||
const auto address = ir.GetRegister(n);
|
||||
ir.SetRegister(t, ir.ZeroExtendToWord(ir.ReadMemory8(address, IR::AccType::ORDERED)));
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
// LDAH<c> <Rt>, [<Rn>]
|
||||
bool TranslatorVisitor::arm_LDAH(Cond cond, Reg n, Reg t) {
|
||||
|
@ -91,7 +91,7 @@ bool TranslatorVisitor::arm_LDAH(Cond cond, Reg n, Reg t) {
|
|||
|
||||
const auto address = ir.GetRegister(n);
|
||||
ir.SetRegister(t, ir.ZeroExtendToWord(ir.ReadMemory16(address, IR::AccType::ORDERED)));
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// LDAEX<c> <Rt>, [<Rn>]
|
||||
|
@ -106,7 +106,7 @@ bool TranslatorVisitor::arm_LDAEX(Cond cond, Reg n, Reg t) {
|
|||
|
||||
const auto address = ir.GetRegister(n);
|
||||
ir.SetRegister(t, ir.ExclusiveReadMemory32(address, IR::AccType::ORDERED));
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// LDAEXB<c> <Rt>, [<Rn>]
|
||||
|
@ -121,7 +121,7 @@ bool TranslatorVisitor::arm_LDAEXB(Cond cond, Reg n, Reg t) {
|
|||
|
||||
const auto address = ir.GetRegister(n);
|
||||
ir.SetRegister(t, ir.ZeroExtendByteToWord(ir.ExclusiveReadMemory8(address, IR::AccType::ORDERED)));
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// LDAEXD<c> <Rt>, <Rt2>, [<Rn>]
|
||||
|
@ -139,7 +139,7 @@ bool TranslatorVisitor::arm_LDAEXD(Cond cond, Reg n, Reg t) {
|
|||
// DO NOT SWAP hi AND lo IN BIG ENDIAN MODE, THIS IS CORRECT BEHAVIOUR
|
||||
ir.SetRegister(t, lo);
|
||||
ir.SetRegister(t + 1, hi);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// LDAEXH<c> <Rt>, [<Rn>]
|
||||
|
@ -154,7 +154,7 @@ bool TranslatorVisitor::arm_LDAEXH(Cond cond, Reg n, Reg t) {
|
|||
|
||||
const auto address = ir.GetRegister(n);
|
||||
ir.SetRegister(t, ir.ZeroExtendHalfToWord(ir.ExclusiveReadMemory16(address, IR::AccType::ORDERED)));
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// STL<c> <Rt>, [<Rn>]
|
||||
|
@ -169,7 +169,7 @@ bool TranslatorVisitor::arm_STL(Cond cond, Reg n, Reg t) {
|
|||
|
||||
const auto address = ir.GetRegister(n);
|
||||
ir.WriteMemory32(address, ir.GetRegister(t), IR::AccType::ORDERED);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// STLB<c> <Rt>, [<Rn>]
|
||||
|
@ -184,7 +184,7 @@ bool TranslatorVisitor::arm_STLB(Cond cond, Reg n, Reg t) {
|
|||
|
||||
const auto address = ir.GetRegister(n);
|
||||
ir.WriteMemory8(address, ir.LeastSignificantByte(ir.GetRegister(t)), IR::AccType::ORDERED);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// STLH<c> <Rd>, <Rt>, [<Rn>]
|
||||
|
@ -199,7 +199,7 @@ bool TranslatorVisitor::arm_STLH(Cond cond, Reg n, Reg t) {
|
|||
|
||||
const auto address = ir.GetRegister(n);
|
||||
ir.WriteMemory16(address, ir.LeastSignificantHalf(ir.GetRegister(t)), IR::AccType::ORDERED);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// STLEXB<c> <Rd>, <Rt>, [<Rn>]
|
||||
|
@ -220,7 +220,7 @@ bool TranslatorVisitor::arm_STLEXB(Cond cond, Reg n, Reg d, Reg t) {
|
|||
const auto value = ir.LeastSignificantByte(ir.GetRegister(t));
|
||||
const auto passed = ir.ExclusiveWriteMemory8(address, value, IR::AccType::ORDERED);
|
||||
ir.SetRegister(d, passed);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
// STLEXD<c> <Rd>, <Rt>, <Rt2>, [<Rn>]
|
||||
bool TranslatorVisitor::arm_STLEXD(Cond cond, Reg n, Reg d, Reg t) {
|
||||
|
@ -242,7 +242,7 @@ bool TranslatorVisitor::arm_STLEXD(Cond cond, Reg n, Reg d, Reg t) {
|
|||
const auto value_hi = ir.GetRegister(t2);
|
||||
const auto passed = ir.ExclusiveWriteMemory64(address, value_lo, value_hi, IR::AccType::ORDERED);
|
||||
ir.SetRegister(d, passed);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// STLEXH<c> <Rd>, <Rt>, [<Rn>]
|
||||
|
@ -263,7 +263,7 @@ bool TranslatorVisitor::arm_STLEXH(Cond cond, Reg n, Reg d, Reg t) {
|
|||
const auto value = ir.LeastSignificantHalf(ir.GetRegister(t));
|
||||
const auto passed = ir.ExclusiveWriteMemory16(address, value, IR::AccType::ORDERED);
|
||||
ir.SetRegister(d, passed);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// STLEX<c> <Rd>, <Rt>, [<Rn>]
|
||||
|
@ -284,7 +284,7 @@ bool TranslatorVisitor::arm_STLEX(Cond cond, Reg n, Reg d, Reg t) {
|
|||
const auto value = ir.GetRegister(t);
|
||||
const auto passed = ir.ExclusiveWriteMemory32(address, value, IR::AccType::ORDERED);
|
||||
ir.SetRegister(d, passed);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// LDREX<c> <Rt>, [<Rn>]
|
||||
|
@ -299,7 +299,7 @@ bool TranslatorVisitor::arm_LDREX(Cond cond, Reg n, Reg t) {
|
|||
|
||||
const auto address = ir.GetRegister(n);
|
||||
ir.SetRegister(t, ir.ExclusiveReadMemory32(address, IR::AccType::ATOMIC));
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// LDREXB<c> <Rt>, [<Rn>]
|
||||
|
@ -314,7 +314,7 @@ bool TranslatorVisitor::arm_LDREXB(Cond cond, Reg n, Reg t) {
|
|||
|
||||
const auto address = ir.GetRegister(n);
|
||||
ir.SetRegister(t, ir.ZeroExtendByteToWord(ir.ExclusiveReadMemory8(address, IR::AccType::ATOMIC)));
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// LDREXD<c> <Rt>, <Rt2>, [<Rn>]
|
||||
|
@ -332,7 +332,7 @@ bool TranslatorVisitor::arm_LDREXD(Cond cond, Reg n, Reg t) {
|
|||
// DO NOT SWAP hi AND lo IN BIG ENDIAN MODE, THIS IS CORRECT BEHAVIOUR
|
||||
ir.SetRegister(t, lo);
|
||||
ir.SetRegister(t + 1, hi);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// LDREXH<c> <Rt>, [<Rn>]
|
||||
|
@ -347,7 +347,7 @@ bool TranslatorVisitor::arm_LDREXH(Cond cond, Reg n, Reg t) {
|
|||
|
||||
const auto address = ir.GetRegister(n);
|
||||
ir.SetRegister(t, ir.ZeroExtendHalfToWord(ir.ExclusiveReadMemory16(address, IR::AccType::ATOMIC)));
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// STREX<c> <Rd>, <Rt>, [<Rn>]
|
||||
|
@ -368,7 +368,7 @@ bool TranslatorVisitor::arm_STREX(Cond cond, Reg n, Reg d, Reg t) {
|
|||
const auto value = ir.GetRegister(t);
|
||||
const auto passed = ir.ExclusiveWriteMemory32(address, value, IR::AccType::ATOMIC);
|
||||
ir.SetRegister(d, passed);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// STREXB<c> <Rd>, <Rt>, [<Rn>]
|
||||
|
@ -389,7 +389,7 @@ bool TranslatorVisitor::arm_STREXB(Cond cond, Reg n, Reg d, Reg t) {
|
|||
const auto value = ir.LeastSignificantByte(ir.GetRegister(t));
|
||||
const auto passed = ir.ExclusiveWriteMemory8(address, value, IR::AccType::ATOMIC);
|
||||
ir.SetRegister(d, passed);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// STREXD<c> <Rd>, <Rt>, <Rt2>, [<Rn>]
|
||||
|
@ -412,7 +412,7 @@ bool TranslatorVisitor::arm_STREXD(Cond cond, Reg n, Reg d, Reg t) {
|
|||
const auto value_hi = ir.GetRegister(t2);
|
||||
const auto passed = ir.ExclusiveWriteMemory64(address, value_lo, value_hi, IR::AccType::ATOMIC);
|
||||
ir.SetRegister(d, passed);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// STREXH<c> <Rd>, <Rt>, [<Rn>]
|
||||
|
@ -433,7 +433,7 @@ bool TranslatorVisitor::arm_STREXH(Cond cond, Reg n, Reg d, Reg t) {
|
|||
const auto value = ir.LeastSignificantHalf(ir.GetRegister(t));
|
||||
const auto passed = ir.ExclusiveWriteMemory16(address, value, IR::AccType::ATOMIC);
|
||||
ir.SetRegister(d, passed);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::A32
|
||||
|
|
|
@ -449,7 +449,7 @@ bool TranslatorVisitor::thumb16_LDR_literal(Reg t, Imm<8> imm8) {
|
|||
const auto data = ir.ReadMemory32(ir.Imm32(address), IR::AccType::NORMAL);
|
||||
|
||||
ir.SetRegister(t, data);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// STR <Rt>, [<Rn>, <Rm>]
|
||||
|
@ -459,7 +459,7 @@ bool TranslatorVisitor::thumb16_STR_reg(Reg m, Reg n, Reg t) {
|
|||
const auto data = ir.GetRegister(t);
|
||||
|
||||
ir.WriteMemory32(address, data, IR::AccType::NORMAL);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// STRH <Rt>, [<Rn>, <Rm>]
|
||||
|
@ -469,7 +469,7 @@ bool TranslatorVisitor::thumb16_STRH_reg(Reg m, Reg n, Reg t) {
|
|||
const auto data = ir.LeastSignificantHalf(ir.GetRegister(t));
|
||||
|
||||
ir.WriteMemory16(address, data, IR::AccType::NORMAL);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// STRB <Rt>, [<Rn>, <Rm>]
|
||||
|
@ -479,7 +479,7 @@ bool TranslatorVisitor::thumb16_STRB_reg(Reg m, Reg n, Reg t) {
|
|||
const auto data = ir.LeastSignificantByte(ir.GetRegister(t));
|
||||
|
||||
ir.WriteMemory8(address, data, IR::AccType::NORMAL);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// LDRSB <Rt>, [<Rn>, <Rm>]
|
||||
|
@ -489,7 +489,7 @@ bool TranslatorVisitor::thumb16_LDRSB_reg(Reg m, Reg n, Reg t) {
|
|||
const auto data = ir.SignExtendByteToWord(ir.ReadMemory8(address, IR::AccType::NORMAL));
|
||||
|
||||
ir.SetRegister(t, data);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// LDR <Rt>, [<Rn>, <Rm>]
|
||||
|
@ -499,7 +499,7 @@ bool TranslatorVisitor::thumb16_LDR_reg(Reg m, Reg n, Reg t) {
|
|||
const auto data = ir.ReadMemory32(address, IR::AccType::NORMAL);
|
||||
|
||||
ir.SetRegister(t, data);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// LDRH <Rt>, [<Rn>, <Rm>]
|
||||
|
@ -509,7 +509,7 @@ bool TranslatorVisitor::thumb16_LDRH_reg(Reg m, Reg n, Reg t) {
|
|||
const auto data = ir.ZeroExtendHalfToWord(ir.ReadMemory16(address, IR::AccType::NORMAL));
|
||||
|
||||
ir.SetRegister(t, data);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// LDRB <Rt>, [<Rn>, <Rm>]
|
||||
|
@ -519,7 +519,7 @@ bool TranslatorVisitor::thumb16_LDRB_reg(Reg m, Reg n, Reg t) {
|
|||
const auto data = ir.ZeroExtendByteToWord(ir.ReadMemory8(address, IR::AccType::NORMAL));
|
||||
|
||||
ir.SetRegister(t, data);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// LDRH <Rt>, [<Rn>, <Rm>]
|
||||
|
@ -529,7 +529,7 @@ bool TranslatorVisitor::thumb16_LDRSH_reg(Reg m, Reg n, Reg t) {
|
|||
const auto data = ir.SignExtendHalfToWord(ir.ReadMemory16(address, IR::AccType::NORMAL));
|
||||
|
||||
ir.SetRegister(t, data);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// STR <Rt>, [<Rn>, #<imm>]
|
||||
|
@ -540,7 +540,7 @@ bool TranslatorVisitor::thumb16_STR_imm_t1(Imm<5> imm5, Reg n, Reg t) {
|
|||
const auto data = ir.GetRegister(t);
|
||||
|
||||
ir.WriteMemory32(address, data, IR::AccType::NORMAL);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// LDR <Rt>, [<Rn>, #<imm>]
|
||||
|
@ -551,7 +551,7 @@ bool TranslatorVisitor::thumb16_LDR_imm_t1(Imm<5> imm5, Reg n, Reg t) {
|
|||
const auto data = ir.ReadMemory32(address, IR::AccType::NORMAL);
|
||||
|
||||
ir.SetRegister(t, data);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// STRB <Rt>, [<Rn>, #<imm>]
|
||||
|
@ -573,7 +573,7 @@ bool TranslatorVisitor::thumb16_LDRB_imm(Imm<5> imm5, Reg n, Reg t) {
|
|||
const auto data = ir.ZeroExtendByteToWord(ir.ReadMemory8(address, IR::AccType::NORMAL));
|
||||
|
||||
ir.SetRegister(t, data);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// STRH <Rt>, [<Rn>, #<imm5>]
|
||||
|
@ -583,7 +583,7 @@ bool TranslatorVisitor::thumb16_STRH_imm(Imm<5> imm5, Reg n, Reg t) {
|
|||
const auto data = ir.LeastSignificantHalf(ir.GetRegister(t));
|
||||
|
||||
ir.WriteMemory16(address, data, IR::AccType::NORMAL);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// LDRH <Rt>, [<Rn>, #<imm5>]
|
||||
|
@ -593,7 +593,7 @@ bool TranslatorVisitor::thumb16_LDRH_imm(Imm<5> imm5, Reg n, Reg t) {
|
|||
const auto data = ir.ZeroExtendHalfToWord(ir.ReadMemory16(address, IR::AccType::NORMAL));
|
||||
|
||||
ir.SetRegister(t, data);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// STR <Rt>, [<Rn>, #<imm>]
|
||||
|
@ -605,7 +605,7 @@ bool TranslatorVisitor::thumb16_STR_imm_t2(Reg t, Imm<8> imm8) {
|
|||
const auto data = ir.GetRegister(t);
|
||||
|
||||
ir.WriteMemory32(address, data, IR::AccType::NORMAL);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// LDR <Rt>, [<Rn>, #<imm>]
|
||||
|
@ -617,7 +617,7 @@ bool TranslatorVisitor::thumb16_LDR_imm_t2(Reg t, Imm<8> imm8) {
|
|||
const auto data = ir.ReadMemory32(address, IR::AccType::NORMAL);
|
||||
|
||||
ir.SetRegister(t, data);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// ADR <Rd>, <label>
|
||||
|
@ -775,7 +775,7 @@ bool TranslatorVisitor::thumb16_PUSH(bool M, RegList reg_list) {
|
|||
|
||||
ir.SetRegister(Reg::SP, final_address);
|
||||
// TODO(optimization): Possible location for an RSB push.
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// POP <reg_list>
|
||||
|
@ -804,11 +804,15 @@ bool TranslatorVisitor::thumb16_POP(bool P, RegList reg_list) {
|
|||
ir.LoadWritePC(data);
|
||||
address = ir.Add(address, ir.Imm32(4));
|
||||
ir.SetRegister(Reg::SP, address);
|
||||
ir.SetTerm(IR::Term::PopRSBHint{});
|
||||
if (options.check_halt_on_memory_access) {
|
||||
ir.SetTerm(IR::Term::CheckHalt{IR::Term::PopRSBHint{}});
|
||||
} else {
|
||||
ir.SetTerm(IR::Term::PopRSBHint{});
|
||||
}
|
||||
return false;
|
||||
} else {
|
||||
ir.SetRegister(Reg::SP, address);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -887,7 +891,7 @@ bool TranslatorVisitor::thumb16_STMIA(Reg n, RegList reg_list) {
|
|||
}
|
||||
|
||||
ir.SetRegister(n, address);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// LDM <Rn>!, <reg_list>
|
||||
|
@ -910,7 +914,7 @@ bool TranslatorVisitor::thumb16_LDMIA(Reg n, RegList reg_list) {
|
|||
if (write_back) {
|
||||
ir.SetRegister(n, address);
|
||||
}
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// CB{N}Z <Rn>, <label>
|
||||
|
|
|
@ -34,7 +34,7 @@ static bool LoadByteLiteral(TranslatorVisitor& v, bool U, Reg t, Imm<12> imm12,
|
|||
const auto data = (v.ir.*ext_fn)(v.ir.ReadMemory8(v.ir.Imm32(address), IR::AccType::NORMAL));
|
||||
|
||||
v.ir.SetRegister(t, data);
|
||||
return true;
|
||||
return v.MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
static bool LoadByteRegister(TranslatorVisitor& v, Reg n, Reg t, Imm<2> imm2, Reg m, ExtensionFunction ext_fn) {
|
||||
|
@ -49,7 +49,7 @@ static bool LoadByteRegister(TranslatorVisitor& v, Reg n, Reg t, Imm<2> imm2, Re
|
|||
const auto data = (v.ir.*ext_fn)(v.ir.ReadMemory8(address, IR::AccType::NORMAL));
|
||||
|
||||
v.ir.SetRegister(t, data);
|
||||
return true;
|
||||
return v.MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
static bool LoadByteImmediate(TranslatorVisitor& v, Reg n, Reg t, bool P, bool U, bool W, Imm<12> imm12, ExtensionFunction ext_fn) {
|
||||
|
@ -64,7 +64,7 @@ static bool LoadByteImmediate(TranslatorVisitor& v, Reg n, Reg t, bool P, bool U
|
|||
if (W) {
|
||||
v.ir.SetRegister(n, offset_address);
|
||||
}
|
||||
return true;
|
||||
return v.MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
bool TranslatorVisitor::thumb32_PLD_lit(bool /*U*/, Imm<12> /*imm12*/) {
|
||||
|
|
|
@ -16,7 +16,7 @@ static bool LoadHalfLiteral(TranslatorVisitor& v, bool U, Reg t, Imm<12> imm12,
|
|||
const auto data = (v.ir.*ext_fn)(v.ir.ReadMemory16(v.ir.Imm32(address), IR::AccType::NORMAL));
|
||||
|
||||
v.ir.SetRegister(t, data);
|
||||
return true;
|
||||
return v.MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
static bool LoadHalfRegister(TranslatorVisitor& v, Reg n, Reg t, Imm<2> imm2, Reg m, ExtensionFunction ext_fn) {
|
||||
|
@ -31,7 +31,7 @@ static bool LoadHalfRegister(TranslatorVisitor& v, Reg n, Reg t, Imm<2> imm2, Re
|
|||
const IR::U32 data = (v.ir.*ext_fn)(v.ir.ReadMemory16(address, IR::AccType::NORMAL));
|
||||
|
||||
v.ir.SetRegister(t, data);
|
||||
return true;
|
||||
return v.MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
static bool LoadHalfImmediate(TranslatorVisitor& v, Reg n, Reg t, bool P, bool U, bool W, Imm<12> imm12, ExtensionFunction ext_fn) {
|
||||
|
@ -48,7 +48,7 @@ static bool LoadHalfImmediate(TranslatorVisitor& v, Reg n, Reg t, bool P, bool U
|
|||
}
|
||||
|
||||
v.ir.SetRegister(t, data);
|
||||
return true;
|
||||
return v.MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
bool TranslatorVisitor::thumb32_LDRH_lit(bool U, Reg t, Imm<12> imm12) {
|
||||
|
|
|
@ -36,7 +36,11 @@ static bool TableBranch(TranslatorVisitor& v, Reg n, Reg m, bool half) {
|
|||
|
||||
v.ir.UpdateUpperLocationDescriptor();
|
||||
v.ir.BranchWritePC(branch_value);
|
||||
v.ir.SetTerm(IR::Term::FastDispatchHint{});
|
||||
if (v.options.check_halt_on_memory_access) {
|
||||
v.ir.SetTerm(IR::Term::CheckHalt{IR::Term::ReturnToDispatch{}});
|
||||
} else {
|
||||
v.ir.SetTerm(IR::Term::FastDispatchHint{});
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -68,7 +72,7 @@ static bool LoadDualImmediate(TranslatorVisitor& v, bool P, bool U, bool W, Reg
|
|||
if (W) {
|
||||
v.ir.SetRegister(n, offset_address);
|
||||
}
|
||||
return true;
|
||||
return v.MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
static bool LoadDualLiteral(TranslatorVisitor& v, bool U, bool W, Reg t, Reg t2, Imm<8> imm8) {
|
||||
|
@ -94,7 +98,7 @@ static bool LoadDualLiteral(TranslatorVisitor& v, bool U, bool W, Reg t, Reg t2,
|
|||
v.ir.SetRegister(t2, v.ir.MostSignificantWord(data).result);
|
||||
}
|
||||
|
||||
return true;
|
||||
return v.MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
static bool StoreDual(TranslatorVisitor& v, bool P, bool U, bool W, Reg n, Reg t, Reg t2, Imm<8> imm8) {
|
||||
|
@ -123,7 +127,7 @@ static bool StoreDual(TranslatorVisitor& v, bool P, bool U, bool W, Reg n, Reg t
|
|||
if (W) {
|
||||
v.ir.SetRegister(n, offset_address);
|
||||
}
|
||||
return true;
|
||||
return v.MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
bool TranslatorVisitor::thumb32_LDA(Reg n, Reg t) {
|
||||
|
@ -169,7 +173,7 @@ bool TranslatorVisitor::thumb32_LDREX(Reg n, Reg t, Imm<8> imm8) {
|
|||
const auto value = ir.ExclusiveReadMemory32(address, IR::AccType::ATOMIC);
|
||||
|
||||
ir.SetRegister(t, value);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
bool TranslatorVisitor::thumb32_LDREXB(Reg n, Reg t) {
|
||||
|
@ -181,7 +185,7 @@ bool TranslatorVisitor::thumb32_LDREXB(Reg n, Reg t) {
|
|||
const auto value = ir.ZeroExtendToWord(ir.ExclusiveReadMemory8(address, IR::AccType::ATOMIC));
|
||||
|
||||
ir.SetRegister(t, value);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
bool TranslatorVisitor::thumb32_LDREXD(Reg n, Reg t, Reg t2) {
|
||||
|
@ -195,7 +199,7 @@ bool TranslatorVisitor::thumb32_LDREXD(Reg n, Reg t, Reg t2) {
|
|||
// DO NOT SWAP hi AND lo IN BIG ENDIAN MODE, THIS IS CORRECT BEHAVIOUR
|
||||
ir.SetRegister(t, lo);
|
||||
ir.SetRegister(t2, hi);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
bool TranslatorVisitor::thumb32_LDREXH(Reg n, Reg t) {
|
||||
|
@ -207,7 +211,7 @@ bool TranslatorVisitor::thumb32_LDREXH(Reg n, Reg t) {
|
|||
const auto value = ir.ZeroExtendToWord(ir.ExclusiveReadMemory16(address, IR::AccType::ATOMIC));
|
||||
|
||||
ir.SetRegister(t, value);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
bool TranslatorVisitor::thumb32_STL(Reg n, Reg t) {
|
||||
|
@ -217,7 +221,7 @@ bool TranslatorVisitor::thumb32_STL(Reg n, Reg t) {
|
|||
|
||||
const auto address = ir.GetRegister(n);
|
||||
ir.WriteMemory32(address, ir.GetRegister(t), IR::AccType::ORDERED);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
bool TranslatorVisitor::thumb32_STREX(Reg n, Reg t, Reg d, Imm<8> imm8) {
|
||||
|
@ -232,7 +236,7 @@ bool TranslatorVisitor::thumb32_STREX(Reg n, Reg t, Reg d, Imm<8> imm8) {
|
|||
const auto value = ir.GetRegister(t);
|
||||
const auto passed = ir.ExclusiveWriteMemory32(address, value, IR::AccType::ATOMIC);
|
||||
ir.SetRegister(d, passed);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
bool TranslatorVisitor::thumb32_STREXB(Reg n, Reg t, Reg d) {
|
||||
|
@ -247,7 +251,7 @@ bool TranslatorVisitor::thumb32_STREXB(Reg n, Reg t, Reg d) {
|
|||
const auto value = ir.LeastSignificantByte(ir.GetRegister(t));
|
||||
const auto passed = ir.ExclusiveWriteMemory8(address, value, IR::AccType::ATOMIC);
|
||||
ir.SetRegister(d, passed);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
bool TranslatorVisitor::thumb32_STREXD(Reg n, Reg t, Reg t2, Reg d) {
|
||||
|
@ -263,7 +267,7 @@ bool TranslatorVisitor::thumb32_STREXD(Reg n, Reg t, Reg t2, Reg d) {
|
|||
const auto value_hi = ir.GetRegister(t2);
|
||||
const auto passed = ir.ExclusiveWriteMemory64(address, value_lo, value_hi, IR::AccType::ATOMIC);
|
||||
ir.SetRegister(d, passed);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
bool TranslatorVisitor::thumb32_STREXH(Reg n, Reg t, Reg d) {
|
||||
|
@ -278,7 +282,7 @@ bool TranslatorVisitor::thumb32_STREXH(Reg n, Reg t, Reg d) {
|
|||
const auto value = ir.LeastSignificantHalf(ir.GetRegister(t));
|
||||
const auto passed = ir.ExclusiveWriteMemory16(address, value, IR::AccType::ATOMIC);
|
||||
ir.SetRegister(d, passed);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
bool TranslatorVisitor::thumb32_TBB(Reg n, Reg m) {
|
||||
|
|
|
@ -12,42 +12,44 @@ static bool ITBlockCheck(const A32::IREmitter& ir) {
|
|||
return ir.current_location.IT().IsInITBlock() && !ir.current_location.IT().IsLastInITBlock();
|
||||
}
|
||||
|
||||
static bool LDMHelper(A32::IREmitter& ir, bool W, Reg n, u32 list, const IR::U32& start_address, const IR::U32& writeback_address) {
|
||||
static bool LDMHelper(TranslatorVisitor& v, bool W, Reg n, u32 list, const IR::U32& start_address, const IR::U32& writeback_address) {
|
||||
auto address = start_address;
|
||||
for (size_t i = 0; i <= 14; i++) {
|
||||
if (mcl::bit::get_bit(i, list)) {
|
||||
ir.SetRegister(static_cast<Reg>(i), ir.ReadMemory32(address, IR::AccType::ATOMIC));
|
||||
address = ir.Add(address, ir.Imm32(4));
|
||||
v.ir.SetRegister(static_cast<Reg>(i), v.ir.ReadMemory32(address, IR::AccType::ATOMIC));
|
||||
address = v.ir.Add(address, v.ir.Imm32(4));
|
||||
}
|
||||
}
|
||||
if (W && !mcl::bit::get_bit(RegNumber(n), list)) {
|
||||
ir.SetRegister(n, writeback_address);
|
||||
v.ir.SetRegister(n, writeback_address);
|
||||
}
|
||||
if (mcl::bit::get_bit<15>(list)) {
|
||||
ir.UpdateUpperLocationDescriptor();
|
||||
ir.LoadWritePC(ir.ReadMemory32(address, IR::AccType::ATOMIC));
|
||||
if (n == Reg::R13) {
|
||||
ir.SetTerm(IR::Term::PopRSBHint{});
|
||||
v.ir.UpdateUpperLocationDescriptor();
|
||||
v.ir.LoadWritePC(v.ir.ReadMemory32(address, IR::AccType::ATOMIC));
|
||||
if (v.options.check_halt_on_memory_access) {
|
||||
v.ir.SetTerm(IR::Term::CheckHalt{IR::Term::ReturnToDispatch{}});
|
||||
} else if (n == Reg::R13) {
|
||||
v.ir.SetTerm(IR::Term::PopRSBHint{});
|
||||
} else {
|
||||
ir.SetTerm(IR::Term::FastDispatchHint{});
|
||||
v.ir.SetTerm(IR::Term::FastDispatchHint{});
|
||||
}
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
return v.MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
static bool STMHelper(A32::IREmitter& ir, bool W, Reg n, u32 list, const IR::U32& start_address, const IR::U32& writeback_address) {
|
||||
static bool STMHelper(TranslatorVisitor& v, bool W, Reg n, u32 list, const IR::U32& start_address, const IR::U32& writeback_address) {
|
||||
auto address = start_address;
|
||||
for (size_t i = 0; i <= 14; i++) {
|
||||
if (mcl::bit::get_bit(i, list)) {
|
||||
ir.WriteMemory32(address, ir.GetRegister(static_cast<Reg>(i)), IR::AccType::ATOMIC);
|
||||
address = ir.Add(address, ir.Imm32(4));
|
||||
v.ir.WriteMemory32(address, v.ir.GetRegister(static_cast<Reg>(i)), IR::AccType::ATOMIC);
|
||||
address = v.ir.Add(address, v.ir.Imm32(4));
|
||||
}
|
||||
}
|
||||
if (W) {
|
||||
ir.SetRegister(n, writeback_address);
|
||||
v.ir.SetRegister(n, writeback_address);
|
||||
}
|
||||
return true;
|
||||
return v.MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
bool TranslatorVisitor::thumb32_LDMDB(bool W, Reg n, Imm<16> reg_list) {
|
||||
|
@ -72,7 +74,7 @@ bool TranslatorVisitor::thumb32_LDMDB(bool W, Reg n, Imm<16> reg_list) {
|
|||
|
||||
// Start address is the same as the writeback address.
|
||||
const IR::U32 start_address = ir.Sub(ir.GetRegister(n), ir.Imm32(4 * num_regs));
|
||||
return LDMHelper(ir, W, n, regs_imm, start_address, start_address);
|
||||
return LDMHelper(*this, W, n, regs_imm, start_address, start_address);
|
||||
}
|
||||
|
||||
bool TranslatorVisitor::thumb32_LDMIA(bool W, Reg n, Imm<16> reg_list) {
|
||||
|
@ -97,7 +99,7 @@ bool TranslatorVisitor::thumb32_LDMIA(bool W, Reg n, Imm<16> reg_list) {
|
|||
|
||||
const auto start_address = ir.GetRegister(n);
|
||||
const auto writeback_address = ir.Add(start_address, ir.Imm32(num_regs * 4));
|
||||
return LDMHelper(ir, W, n, regs_imm, start_address, writeback_address);
|
||||
return LDMHelper(*this, W, n, regs_imm, start_address, writeback_address);
|
||||
}
|
||||
|
||||
bool TranslatorVisitor::thumb32_POP(Imm<16> reg_list) {
|
||||
|
@ -124,7 +126,7 @@ bool TranslatorVisitor::thumb32_STMIA(bool W, Reg n, Imm<15> reg_list) {
|
|||
|
||||
const auto start_address = ir.GetRegister(n);
|
||||
const auto writeback_address = ir.Add(start_address, ir.Imm32(num_regs * 4));
|
||||
return STMHelper(ir, W, n, regs_imm, start_address, writeback_address);
|
||||
return STMHelper(*this, W, n, regs_imm, start_address, writeback_address);
|
||||
}
|
||||
|
||||
bool TranslatorVisitor::thumb32_STMDB(bool W, Reg n, Imm<15> reg_list) {
|
||||
|
@ -143,7 +145,7 @@ bool TranslatorVisitor::thumb32_STMDB(bool W, Reg n, Imm<15> reg_list) {
|
|||
|
||||
// Start address is the same as the writeback address.
|
||||
const IR::U32 start_address = ir.Sub(ir.GetRegister(n), ir.Imm32(4 * num_regs));
|
||||
return STMHelper(ir, W, n, regs_imm, start_address, start_address);
|
||||
return STMHelper(*this, W, n, regs_imm, start_address, start_address);
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::A32
|
||||
|
|
|
@ -23,12 +23,16 @@ bool TranslatorVisitor::thumb32_LDR_lit(bool U, Reg t, Imm<12> imm12) {
|
|||
if (t == Reg::PC) {
|
||||
ir.UpdateUpperLocationDescriptor();
|
||||
ir.LoadWritePC(data);
|
||||
ir.SetTerm(IR::Term::FastDispatchHint{});
|
||||
if (options.check_halt_on_memory_access) {
|
||||
ir.SetTerm(IR::Term::CheckHalt{IR::Term::ReturnToDispatch{}});
|
||||
} else {
|
||||
ir.SetTerm(IR::Term::FastDispatchHint{});
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
ir.SetRegister(t, data);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
bool TranslatorVisitor::thumb32_LDR_imm8(Reg n, Reg t, bool P, bool U, bool W, Imm<8> imm8) {
|
||||
|
@ -58,7 +62,9 @@ bool TranslatorVisitor::thumb32_LDR_imm8(Reg n, Reg t, bool P, bool U, bool W, I
|
|||
ir.UpdateUpperLocationDescriptor();
|
||||
ir.LoadWritePC(data);
|
||||
|
||||
if (!P && W && n == Reg::R13) {
|
||||
if (options.check_halt_on_memory_access) {
|
||||
ir.SetTerm(IR::Term::CheckHalt{IR::Term::ReturnToDispatch{}});
|
||||
} else if (!P && W && n == Reg::R13) {
|
||||
ir.SetTerm(IR::Term::PopRSBHint{});
|
||||
} else {
|
||||
ir.SetTerm(IR::Term::FastDispatchHint{});
|
||||
|
@ -68,7 +74,7 @@ bool TranslatorVisitor::thumb32_LDR_imm8(Reg n, Reg t, bool P, bool U, bool W, I
|
|||
}
|
||||
|
||||
ir.SetRegister(t, data);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
bool TranslatorVisitor::thumb32_LDR_imm12(Reg n, Reg t, Imm<12> imm12) {
|
||||
|
@ -84,12 +90,16 @@ bool TranslatorVisitor::thumb32_LDR_imm12(Reg n, Reg t, Imm<12> imm12) {
|
|||
if (t == Reg::PC) {
|
||||
ir.UpdateUpperLocationDescriptor();
|
||||
ir.LoadWritePC(data);
|
||||
ir.SetTerm(IR::Term::FastDispatchHint{});
|
||||
if (options.check_halt_on_memory_access) {
|
||||
ir.SetTerm(IR::Term::CheckHalt{IR::Term::ReturnToDispatch{}});
|
||||
} else {
|
||||
ir.SetTerm(IR::Term::FastDispatchHint{});
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
ir.SetRegister(t, data);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
bool TranslatorVisitor::thumb32_LDR_reg(Reg n, Reg t, Imm<2> imm2, Reg m) {
|
||||
|
@ -109,12 +119,16 @@ bool TranslatorVisitor::thumb32_LDR_reg(Reg n, Reg t, Imm<2> imm2, Reg m) {
|
|||
if (t == Reg::PC) {
|
||||
ir.UpdateUpperLocationDescriptor();
|
||||
ir.LoadWritePC(data);
|
||||
ir.SetTerm(IR::Term::FastDispatchHint{});
|
||||
if (options.check_halt_on_memory_access) {
|
||||
ir.SetTerm(IR::Term::CheckHalt{IR::Term::ReturnToDispatch{}});
|
||||
} else {
|
||||
ir.SetTerm(IR::Term::FastDispatchHint{});
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
ir.SetRegister(t, data);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
bool TranslatorVisitor::thumb32_LDRT(Reg n, Reg t, Imm<8> imm8) {
|
||||
|
|
|
@ -1201,7 +1201,7 @@ bool TranslatorVisitor::vfp_VPOP(Cond cond, bool D, size_t Vd, bool sz, Imm<8> i
|
|||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// VPUSH.{F32,F64} <list>
|
||||
|
@ -1242,7 +1242,7 @@ bool TranslatorVisitor::vfp_VPUSH(Cond cond, bool D, size_t Vd, bool sz, Imm<8>
|
|||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// VLDR<c> <Dd>, [<Rn>{, #+/-<imm>}]
|
||||
|
@ -1268,7 +1268,7 @@ bool TranslatorVisitor::vfp_VLDR(Cond cond, bool U, bool D, Reg n, size_t Vd, bo
|
|||
ir.SetExtendedRegister(d, ir.ReadMemory32(address, IR::AccType::ATOMIC));
|
||||
}
|
||||
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// VSTR<c> <Dd>, [<Rn>{, #+/-<imm>}]
|
||||
|
@ -1295,7 +1295,7 @@ bool TranslatorVisitor::vfp_VSTR(Cond cond, bool U, bool D, Reg n, size_t Vd, bo
|
|||
ir.WriteMemory32(address, ir.GetExtendedRegister(d), IR::AccType::ATOMIC);
|
||||
}
|
||||
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// VSTM{mode}<c> <Rn>{!}, <list of double registers>
|
||||
|
@ -1347,7 +1347,7 @@ bool TranslatorVisitor::vfp_VSTM_a1(Cond cond, bool p, bool u, bool D, bool w, R
|
|||
address = ir.Add(address, ir.Imm32(4));
|
||||
}
|
||||
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// VSTM{mode}<c> <Rn>{!}, <list of single registers>
|
||||
|
@ -1390,7 +1390,7 @@ bool TranslatorVisitor::vfp_VSTM_a2(Cond cond, bool p, bool u, bool D, bool w, R
|
|||
address = ir.Add(address, ir.Imm32(4));
|
||||
}
|
||||
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// VLDM{mode}<c> <Rn>{!}, <list of double registers>
|
||||
|
@ -1440,7 +1440,7 @@ bool TranslatorVisitor::vfp_VLDM_a1(Cond cond, bool p, bool u, bool D, bool w, R
|
|||
ir.SetExtendedRegister(d + i, ir.Pack2x32To1x64(word1, word2));
|
||||
}
|
||||
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
// VLDM{mode}<c> <Rn>{!}, <list of single registers>
|
||||
|
@ -1483,7 +1483,7 @@ bool TranslatorVisitor::vfp_VLDM_a2(Cond cond, bool p, bool u, bool D, bool w, R
|
|||
ir.SetExtendedRegister(d + i, word);
|
||||
}
|
||||
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::A32
|
||||
|
|
|
@ -34,6 +34,12 @@ struct TranslationOptions {
|
|||
/// If this is false, we treat the instruction as a NOP.
|
||||
/// If this is true, we emit an ExceptionRaised instruction.
|
||||
bool hook_hint_instructions = true;
|
||||
|
||||
/// This changes what IR we emit when we translate a memory instruction.
|
||||
/// If this is false, memory accesses are not considered terminal.
|
||||
/// If this is true, memory access are considered terminal. This allows
|
||||
/// accurately emulating protection fault handlers.
|
||||
bool check_halt_on_memory_access = false;
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
|
@ -41,6 +41,15 @@ bool TranslatorVisitor::RaiseException(Exception exception) {
|
|||
return false;
|
||||
}
|
||||
|
||||
bool TranslatorVisitor::MemoryInstructionContinues() {
|
||||
if (options.check_halt_on_memory_access) {
|
||||
ir.SetTerm(IR::Term::LinkBlock{ir.current_location->AdvancePC(4)});
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
std::optional<TranslatorVisitor::BitMasks> TranslatorVisitor::DecodeBitMasks(bool immN, Imm<6> imms, Imm<6> immr, bool immediate) {
|
||||
const int len = mcl::bit::highest_set_bit((immN ? 1 << 6 : 0) | (imms.ZeroExtend() ^ 0b111111));
|
||||
if (len < 1) {
|
||||
|
|
|
@ -30,6 +30,7 @@ struct TranslatorVisitor final {
|
|||
bool ReservedValue();
|
||||
bool UnallocatedEncoding();
|
||||
bool RaiseException(Exception exception);
|
||||
bool MemoryInstructionContinues();
|
||||
|
||||
struct BitMasks {
|
||||
u64 wmask, tmask;
|
||||
|
|
|
@ -72,7 +72,7 @@ static bool ExclusiveSharedDecodeAndOperation(TranslatorVisitor& v, bool pair, s
|
|||
UNREACHABLE();
|
||||
}
|
||||
|
||||
return true;
|
||||
return v.MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
bool TranslatorVisitor::STXR(Imm<2> sz, Reg Rs, Reg Rn, Reg Rt) {
|
||||
|
@ -175,7 +175,7 @@ static bool OrderedSharedDecodeAndOperation(TranslatorVisitor& v, size_t size, b
|
|||
UNREACHABLE();
|
||||
}
|
||||
|
||||
return true;
|
||||
return v.MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
bool TranslatorVisitor::STLLR(Imm<2> sz, Reg Rn, Reg Rt) {
|
||||
|
|
|
@ -15,7 +15,7 @@ bool TranslatorVisitor::LDR_lit_gen(bool opc_0, Imm<19> imm19, Reg Rt) {
|
|||
const auto data = Mem(ir.Imm64(address), size, IR::AccType::NORMAL);
|
||||
|
||||
X(8 * size, Rt, data);
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
bool TranslatorVisitor::LDR_lit_fpsimd(Imm<2> opc, Imm<19> imm19, Vec Vt) {
|
||||
|
@ -33,7 +33,7 @@ bool TranslatorVisitor::LDR_lit_fpsimd(Imm<2> opc, Imm<19> imm19, Vec Vt) {
|
|||
} else {
|
||||
V(128, Vt, ir.ZeroExtendToQuad(data));
|
||||
}
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
bool TranslatorVisitor::LDRSW_lit(Imm<19> imm19, Reg Rt) {
|
||||
|
@ -42,7 +42,7 @@ bool TranslatorVisitor::LDRSW_lit(Imm<19> imm19, Reg Rt) {
|
|||
const auto data = Mem(ir.Imm64(address), 4, IR::AccType::NORMAL);
|
||||
|
||||
X(64, Rt, ir.SignExtendWordToLong(data));
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
bool TranslatorVisitor::PRFM_lit(Imm<19> /*imm19*/, Imm<5> /*prfop*/) {
|
||||
|
|
|
@ -104,7 +104,7 @@ static bool SharedDecodeAndOperation(TranslatorVisitor& v, bool wback, IR::MemOp
|
|||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
return v.MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
bool TranslatorVisitor::STx_mult_1(bool Q, Imm<4> opcode, Imm<2> size, Reg Rn, Vec Vt) {
|
||||
|
|
|
@ -72,7 +72,7 @@ static bool LoadStoreRegisterImmediate(TranslatorVisitor& v, bool wback, bool po
|
|||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
return v.MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
bool TranslatorVisitor::STRx_LDRx_imm_1(Imm<2> size, Imm<2> opc, Imm<9> imm9, bool not_postindex, Reg Rn, Reg Rt) {
|
||||
|
@ -165,7 +165,7 @@ static bool LoadStoreSIMD(TranslatorVisitor& v, bool wback, bool postindex, size
|
|||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
return v.MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
bool TranslatorVisitor::STR_imm_fpsimd_1(Imm<2> size, Imm<1> opc_1, Imm<9> imm9, bool not_postindex, Reg Rn, Vec Vt) {
|
||||
|
|
|
@ -78,7 +78,7 @@ bool TranslatorVisitor::STP_LDP_gen(Imm<2> opc, bool not_postindex, bool wback,
|
|||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
bool TranslatorVisitor::STP_LDP_fpsimd(Imm<2> opc, bool not_postindex, bool wback, Imm<1> L, Imm<7> imm7, Vec Vt2, Reg Rn, Vec Vt) {
|
||||
|
@ -148,7 +148,7 @@ bool TranslatorVisitor::STP_LDP_fpsimd(Imm<2> opc, bool not_postindex, bool wbac
|
|||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
} // namespace Dynarmic::A64
|
||||
|
|
|
@ -70,7 +70,7 @@ static bool RegSharedDecodeAndOperation(TranslatorVisitor& v, size_t scale, u8 s
|
|||
UNREACHABLE();
|
||||
}
|
||||
|
||||
return true;
|
||||
return v.MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
bool TranslatorVisitor::STRx_reg(Imm<2> size, Imm<1> opc_1, Reg Rm, Imm<3> option, bool S, Reg Rn, Reg Rt) {
|
||||
|
@ -128,7 +128,7 @@ static bool VecSharedDecodeAndOperation(TranslatorVisitor& v, size_t scale, u8 s
|
|||
UNREACHABLE();
|
||||
}
|
||||
|
||||
return true;
|
||||
return v.MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
bool TranslatorVisitor::STR_reg_fpsimd(Imm<2> size, Imm<1> opc_1, Reg Rm, Imm<3> option, bool S, Reg Rn, Vec Vt) {
|
||||
|
|
|
@ -22,7 +22,7 @@ static bool StoreRegister(TranslatorVisitor& v, const size_t datasize, const Imm
|
|||
|
||||
const IR::UAny data = v.X(datasize, Rt);
|
||||
v.Mem(address, datasize / 8, acctype, data);
|
||||
return true;
|
||||
return v.MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
static bool LoadRegister(TranslatorVisitor& v, const size_t datasize, const Imm<9> imm9, const Reg Rn, const Reg Rt) {
|
||||
|
@ -42,7 +42,7 @@ static bool LoadRegister(TranslatorVisitor& v, const size_t datasize, const Imm<
|
|||
// max is used to zeroextend < 32 to 32, and > 32 to 64
|
||||
const size_t extended_size = std::max<size_t>(32, datasize);
|
||||
v.X(extended_size, Rt, v.ZeroExtend(data, extended_size));
|
||||
return true;
|
||||
return v.MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
static bool LoadRegisterSigned(TranslatorVisitor& v, const size_t datasize, const Imm<2> opc, const Imm<9> imm9, const Reg Rn, const Reg Rt) {
|
||||
|
@ -90,7 +90,7 @@ static bool LoadRegisterSigned(TranslatorVisitor& v, const size_t datasize, cons
|
|||
// Prefetch(address, Rt);
|
||||
break;
|
||||
}
|
||||
return true;
|
||||
return v.MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
bool TranslatorVisitor::STTRB(Imm<9> imm9, Reg Rn, Reg Rt) {
|
||||
|
@ -144,6 +144,6 @@ bool TranslatorVisitor::LDTRSW(Imm<9> imm9, Reg Rn, Reg Rt) {
|
|||
|
||||
const IR::UAny data = Mem(address, 4, acctype);
|
||||
X(64, Rt, SignExtend(data, 64));
|
||||
return true;
|
||||
return MemoryInstructionContinues();
|
||||
}
|
||||
} // namespace Dynarmic::A64
|
||||
|
|
|
@ -98,7 +98,7 @@ static bool SharedDecodeAndOperation(TranslatorVisitor& v, bool wback, IR::MemOp
|
|||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
return v.MemoryInstructionContinues();
|
||||
}
|
||||
|
||||
bool TranslatorVisitor::LD1_sngl_1(bool Q, Imm<2> upper_opcode, bool S, Imm<2> size, Reg Rn, Vec Vt) {
|
||||
|
|
|
@ -208,6 +208,10 @@ struct UserConfig {
|
|||
/// to avoid writting certain unnecessary code only needed for cycle timers.
|
||||
bool wall_clock_cntpct = false;
|
||||
|
||||
/// This allows accurately emulating protection fault handlers. If true, we check
|
||||
/// for exit after every data memory access by the emulated program.
|
||||
bool check_halt_on_memory_access = false;
|
||||
|
||||
/// This option allows you to disable cycle counting. If this is set to false,
|
||||
/// AddTicks and GetTicksRemaining are never called, and no cycle counting is done.
|
||||
bool enable_cycle_counting = true;
|
||||
|
|
|
@ -273,6 +273,10 @@ struct UserConfig {
|
|||
/// to avoid writting certain unnecessary code only needed for cycle timers.
|
||||
bool wall_clock_cntpct = false;
|
||||
|
||||
/// This allows accurately emulating protection fault handlers. If true, we check
|
||||
/// for exit after every data memory access by the emulated program.
|
||||
bool check_halt_on_memory_access = false;
|
||||
|
||||
/// This option allows you to disable cycle counting. If this is set to false,
|
||||
/// AddTicks and GetTicksRemaining are never called, and no cycle counting is done.
|
||||
bool enable_cycle_counting = true;
|
||||
|
|
100
externals/dynarmic/tests/CMakeLists.txt
vendored
100
externals/dynarmic/tests/CMakeLists.txt
vendored
|
@ -1,16 +1,4 @@
|
|||
if ((NOT "A32" IN_LIST DYNARMIC_FRONTENDS) OR (NOT "A64" IN_LIST DYNARMIC_FRONTENDS))
|
||||
return()
|
||||
endif()
|
||||
|
||||
add_executable(dynarmic_tests
|
||||
A32/test_arm_disassembler.cpp
|
||||
A32/test_arm_instructions.cpp
|
||||
A32/test_thumb_instructions.cpp
|
||||
A32/testenv.h
|
||||
A64/a64.cpp
|
||||
A64/testenv.h
|
||||
cpu_info.cpp
|
||||
decoder_tests.cpp
|
||||
fp/FPToFixed.cpp
|
||||
fp/FPValue.cpp
|
||||
fp/mantissa_util_tests.cpp
|
||||
|
@ -19,46 +7,86 @@ add_executable(dynarmic_tests
|
|||
rand_int.h
|
||||
)
|
||||
|
||||
if (NOT MSVC)
|
||||
if ("A32" IN_LIST DYNARMIC_FRONTENDS)
|
||||
target_sources(dynarmic_tests PRIVATE
|
||||
rsqrt_test.cpp
|
||||
rsqrt_test_fn.s
|
||||
A32/test_arm_disassembler.cpp
|
||||
A32/test_arm_instructions.cpp
|
||||
A32/test_thumb_instructions.cpp
|
||||
A32/testenv.h
|
||||
decoder_tests.cpp
|
||||
)
|
||||
endif()
|
||||
|
||||
if ("A64" IN_LIST DYNARMIC_FRONTENDS)
|
||||
target_sources(dynarmic_tests PRIVATE
|
||||
A64/a64.cpp
|
||||
A64/testenv.h
|
||||
)
|
||||
endif()
|
||||
|
||||
if (DYNARMIC_TESTS_USE_UNICORN)
|
||||
target_link_libraries(dynarmic_tests PRIVATE Unicorn::Unicorn)
|
||||
|
||||
target_sources(dynarmic_tests PRIVATE
|
||||
A32/fuzz_arm.cpp
|
||||
A32/fuzz_thumb.cpp
|
||||
A64/fuzz_with_unicorn.cpp
|
||||
A64/misaligned_page_table.cpp
|
||||
A64/verify_unicorn.cpp
|
||||
fuzz_util.cpp
|
||||
fuzz_util.h
|
||||
unicorn_emu/a32_unicorn.cpp
|
||||
unicorn_emu/a32_unicorn.h
|
||||
unicorn_emu/a64_unicorn.cpp
|
||||
unicorn_emu/a64_unicorn.h
|
||||
)
|
||||
target_link_libraries(dynarmic_tests PRIVATE Unicorn::Unicorn)
|
||||
|
||||
if ("A32" IN_LIST DYNARMIC_FRONTENDS)
|
||||
target_sources(dynarmic_tests PRIVATE
|
||||
A32/fuzz_arm.cpp
|
||||
A32/fuzz_thumb.cpp
|
||||
unicorn_emu/a32_unicorn.cpp
|
||||
unicorn_emu/a32_unicorn.h
|
||||
)
|
||||
endif()
|
||||
|
||||
if ("A64" IN_LIST DYNARMIC_FRONTENDS)
|
||||
target_sources(dynarmic_tests PRIVATE
|
||||
A64/fuzz_with_unicorn.cpp
|
||||
A64/misaligned_page_table.cpp
|
||||
A64/verify_unicorn.cpp
|
||||
unicorn_emu/a64_unicorn.cpp
|
||||
unicorn_emu/a64_unicorn.h
|
||||
)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
add_executable(dynarmic_print_info
|
||||
print_info.cpp
|
||||
)
|
||||
if (ARCHITECTURE STREQUAL "x86_64")
|
||||
target_link_libraries(dynarmic_tests PRIVATE xbyak)
|
||||
|
||||
target_sources(dynarmic_tests PRIVATE
|
||||
x64_cpu_info.cpp
|
||||
)
|
||||
|
||||
if (NOT MSVC)
|
||||
target_sources(dynarmic_tests PRIVATE
|
||||
rsqrt_test.cpp
|
||||
rsqrt_test_fn.s
|
||||
)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
include(CreateDirectoryGroups)
|
||||
create_target_directory_groups(dynarmic_tests)
|
||||
create_target_directory_groups(dynarmic_print_info)
|
||||
|
||||
target_link_libraries(dynarmic_tests PRIVATE dynarmic boost catch fmt xbyak)
|
||||
if (("A32" IN_LIST DYNARMIC_FRONTENDS) AND ("A64" IN_LIST DYNARMIC_FRONTENDS))
|
||||
add_executable(dynarmic_print_info
|
||||
print_info.cpp
|
||||
)
|
||||
|
||||
create_target_directory_groups(dynarmic_print_info)
|
||||
|
||||
target_link_libraries(dynarmic_print_info PRIVATE dynarmic boost catch fmt)
|
||||
target_include_directories(dynarmic_print_info PRIVATE . ../src)
|
||||
target_compile_options(dynarmic_print_info PRIVATE ${DYNARMIC_CXX_FLAGS})
|
||||
target_compile_definitions(dynarmic_print_info PRIVATE FMT_USE_USER_DEFINED_LITERALS=1)
|
||||
endif()
|
||||
|
||||
create_target_directory_groups(dynarmic_tests)
|
||||
|
||||
target_link_libraries(dynarmic_tests PRIVATE dynarmic boost catch fmt)
|
||||
target_include_directories(dynarmic_tests PRIVATE . ../src)
|
||||
target_compile_options(dynarmic_tests PRIVATE ${DYNARMIC_CXX_FLAGS})
|
||||
target_compile_definitions(dynarmic_tests PRIVATE FMT_USE_USER_DEFINED_LITERALS=1 CATCH_CONFIG_ENABLE_BENCHMARKING=1)
|
||||
|
||||
target_link_libraries(dynarmic_print_info PRIVATE dynarmic boost catch fmt)
|
||||
target_include_directories(dynarmic_print_info PRIVATE . ../src)
|
||||
target_compile_options(dynarmic_print_info PRIVATE ${DYNARMIC_CXX_FLAGS})
|
||||
target_compile_definitions(dynarmic_print_info PRIVATE FMT_USE_USER_DEFINED_LITERALS=1)
|
||||
|
||||
add_test(dynarmic_tests dynarmic_tests --durations yes)
|
||||
|
|
115
externals/dynarmic/tests/x64_cpu_info.cpp
vendored
Executable file
115
externals/dynarmic/tests/x64_cpu_info.cpp
vendored
Executable file
|
@ -0,0 +1,115 @@
|
|||
/* This file is part of the dynarmic project.
|
||||
* Copyright (c) 2020 MerryMage
|
||||
* SPDX-License-Identifier: 0BSD
|
||||
*/
|
||||
|
||||
#include <array>
|
||||
#include <utility>
|
||||
|
||||
#include <catch2/catch.hpp>
|
||||
#include <xbyak/xbyak_util.h>
|
||||
|
||||
TEST_CASE("Host CPU supports", "[a64]") {
|
||||
using Cpu = Xbyak::util::Cpu;
|
||||
Cpu cpu_info;
|
||||
|
||||
std::array<uint32_t, 4> cpu_name;
|
||||
for (uint32_t i = 2; i < 5; ++i) {
|
||||
cpu_info.getCpuid(0x80000000 | i, cpu_name.data());
|
||||
std::printf("%.16s", reinterpret_cast<const char*>(cpu_name.data()));
|
||||
}
|
||||
std::putchar('\n');
|
||||
|
||||
cpu_info.putFamily();
|
||||
const std::array types{
|
||||
#define X(NAME) std::make_pair(Cpu::Type{Cpu::NAME}, &#NAME[1])
|
||||
X(t3DN),
|
||||
X(tADX),
|
||||
X(tAESNI),
|
||||
X(tAMD),
|
||||
X(tAMX_BF16),
|
||||
X(tAMX_INT8),
|
||||
X(tAMX_TILE),
|
||||
X(tAVX),
|
||||
X(tAVX2),
|
||||
X(tAVX512_4FMAPS),
|
||||
X(tAVX512_4VNNIW),
|
||||
X(tAVX512_BF16),
|
||||
X(tAVX512_BITALG),
|
||||
X(tAVX512_FP16),
|
||||
X(tAVX512_IFMA),
|
||||
X(tAVX512_VBMI),
|
||||
X(tAVX512_VBMI2),
|
||||
X(tAVX512_VNNI),
|
||||
X(tAVX512_VP2INTERSECT),
|
||||
X(tAVX512_VPOPCNTDQ),
|
||||
X(tAVX512BW),
|
||||
X(tAVX512CD),
|
||||
X(tAVX512DQ),
|
||||
X(tAVX512ER),
|
||||
X(tAVX512F),
|
||||
X(tAVX512IFMA),
|
||||
X(tAVX512PF),
|
||||
X(tAVX512VBMI),
|
||||
X(tAVX512VL),
|
||||
X(tAVX_VNNI),
|
||||
X(tBMI1),
|
||||
X(tBMI2),
|
||||
X(tCLDEMOTE),
|
||||
X(tCLFLUSHOPT),
|
||||
X(tCLZERO),
|
||||
X(tCMOV),
|
||||
X(tE3DN),
|
||||
X(tENHANCED_REP),
|
||||
X(tF16C),
|
||||
X(tFMA),
|
||||
X(tGFNI),
|
||||
X(tHLE),
|
||||
X(tINTEL),
|
||||
X(tLZCNT),
|
||||
X(tMMX),
|
||||
X(tMMX2),
|
||||
X(tMOVBE),
|
||||
X(tMOVDIR64B),
|
||||
X(tMOVDIRI),
|
||||
X(tMPX),
|
||||
X(tOSXSAVE),
|
||||
X(tPCLMULQDQ),
|
||||
X(tPOPCNT),
|
||||
X(tPREFETCHW),
|
||||
X(tPREFETCHWT1),
|
||||
X(tRDRAND),
|
||||
X(tRDSEED),
|
||||
X(tRDTSCP),
|
||||
X(tRTM),
|
||||
X(tSHA),
|
||||
X(tSMAP),
|
||||
X(tSSE),
|
||||
X(tSSE2),
|
||||
X(tSSE3),
|
||||
X(tSSE41),
|
||||
X(tSSE42),
|
||||
X(tSSSE3),
|
||||
X(tVAES),
|
||||
X(tVPCLMULQDQ),
|
||||
X(tWAITPKG),
|
||||
#undef X
|
||||
};
|
||||
|
||||
constexpr size_t line_max = 80;
|
||||
size_t line_length = 0;
|
||||
for (const auto& [type, name] : types) {
|
||||
if (cpu_info.has(type)) {
|
||||
const size_t name_length = std::strlen(name) + 1;
|
||||
if ((line_length + name_length) >= line_max) {
|
||||
line_length = name_length;
|
||||
std::putchar('\n');
|
||||
} else if (line_length) {
|
||||
std::putchar(' ');
|
||||
}
|
||||
std::fputs(name, stdout);
|
||||
line_length += name_length;
|
||||
}
|
||||
}
|
||||
std::putchar('\n');
|
||||
}
|
|
@ -15,6 +15,9 @@ enum class PageType : u8 {
|
|||
Unmapped,
|
||||
/// Page is mapped to regular memory. This is the only type you can get pointers to.
|
||||
Memory,
|
||||
/// Page is mapped to regular memory, but inaccessible from CPU fastmem and must use
|
||||
/// the callbacks.
|
||||
DebugMemory,
|
||||
/// Page is mapped to regular memory, but also needs to check for rasterizer cache flushing and
|
||||
/// invalidation
|
||||
RasterizerCachedMemory,
|
||||
|
|
|
@ -121,8 +121,15 @@ void ARM_Interface::Run() {
|
|||
|
||||
// Notify the debugger and go to sleep if a breakpoint was hit.
|
||||
if (Has(hr, breakpoint)) {
|
||||
RewindBreakpointInstruction();
|
||||
system.GetDebugger().NotifyThreadStopped(current_thread);
|
||||
current_thread->RequestSuspend(Kernel::SuspendType::Debug);
|
||||
current_thread->RequestSuspend(SuspendType::Debug);
|
||||
break;
|
||||
}
|
||||
if (Has(hr, watchpoint)) {
|
||||
RewindBreakpointInstruction();
|
||||
system.GetDebugger().NotifyThreadWatchpoint(current_thread, *HaltedWatchpoint());
|
||||
current_thread->RequestSuspend(SuspendType::Debug);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -136,4 +143,36 @@ void ARM_Interface::Run() {
|
|||
}
|
||||
}
|
||||
|
||||
void ARM_Interface::LoadWatchpointArray(const WatchpointArray& wp) {
|
||||
watchpoints = ℘
|
||||
}
|
||||
|
||||
const Kernel::DebugWatchpoint* ARM_Interface::MatchingWatchpoint(
|
||||
VAddr addr, u64 size, Kernel::DebugWatchpointType access_type) const {
|
||||
if (!watchpoints) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
const VAddr start_address{addr};
|
||||
const VAddr end_address{addr + size};
|
||||
|
||||
for (size_t i = 0; i < Core::Hardware::NUM_WATCHPOINTS; i++) {
|
||||
const auto& watch{(*watchpoints)[i]};
|
||||
|
||||
if (end_address <= watch.start_address) {
|
||||
continue;
|
||||
}
|
||||
if (start_address >= watch.end_address) {
|
||||
continue;
|
||||
}
|
||||
if ((access_type & watch.type) == Kernel::DebugWatchpointType::None) {
|
||||
continue;
|
||||
}
|
||||
|
||||
return &watch;
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
} // namespace Core
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <span>
|
||||
#include <vector>
|
||||
|
||||
#include <dynarmic/interface/halt_reason.h>
|
||||
|
@ -19,13 +20,16 @@ struct PageTable;
|
|||
|
||||
namespace Kernel {
|
||||
enum class VMAPermission : u8;
|
||||
}
|
||||
enum class DebugWatchpointType : u8;
|
||||
struct DebugWatchpoint;
|
||||
} // namespace Kernel
|
||||
|
||||
namespace Core {
|
||||
class System;
|
||||
class CPUInterruptHandler;
|
||||
|
||||
using CPUInterrupts = std::array<CPUInterruptHandler, Core::Hardware::NUM_CPU_CORES>;
|
||||
using WatchpointArray = std::array<Kernel::DebugWatchpoint, Core::Hardware::NUM_WATCHPOINTS>;
|
||||
|
||||
/// Generic ARMv8 CPU interface
|
||||
class ARM_Interface {
|
||||
|
@ -170,6 +174,7 @@ public:
|
|||
virtual void SaveContext(ThreadContext64& ctx) = 0;
|
||||
virtual void LoadContext(const ThreadContext32& ctx) = 0;
|
||||
virtual void LoadContext(const ThreadContext64& ctx) = 0;
|
||||
void LoadWatchpointArray(const WatchpointArray& wp);
|
||||
|
||||
/// Clears the exclusive monitor's state.
|
||||
virtual void ClearExclusiveState() = 0;
|
||||
|
@ -198,18 +203,24 @@ public:
|
|||
static constexpr Dynarmic::HaltReason break_loop = Dynarmic::HaltReason::UserDefined2;
|
||||
static constexpr Dynarmic::HaltReason svc_call = Dynarmic::HaltReason::UserDefined3;
|
||||
static constexpr Dynarmic::HaltReason breakpoint = Dynarmic::HaltReason::UserDefined4;
|
||||
static constexpr Dynarmic::HaltReason watchpoint = Dynarmic::HaltReason::UserDefined5;
|
||||
|
||||
protected:
|
||||
/// System context that this ARM interface is running under.
|
||||
System& system;
|
||||
CPUInterrupts& interrupt_handlers;
|
||||
const WatchpointArray* watchpoints;
|
||||
bool uses_wall_clock;
|
||||
|
||||
static void SymbolicateBacktrace(Core::System& system, std::vector<BacktraceEntry>& out);
|
||||
const Kernel::DebugWatchpoint* MatchingWatchpoint(
|
||||
VAddr addr, u64 size, Kernel::DebugWatchpointType access_type) const;
|
||||
|
||||
virtual Dynarmic::HaltReason RunJit() = 0;
|
||||
virtual Dynarmic::HaltReason StepJit() = 0;
|
||||
virtual u32 GetSvcNumber() const = 0;
|
||||
virtual const Kernel::DebugWatchpoint* HaltedWatchpoint() const = 0;
|
||||
virtual void RewindBreakpointInstruction() = 0;
|
||||
};
|
||||
|
||||
} // namespace Core
|
||||
|
|
|
@ -29,45 +29,62 @@ using namespace Common::Literals;
|
|||
class DynarmicCallbacks32 : public Dynarmic::A32::UserCallbacks {
|
||||
public:
|
||||
explicit DynarmicCallbacks32(ARM_Dynarmic_32& parent_)
|
||||
: parent{parent_}, memory(parent.system.Memory()) {}
|
||||
: parent{parent_},
|
||||
memory(parent.system.Memory()), debugger_enabled{parent.system.DebuggerEnabled()} {}
|
||||
|
||||
u8 MemoryRead8(u32 vaddr) override {
|
||||
CheckMemoryAccess(vaddr, 1, Kernel::DebugWatchpointType::Read);
|
||||
return memory.Read8(vaddr);
|
||||
}
|
||||
u16 MemoryRead16(u32 vaddr) override {
|
||||
CheckMemoryAccess(vaddr, 2, Kernel::DebugWatchpointType::Read);
|
||||
return memory.Read16(vaddr);
|
||||
}
|
||||
u32 MemoryRead32(u32 vaddr) override {
|
||||
CheckMemoryAccess(vaddr, 4, Kernel::DebugWatchpointType::Read);
|
||||
return memory.Read32(vaddr);
|
||||
}
|
||||
u64 MemoryRead64(u32 vaddr) override {
|
||||
CheckMemoryAccess(vaddr, 8, Kernel::DebugWatchpointType::Read);
|
||||
return memory.Read64(vaddr);
|
||||
}
|
||||
|
||||
void MemoryWrite8(u32 vaddr, u8 value) override {
|
||||
memory.Write8(vaddr, value);
|
||||
if (CheckMemoryAccess(vaddr, 1, Kernel::DebugWatchpointType::Write)) {
|
||||
memory.Write8(vaddr, value);
|
||||
}
|
||||
}
|
||||
void MemoryWrite16(u32 vaddr, u16 value) override {
|
||||
memory.Write16(vaddr, value);
|
||||
if (CheckMemoryAccess(vaddr, 2, Kernel::DebugWatchpointType::Write)) {
|
||||
memory.Write16(vaddr, value);
|
||||
}
|
||||
}
|
||||
void MemoryWrite32(u32 vaddr, u32 value) override {
|
||||
memory.Write32(vaddr, value);
|
||||
if (CheckMemoryAccess(vaddr, 4, Kernel::DebugWatchpointType::Write)) {
|
||||
memory.Write32(vaddr, value);
|
||||
}
|
||||
}
|
||||
void MemoryWrite64(u32 vaddr, u64 value) override {
|
||||
memory.Write64(vaddr, value);
|
||||
if (CheckMemoryAccess(vaddr, 8, Kernel::DebugWatchpointType::Write)) {
|
||||
memory.Write64(vaddr, value);
|
||||
}
|
||||
}
|
||||
|
||||
bool MemoryWriteExclusive8(u32 vaddr, u8 value, u8 expected) override {
|
||||
return memory.WriteExclusive8(vaddr, value, expected);
|
||||
return CheckMemoryAccess(vaddr, 1, Kernel::DebugWatchpointType::Write) &&
|
||||
memory.WriteExclusive8(vaddr, value, expected);
|
||||
}
|
||||
bool MemoryWriteExclusive16(u32 vaddr, u16 value, u16 expected) override {
|
||||
return memory.WriteExclusive16(vaddr, value, expected);
|
||||
return CheckMemoryAccess(vaddr, 2, Kernel::DebugWatchpointType::Write) &&
|
||||
memory.WriteExclusive16(vaddr, value, expected);
|
||||
}
|
||||
bool MemoryWriteExclusive32(u32 vaddr, u32 value, u32 expected) override {
|
||||
return memory.WriteExclusive32(vaddr, value, expected);
|
||||
return CheckMemoryAccess(vaddr, 4, Kernel::DebugWatchpointType::Write) &&
|
||||
memory.WriteExclusive32(vaddr, value, expected);
|
||||
}
|
||||
bool MemoryWriteExclusive64(u32 vaddr, u64 value, u64 expected) override {
|
||||
return memory.WriteExclusive64(vaddr, value, expected);
|
||||
return CheckMemoryAccess(vaddr, 8, Kernel::DebugWatchpointType::Write) &&
|
||||
memory.WriteExclusive64(vaddr, value, expected);
|
||||
}
|
||||
|
||||
void InterpreterFallback(u32 pc, std::size_t num_instructions) override {
|
||||
|
@ -77,8 +94,8 @@ public:
|
|||
}
|
||||
|
||||
void ExceptionRaised(u32 pc, Dynarmic::A32::Exception exception) override {
|
||||
if (parent.system.DebuggerEnabled()) {
|
||||
parent.jit.load()->Regs()[15] = pc;
|
||||
if (debugger_enabled) {
|
||||
parent.SaveContext(parent.breakpoint_context);
|
||||
parent.jit.load()->HaltExecution(ARM_Interface::breakpoint);
|
||||
return;
|
||||
}
|
||||
|
@ -124,9 +141,26 @@ public:
|
|||
return std::max<s64>(parent.system.CoreTiming().GetDowncount(), 0);
|
||||
}
|
||||
|
||||
bool CheckMemoryAccess(VAddr addr, u64 size, Kernel::DebugWatchpointType type) {
|
||||
if (!debugger_enabled) {
|
||||
return true;
|
||||
}
|
||||
|
||||
const auto match{parent.MatchingWatchpoint(addr, size, type)};
|
||||
if (match) {
|
||||
parent.SaveContext(parent.breakpoint_context);
|
||||
parent.jit.load()->HaltExecution(ARM_Interface::watchpoint);
|
||||
parent.halted_watchpoint = match;
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
ARM_Dynarmic_32& parent;
|
||||
Core::Memory::Memory& memory;
|
||||
std::size_t num_interpreted_instructions{};
|
||||
bool debugger_enabled{};
|
||||
static constexpr u64 minimum_run_cycles = 10000U;
|
||||
};
|
||||
|
||||
|
@ -161,6 +195,11 @@ std::shared_ptr<Dynarmic::A32::Jit> ARM_Dynarmic_32::MakeJit(Common::PageTable*
|
|||
config.code_cache_size = 512_MiB;
|
||||
config.far_code_offset = 400_MiB;
|
||||
|
||||
// Allow memory fault handling to work
|
||||
if (system.DebuggerEnabled()) {
|
||||
config.check_halt_on_memory_access = true;
|
||||
}
|
||||
|
||||
// null_jit
|
||||
if (!page_table) {
|
||||
// Don't waste too much memory on null_jit
|
||||
|
@ -255,6 +294,14 @@ u32 ARM_Dynarmic_32::GetSvcNumber() const {
|
|||
return svc_swi;
|
||||
}
|
||||
|
||||
const Kernel::DebugWatchpoint* ARM_Dynarmic_32::HaltedWatchpoint() const {
|
||||
return halted_watchpoint;
|
||||
}
|
||||
|
||||
void ARM_Dynarmic_32::RewindBreakpointInstruction() {
|
||||
LoadContext(breakpoint_context);
|
||||
}
|
||||
|
||||
ARM_Dynarmic_32::ARM_Dynarmic_32(System& system_, CPUInterrupts& interrupt_handlers_,
|
||||
bool uses_wall_clock_, ExclusiveMonitor& exclusive_monitor_,
|
||||
std::size_t core_index_)
|
||||
|
|
|
@ -72,6 +72,8 @@ protected:
|
|||
Dynarmic::HaltReason RunJit() override;
|
||||
Dynarmic::HaltReason StepJit() override;
|
||||
u32 GetSvcNumber() const override;
|
||||
const Kernel::DebugWatchpoint* HaltedWatchpoint() const override;
|
||||
void RewindBreakpointInstruction() override;
|
||||
|
||||
private:
|
||||
std::shared_ptr<Dynarmic::A32::Jit> MakeJit(Common::PageTable* page_table) const;
|
||||
|
@ -98,6 +100,10 @@ private:
|
|||
|
||||
// SVC callback
|
||||
u32 svc_swi{};
|
||||
|
||||
// Watchpoint info
|
||||
const Kernel::DebugWatchpoint* halted_watchpoint;
|
||||
ThreadContext32 breakpoint_context;
|
||||
};
|
||||
|
||||
} // namespace Core
|
||||
|
|
|
@ -29,55 +29,76 @@ using namespace Common::Literals;
|
|||
class DynarmicCallbacks64 : public Dynarmic::A64::UserCallbacks {
|
||||
public:
|
||||
explicit DynarmicCallbacks64(ARM_Dynarmic_64& parent_)
|
||||
: parent{parent_}, memory(parent.system.Memory()) {}
|
||||
: parent{parent_},
|
||||
memory(parent.system.Memory()), debugger_enabled{parent.system.DebuggerEnabled()} {}
|
||||
|
||||
u8 MemoryRead8(u64 vaddr) override {
|
||||
CheckMemoryAccess(vaddr, 1, Kernel::DebugWatchpointType::Read);
|
||||
return memory.Read8(vaddr);
|
||||
}
|
||||
u16 MemoryRead16(u64 vaddr) override {
|
||||
CheckMemoryAccess(vaddr, 2, Kernel::DebugWatchpointType::Read);
|
||||
return memory.Read16(vaddr);
|
||||
}
|
||||
u32 MemoryRead32(u64 vaddr) override {
|
||||
CheckMemoryAccess(vaddr, 4, Kernel::DebugWatchpointType::Read);
|
||||
return memory.Read32(vaddr);
|
||||
}
|
||||
u64 MemoryRead64(u64 vaddr) override {
|
||||
CheckMemoryAccess(vaddr, 8, Kernel::DebugWatchpointType::Read);
|
||||
return memory.Read64(vaddr);
|
||||
}
|
||||
Vector MemoryRead128(u64 vaddr) override {
|
||||
CheckMemoryAccess(vaddr, 16, Kernel::DebugWatchpointType::Read);
|
||||
return {memory.Read64(vaddr), memory.Read64(vaddr + 8)};
|
||||
}
|
||||
|
||||
void MemoryWrite8(u64 vaddr, u8 value) override {
|
||||
memory.Write8(vaddr, value);
|
||||
if (CheckMemoryAccess(vaddr, 1, Kernel::DebugWatchpointType::Write)) {
|
||||
memory.Write8(vaddr, value);
|
||||
}
|
||||
}
|
||||
void MemoryWrite16(u64 vaddr, u16 value) override {
|
||||
memory.Write16(vaddr, value);
|
||||
if (CheckMemoryAccess(vaddr, 2, Kernel::DebugWatchpointType::Write)) {
|
||||
memory.Write16(vaddr, value);
|
||||
}
|
||||
}
|
||||
void MemoryWrite32(u64 vaddr, u32 value) override {
|
||||
memory.Write32(vaddr, value);
|
||||
if (CheckMemoryAccess(vaddr, 4, Kernel::DebugWatchpointType::Write)) {
|
||||
memory.Write32(vaddr, value);
|
||||
}
|
||||
}
|
||||
void MemoryWrite64(u64 vaddr, u64 value) override {
|
||||
memory.Write64(vaddr, value);
|
||||
if (CheckMemoryAccess(vaddr, 8, Kernel::DebugWatchpointType::Write)) {
|
||||
memory.Write64(vaddr, value);
|
||||
}
|
||||
}
|
||||
void MemoryWrite128(u64 vaddr, Vector value) override {
|
||||
memory.Write64(vaddr, value[0]);
|
||||
memory.Write64(vaddr + 8, value[1]);
|
||||
if (CheckMemoryAccess(vaddr, 16, Kernel::DebugWatchpointType::Write)) {
|
||||
memory.Write64(vaddr, value[0]);
|
||||
memory.Write64(vaddr + 8, value[1]);
|
||||
}
|
||||
}
|
||||
|
||||
bool MemoryWriteExclusive8(u64 vaddr, std::uint8_t value, std::uint8_t expected) override {
|
||||
return memory.WriteExclusive8(vaddr, value, expected);
|
||||
return CheckMemoryAccess(vaddr, 1, Kernel::DebugWatchpointType::Write) &&
|
||||
memory.WriteExclusive8(vaddr, value, expected);
|
||||
}
|
||||
bool MemoryWriteExclusive16(u64 vaddr, std::uint16_t value, std::uint16_t expected) override {
|
||||
return memory.WriteExclusive16(vaddr, value, expected);
|
||||
return CheckMemoryAccess(vaddr, 2, Kernel::DebugWatchpointType::Write) &&
|
||||
memory.WriteExclusive16(vaddr, value, expected);
|
||||
}
|
||||
bool MemoryWriteExclusive32(u64 vaddr, std::uint32_t value, std::uint32_t expected) override {
|
||||
return memory.WriteExclusive32(vaddr, value, expected);
|
||||
return CheckMemoryAccess(vaddr, 4, Kernel::DebugWatchpointType::Write) &&
|
||||
memory.WriteExclusive32(vaddr, value, expected);
|
||||
}
|
||||
bool MemoryWriteExclusive64(u64 vaddr, std::uint64_t value, std::uint64_t expected) override {
|
||||
return memory.WriteExclusive64(vaddr, value, expected);
|
||||
return CheckMemoryAccess(vaddr, 8, Kernel::DebugWatchpointType::Write) &&
|
||||
memory.WriteExclusive64(vaddr, value, expected);
|
||||
}
|
||||
bool MemoryWriteExclusive128(u64 vaddr, Vector value, Vector expected) override {
|
||||
return memory.WriteExclusive128(vaddr, value, expected);
|
||||
return CheckMemoryAccess(vaddr, 16, Kernel::DebugWatchpointType::Write) &&
|
||||
memory.WriteExclusive128(vaddr, value, expected);
|
||||
}
|
||||
|
||||
void InterpreterFallback(u64 pc, std::size_t num_instructions) override {
|
||||
|
@ -118,8 +139,8 @@ public:
|
|||
case Dynarmic::A64::Exception::Yield:
|
||||
return;
|
||||
default:
|
||||
if (parent.system.DebuggerEnabled()) {
|
||||
parent.jit.load()->SetPC(pc);
|
||||
if (debugger_enabled) {
|
||||
parent.SaveContext(parent.breakpoint_context);
|
||||
parent.jit.load()->HaltExecution(ARM_Interface::breakpoint);
|
||||
return;
|
||||
}
|
||||
|
@ -167,10 +188,27 @@ public:
|
|||
return parent.system.CoreTiming().GetClockTicks();
|
||||
}
|
||||
|
||||
bool CheckMemoryAccess(VAddr addr, u64 size, Kernel::DebugWatchpointType type) {
|
||||
if (!debugger_enabled) {
|
||||
return true;
|
||||
}
|
||||
|
||||
const auto match{parent.MatchingWatchpoint(addr, size, type)};
|
||||
if (match) {
|
||||
parent.SaveContext(parent.breakpoint_context);
|
||||
parent.jit.load()->HaltExecution(ARM_Interface::watchpoint);
|
||||
parent.halted_watchpoint = match;
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
ARM_Dynarmic_64& parent;
|
||||
Core::Memory::Memory& memory;
|
||||
u64 tpidrro_el0 = 0;
|
||||
u64 tpidr_el0 = 0;
|
||||
bool debugger_enabled{};
|
||||
static constexpr u64 minimum_run_cycles = 10000U;
|
||||
};
|
||||
|
||||
|
@ -221,6 +259,11 @@ std::shared_ptr<Dynarmic::A64::Jit> ARM_Dynarmic_64::MakeJit(Common::PageTable*
|
|||
config.code_cache_size = 512_MiB;
|
||||
config.far_code_offset = 400_MiB;
|
||||
|
||||
// Allow memory fault handling to work
|
||||
if (system.DebuggerEnabled()) {
|
||||
config.check_halt_on_memory_access = true;
|
||||
}
|
||||
|
||||
// null_jit
|
||||
if (!page_table) {
|
||||
// Don't waste too much memory on null_jit
|
||||
|
@ -315,6 +358,14 @@ u32 ARM_Dynarmic_64::GetSvcNumber() const {
|
|||
return svc_swi;
|
||||
}
|
||||
|
||||
const Kernel::DebugWatchpoint* ARM_Dynarmic_64::HaltedWatchpoint() const {
|
||||
return halted_watchpoint;
|
||||
}
|
||||
|
||||
void ARM_Dynarmic_64::RewindBreakpointInstruction() {
|
||||
LoadContext(breakpoint_context);
|
||||
}
|
||||
|
||||
ARM_Dynarmic_64::ARM_Dynarmic_64(System& system_, CPUInterrupts& interrupt_handlers_,
|
||||
bool uses_wall_clock_, ExclusiveMonitor& exclusive_monitor_,
|
||||
std::size_t core_index_)
|
||||
|
|
|
@ -66,6 +66,8 @@ protected:
|
|||
Dynarmic::HaltReason RunJit() override;
|
||||
Dynarmic::HaltReason StepJit() override;
|
||||
u32 GetSvcNumber() const override;
|
||||
const Kernel::DebugWatchpoint* HaltedWatchpoint() const override;
|
||||
void RewindBreakpointInstruction() override;
|
||||
|
||||
private:
|
||||
std::shared_ptr<Dynarmic::A64::Jit> MakeJit(Common::PageTable* page_table,
|
||||
|
@ -91,6 +93,10 @@ private:
|
|||
|
||||
// SVC callback
|
||||
u32 svc_swi{};
|
||||
|
||||
// Breakpoint info
|
||||
const Kernel::DebugWatchpoint* halted_watchpoint;
|
||||
ThreadContext64 breakpoint_context;
|
||||
};
|
||||
|
||||
} // namespace Core
|
||||
|
|
|
@ -44,12 +44,14 @@ static std::span<const u8> ReceiveInto(Readable& r, Buffer& buffer) {
|
|||
|
||||
enum class SignalType {
|
||||
Stopped,
|
||||
Watchpoint,
|
||||
ShuttingDown,
|
||||
};
|
||||
|
||||
struct SignalInfo {
|
||||
SignalType type;
|
||||
Kernel::KThread* thread;
|
||||
const Kernel::DebugWatchpoint* watchpoint;
|
||||
};
|
||||
|
||||
namespace Core {
|
||||
|
@ -157,13 +159,19 @@ private:
|
|||
void PipeData(std::span<const u8> data) {
|
||||
switch (info.type) {
|
||||
case SignalType::Stopped:
|
||||
case SignalType::Watchpoint:
|
||||
// Stop emulation.
|
||||
PauseEmulation();
|
||||
|
||||
// Notify the client.
|
||||
active_thread = info.thread;
|
||||
UpdateActiveThread();
|
||||
frontend->Stopped(active_thread);
|
||||
|
||||
if (info.type == SignalType::Watchpoint) {
|
||||
frontend->Watchpoint(active_thread, *info.watchpoint);
|
||||
} else {
|
||||
frontend->Stopped(active_thread);
|
||||
}
|
||||
|
||||
break;
|
||||
case SignalType::ShuttingDown:
|
||||
|
@ -290,12 +298,17 @@ Debugger::Debugger(Core::System& system, u16 port) {
|
|||
Debugger::~Debugger() = default;
|
||||
|
||||
bool Debugger::NotifyThreadStopped(Kernel::KThread* thread) {
|
||||
return impl && impl->SignalDebugger(SignalInfo{SignalType::Stopped, thread});
|
||||
return impl && impl->SignalDebugger(SignalInfo{SignalType::Stopped, thread, nullptr});
|
||||
}
|
||||
|
||||
bool Debugger::NotifyThreadWatchpoint(Kernel::KThread* thread,
|
||||
const Kernel::DebugWatchpoint& watch) {
|
||||
return impl && impl->SignalDebugger(SignalInfo{SignalType::Watchpoint, thread, &watch});
|
||||
}
|
||||
|
||||
void Debugger::NotifyShutdown() {
|
||||
if (impl) {
|
||||
impl->SignalDebugger(SignalInfo{SignalType::ShuttingDown, nullptr});
|
||||
impl->SignalDebugger(SignalInfo{SignalType::ShuttingDown, nullptr, nullptr});
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -9,7 +9,8 @@
|
|||
|
||||
namespace Kernel {
|
||||
class KThread;
|
||||
}
|
||||
struct DebugWatchpoint;
|
||||
} // namespace Kernel
|
||||
|
||||
namespace Core {
|
||||
class System;
|
||||
|
@ -40,6 +41,11 @@ public:
|
|||
*/
|
||||
void NotifyShutdown();
|
||||
|
||||
/*
|
||||
* Notify the debugger that the given thread has stopped due to hitting a watchpoint.
|
||||
*/
|
||||
bool NotifyThreadWatchpoint(Kernel::KThread* thread, const Kernel::DebugWatchpoint& watch);
|
||||
|
||||
private:
|
||||
std::unique_ptr<DebuggerImpl> impl;
|
||||
};
|
||||
|
|
|
@ -11,7 +11,8 @@
|
|||
|
||||
namespace Kernel {
|
||||
class KThread;
|
||||
}
|
||||
struct DebugWatchpoint;
|
||||
} // namespace Kernel
|
||||
|
||||
namespace Core {
|
||||
|
||||
|
@ -71,6 +72,11 @@ public:
|
|||
*/
|
||||
virtual void ShuttingDown() = 0;
|
||||
|
||||
/*
|
||||
* Called when emulation has stopped on a watchpoint.
|
||||
*/
|
||||
virtual void Watchpoint(Kernel::KThread* thread, const Kernel::DebugWatchpoint& watch) = 0;
|
||||
|
||||
/**
|
||||
* Called when new data is asynchronously received on the client socket.
|
||||
* A list of actions to perform is returned.
|
||||
|
|
|
@ -112,6 +112,23 @@ void GDBStub::Stopped(Kernel::KThread* thread) {
|
|||
SendReply(arch->ThreadStatus(thread, GDB_STUB_SIGTRAP));
|
||||
}
|
||||
|
||||
void GDBStub::Watchpoint(Kernel::KThread* thread, const Kernel::DebugWatchpoint& watch) {
|
||||
const auto status{arch->ThreadStatus(thread, GDB_STUB_SIGTRAP)};
|
||||
|
||||
switch (watch.type) {
|
||||
case Kernel::DebugWatchpointType::Read:
|
||||
SendReply(fmt::format("{}rwatch:{:x};", status, watch.start_address));
|
||||
break;
|
||||
case Kernel::DebugWatchpointType::Write:
|
||||
SendReply(fmt::format("{}watch:{:x};", status, watch.start_address));
|
||||
break;
|
||||
case Kernel::DebugWatchpointType::ReadOrWrite:
|
||||
default:
|
||||
SendReply(fmt::format("{}awatch:{:x};", status, watch.start_address));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<DebuggerAction> GDBStub::ClientData(std::span<const u8> data) {
|
||||
std::vector<DebuggerAction> actions;
|
||||
current_command.insert(current_command.end(), data.begin(), data.end());
|
||||
|
@ -278,44 +295,124 @@ void GDBStub::ExecuteCommand(std::string_view packet, std::vector<DebuggerAction
|
|||
case 'c':
|
||||
actions.push_back(DebuggerAction::Continue);
|
||||
break;
|
||||
case 'Z': {
|
||||
const auto addr_sep{std::find(command.begin(), command.end(), ',') - command.begin() + 1};
|
||||
const size_t addr{static_cast<size_t>(strtoll(command.data() + addr_sep, nullptr, 16))};
|
||||
|
||||
if (system.Memory().IsValidVirtualAddress(addr)) {
|
||||
replaced_instructions[addr] = system.Memory().Read32(addr);
|
||||
system.Memory().Write32(addr, arch->BreakpointInstruction());
|
||||
system.InvalidateCpuInstructionCacheRange(addr, sizeof(u32));
|
||||
|
||||
SendReply(GDB_STUB_REPLY_OK);
|
||||
} else {
|
||||
SendReply(GDB_STUB_REPLY_ERR);
|
||||
}
|
||||
case 'Z':
|
||||
HandleBreakpointInsert(command);
|
||||
break;
|
||||
}
|
||||
case 'z': {
|
||||
const auto addr_sep{std::find(command.begin(), command.end(), ',') - command.begin() + 1};
|
||||
const size_t addr{static_cast<size_t>(strtoll(command.data() + addr_sep, nullptr, 16))};
|
||||
|
||||
const auto orig_insn{replaced_instructions.find(addr)};
|
||||
if (system.Memory().IsValidVirtualAddress(addr) &&
|
||||
orig_insn != replaced_instructions.end()) {
|
||||
system.Memory().Write32(addr, orig_insn->second);
|
||||
system.InvalidateCpuInstructionCacheRange(addr, sizeof(u32));
|
||||
replaced_instructions.erase(addr);
|
||||
|
||||
SendReply(GDB_STUB_REPLY_OK);
|
||||
} else {
|
||||
SendReply(GDB_STUB_REPLY_ERR);
|
||||
}
|
||||
case 'z':
|
||||
HandleBreakpointRemove(command);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
SendReply(GDB_STUB_REPLY_EMPTY);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
enum class BreakpointType {
|
||||
Software = 0,
|
||||
Hardware = 1,
|
||||
WriteWatch = 2,
|
||||
ReadWatch = 3,
|
||||
AccessWatch = 4,
|
||||
};
|
||||
|
||||
void GDBStub::HandleBreakpointInsert(std::string_view command) {
|
||||
const auto type{static_cast<BreakpointType>(strtoll(command.data(), nullptr, 16))};
|
||||
const auto addr_sep{std::find(command.begin(), command.end(), ',') - command.begin() + 1};
|
||||
const auto size_sep{std::find(command.begin() + addr_sep, command.end(), ',') -
|
||||
command.begin() + 1};
|
||||
const size_t addr{static_cast<size_t>(strtoll(command.data() + addr_sep, nullptr, 16))};
|
||||
const size_t size{static_cast<size_t>(strtoll(command.data() + size_sep, nullptr, 16))};
|
||||
|
||||
if (!system.Memory().IsValidVirtualAddressRange(addr, size)) {
|
||||
SendReply(GDB_STUB_REPLY_ERR);
|
||||
return;
|
||||
}
|
||||
|
||||
bool success{};
|
||||
|
||||
switch (type) {
|
||||
case BreakpointType::Software:
|
||||
replaced_instructions[addr] = system.Memory().Read32(addr);
|
||||
system.Memory().Write32(addr, arch->BreakpointInstruction());
|
||||
system.InvalidateCpuInstructionCacheRange(addr, sizeof(u32));
|
||||
success = true;
|
||||
break;
|
||||
case BreakpointType::WriteWatch:
|
||||
success = system.CurrentProcess()->InsertWatchpoint(system, addr, size,
|
||||
Kernel::DebugWatchpointType::Write);
|
||||
break;
|
||||
case BreakpointType::ReadWatch:
|
||||
success = system.CurrentProcess()->InsertWatchpoint(system, addr, size,
|
||||
Kernel::DebugWatchpointType::Read);
|
||||
break;
|
||||
case BreakpointType::AccessWatch:
|
||||
success = system.CurrentProcess()->InsertWatchpoint(
|
||||
system, addr, size, Kernel::DebugWatchpointType::ReadOrWrite);
|
||||
break;
|
||||
case BreakpointType::Hardware:
|
||||
default:
|
||||
SendReply(GDB_STUB_REPLY_EMPTY);
|
||||
return;
|
||||
}
|
||||
|
||||
if (success) {
|
||||
SendReply(GDB_STUB_REPLY_OK);
|
||||
} else {
|
||||
SendReply(GDB_STUB_REPLY_ERR);
|
||||
}
|
||||
}
|
||||
|
||||
void GDBStub::HandleBreakpointRemove(std::string_view command) {
|
||||
const auto type{static_cast<BreakpointType>(strtoll(command.data(), nullptr, 16))};
|
||||
const auto addr_sep{std::find(command.begin(), command.end(), ',') - command.begin() + 1};
|
||||
const auto size_sep{std::find(command.begin() + addr_sep, command.end(), ',') -
|
||||
command.begin() + 1};
|
||||
const size_t addr{static_cast<size_t>(strtoll(command.data() + addr_sep, nullptr, 16))};
|
||||
const size_t size{static_cast<size_t>(strtoll(command.data() + size_sep, nullptr, 16))};
|
||||
|
||||
if (!system.Memory().IsValidVirtualAddressRange(addr, size)) {
|
||||
SendReply(GDB_STUB_REPLY_ERR);
|
||||
return;
|
||||
}
|
||||
|
||||
bool success{};
|
||||
|
||||
switch (type) {
|
||||
case BreakpointType::Software: {
|
||||
const auto orig_insn{replaced_instructions.find(addr)};
|
||||
if (orig_insn != replaced_instructions.end()) {
|
||||
system.Memory().Write32(addr, orig_insn->second);
|
||||
system.InvalidateCpuInstructionCacheRange(addr, sizeof(u32));
|
||||
replaced_instructions.erase(addr);
|
||||
success = true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case BreakpointType::WriteWatch:
|
||||
success = system.CurrentProcess()->RemoveWatchpoint(system, addr, size,
|
||||
Kernel::DebugWatchpointType::Write);
|
||||
break;
|
||||
case BreakpointType::ReadWatch:
|
||||
success = system.CurrentProcess()->RemoveWatchpoint(system, addr, size,
|
||||
Kernel::DebugWatchpointType::Read);
|
||||
break;
|
||||
case BreakpointType::AccessWatch:
|
||||
success = system.CurrentProcess()->RemoveWatchpoint(
|
||||
system, addr, size, Kernel::DebugWatchpointType::ReadOrWrite);
|
||||
break;
|
||||
case BreakpointType::Hardware:
|
||||
default:
|
||||
SendReply(GDB_STUB_REPLY_EMPTY);
|
||||
return;
|
||||
}
|
||||
|
||||
if (success) {
|
||||
SendReply(GDB_STUB_REPLY_OK);
|
||||
} else {
|
||||
SendReply(GDB_STUB_REPLY_ERR);
|
||||
}
|
||||
}
|
||||
|
||||
// Structure offsets are from Atmosphere
|
||||
// See osdbg_thread_local_region.os.horizon.hpp and osdbg_thread_type.os.horizon.hpp
|
||||
|
||||
|
|
|
@ -24,6 +24,7 @@ public:
|
|||
void Connected() override;
|
||||
void Stopped(Kernel::KThread* thread) override;
|
||||
void ShuttingDown() override;
|
||||
void Watchpoint(Kernel::KThread* thread, const Kernel::DebugWatchpoint& watch) override;
|
||||
std::vector<DebuggerAction> ClientData(std::span<const u8> data) override;
|
||||
|
||||
private:
|
||||
|
@ -31,6 +32,8 @@ private:
|
|||
void ExecuteCommand(std::string_view packet, std::vector<DebuggerAction>& actions);
|
||||
void HandleVCont(std::string_view command, std::vector<DebuggerAction>& actions);
|
||||
void HandleQuery(std::string_view command);
|
||||
void HandleBreakpointInsert(std::string_view command);
|
||||
void HandleBreakpointRemove(std::string_view command);
|
||||
std::vector<char>::const_iterator CommandEnd() const;
|
||||
std::optional<std::string> DetachCommand();
|
||||
Kernel::KThread* GetThreadByID(u64 thread_id);
|
||||
|
|
|
@ -25,6 +25,9 @@ constexpr std::array<s32, Common::BitSize<u64>()> VirtualToPhysicalCoreMap{
|
|||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3,
|
||||
};
|
||||
|
||||
// Cortex-A57 supports 4 memory watchpoints
|
||||
constexpr u64 NUM_WATCHPOINTS = 4;
|
||||
|
||||
} // namespace Hardware
|
||||
|
||||
} // namespace Core
|
||||
|
|
|
@ -579,6 +579,52 @@ ResultCode KProcess::DeleteThreadLocalRegion(VAddr addr) {
|
|||
return ResultSuccess;
|
||||
}
|
||||
|
||||
bool KProcess::InsertWatchpoint(Core::System& system, VAddr addr, u64 size,
|
||||
DebugWatchpointType type) {
|
||||
const auto watch{std::find_if(watchpoints.begin(), watchpoints.end(), [&](const auto& wp) {
|
||||
return wp.type == DebugWatchpointType::None;
|
||||
})};
|
||||
|
||||
if (watch == watchpoints.end()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
watch->start_address = addr;
|
||||
watch->end_address = addr + size;
|
||||
watch->type = type;
|
||||
|
||||
for (VAddr page = Common::AlignDown(addr, PageSize); page < addr + size; page += PageSize) {
|
||||
debug_page_refcounts[page]++;
|
||||
system.Memory().MarkRegionDebug(page, PageSize, true);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool KProcess::RemoveWatchpoint(Core::System& system, VAddr addr, u64 size,
|
||||
DebugWatchpointType type) {
|
||||
const auto watch{std::find_if(watchpoints.begin(), watchpoints.end(), [&](const auto& wp) {
|
||||
return wp.start_address == addr && wp.end_address == addr + size && wp.type == type;
|
||||
})};
|
||||
|
||||
if (watch == watchpoints.end()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
watch->start_address = 0;
|
||||
watch->end_address = 0;
|
||||
watch->type = DebugWatchpointType::None;
|
||||
|
||||
for (VAddr page = Common::AlignDown(addr, PageSize); page < addr + size; page += PageSize) {
|
||||
debug_page_refcounts[page]--;
|
||||
if (!debug_page_refcounts[page]) {
|
||||
system.Memory().MarkRegionDebug(page, PageSize, false);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void KProcess::LoadModule(CodeSet code_set, VAddr base_addr) {
|
||||
const auto ReprotectSegment = [&](const CodeSet::Segment& segment,
|
||||
Svc::MemoryPermission permission) {
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <list>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include "common/common_types.h"
|
||||
#include "core/hle/kernel/k_address_arbiter.h"
|
||||
|
@ -68,6 +69,20 @@ enum class ProcessActivity : u32 {
|
|||
Paused,
|
||||
};
|
||||
|
||||
enum class DebugWatchpointType : u8 {
|
||||
None = 0,
|
||||
Read = 1 << 0,
|
||||
Write = 1 << 1,
|
||||
ReadOrWrite = Read | Write,
|
||||
};
|
||||
DECLARE_ENUM_FLAG_OPERATORS(DebugWatchpointType);
|
||||
|
||||
struct DebugWatchpoint {
|
||||
VAddr start_address;
|
||||
VAddr end_address;
|
||||
DebugWatchpointType type;
|
||||
};
|
||||
|
||||
class KProcess final : public KAutoObjectWithSlabHeapAndContainer<KProcess, KWorkerTask> {
|
||||
KERNEL_AUTOOBJECT_TRAITS(KProcess, KSynchronizationObject);
|
||||
|
||||
|
@ -374,6 +389,19 @@ public:
|
|||
// Frees a used TLS slot identified by the given address
|
||||
ResultCode DeleteThreadLocalRegion(VAddr addr);
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Debug watchpoint management
|
||||
|
||||
// Attempts to insert a watchpoint into a free slot. Returns false if none are available.
|
||||
bool InsertWatchpoint(Core::System& system, VAddr addr, u64 size, DebugWatchpointType type);
|
||||
|
||||
// Attempts to remove the watchpoint specified by the given parameters.
|
||||
bool RemoveWatchpoint(Core::System& system, VAddr addr, u64 size, DebugWatchpointType type);
|
||||
|
||||
const std::array<DebugWatchpoint, Core::Hardware::NUM_WATCHPOINTS>& GetWatchpoints() const {
|
||||
return watchpoints;
|
||||
}
|
||||
|
||||
private:
|
||||
void PinThread(s32 core_id, KThread* thread) {
|
||||
ASSERT(0 <= core_id && core_id < static_cast<s32>(Core::Hardware::NUM_CPU_CORES));
|
||||
|
@ -478,6 +506,8 @@ private:
|
|||
std::array<KThread*, Core::Hardware::NUM_CPU_CORES> running_threads{};
|
||||
std::array<u64, Core::Hardware::NUM_CPU_CORES> running_thread_idle_counts{};
|
||||
std::array<KThread*, Core::Hardware::NUM_CPU_CORES> pinned_threads{};
|
||||
std::array<DebugWatchpoint, Core::Hardware::NUM_WATCHPOINTS> watchpoints{};
|
||||
std::map<VAddr, u64> debug_page_refcounts;
|
||||
|
||||
KThread* exception_thread{};
|
||||
|
||||
|
|
|
@ -710,6 +710,7 @@ void KScheduler::Reload(KThread* thread) {
|
|||
Core::ARM_Interface& cpu_core = system.ArmInterface(core_id);
|
||||
cpu_core.LoadContext(thread->GetContext32());
|
||||
cpu_core.LoadContext(thread->GetContext64());
|
||||
cpu_core.LoadWatchpointArray(thread->GetOwnerProcess()->GetWatchpoints());
|
||||
cpu_core.SetTlsAddress(thread->GetTLSAddress());
|
||||
cpu_core.SetTPIDR_EL0(thread->GetTPIDR_EL0());
|
||||
cpu_core.ClearExclusiveState();
|
||||
|
|
|
@ -67,6 +67,16 @@ struct Memory::Impl {
|
|||
return system.DeviceMemory().GetPointer(paddr) + vaddr;
|
||||
}
|
||||
|
||||
[[nodiscard]] u8* GetPointerFromDebugMemory(VAddr vaddr) const {
|
||||
const PAddr paddr{current_page_table->backing_addr[vaddr >> PAGE_BITS]};
|
||||
|
||||
if (paddr == 0) {
|
||||
return {};
|
||||
}
|
||||
|
||||
return system.DeviceMemory().GetPointer(paddr) + vaddr;
|
||||
}
|
||||
|
||||
u8 Read8(const VAddr addr) {
|
||||
return Read<u8>(addr);
|
||||
}
|
||||
|
@ -187,6 +197,12 @@ struct Memory::Impl {
|
|||
on_memory(copy_amount, mem_ptr);
|
||||
break;
|
||||
}
|
||||
case Common::PageType::DebugMemory: {
|
||||
DEBUG_ASSERT(pointer);
|
||||
u8* const mem_ptr{GetPointerFromDebugMemory(current_vaddr)};
|
||||
on_memory(copy_amount, mem_ptr);
|
||||
break;
|
||||
}
|
||||
case Common::PageType::RasterizerCachedMemory: {
|
||||
u8* const host_ptr{GetPointerFromRasterizerCachedMemory(current_vaddr)};
|
||||
on_rasterizer(current_vaddr, copy_amount, host_ptr);
|
||||
|
@ -316,6 +332,58 @@ struct Memory::Impl {
|
|||
});
|
||||
}
|
||||
|
||||
void MarkRegionDebug(VAddr vaddr, u64 size, bool debug) {
|
||||
if (vaddr == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Iterate over a contiguous CPU address space, marking/unmarking the region.
|
||||
// The region is at a granularity of CPU pages.
|
||||
|
||||
const u64 num_pages = ((vaddr + size - 1) >> PAGE_BITS) - (vaddr >> PAGE_BITS) + 1;
|
||||
for (u64 i = 0; i < num_pages; ++i, vaddr += PAGE_SIZE) {
|
||||
const Common::PageType page_type{
|
||||
current_page_table->pointers[vaddr >> PAGE_BITS].Type()};
|
||||
if (debug) {
|
||||
// Switch page type to debug if now debug
|
||||
switch (page_type) {
|
||||
case Common::PageType::Unmapped:
|
||||
ASSERT_MSG(false, "Attempted to mark unmapped pages as debug");
|
||||
break;
|
||||
case Common::PageType::RasterizerCachedMemory:
|
||||
case Common::PageType::DebugMemory:
|
||||
// Page is already marked.
|
||||
break;
|
||||
case Common::PageType::Memory:
|
||||
current_page_table->pointers[vaddr >> PAGE_BITS].Store(
|
||||
nullptr, Common::PageType::DebugMemory);
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
} else {
|
||||
// Switch page type to non-debug if now non-debug
|
||||
switch (page_type) {
|
||||
case Common::PageType::Unmapped:
|
||||
ASSERT_MSG(false, "Attempted to mark unmapped pages as non-debug");
|
||||
break;
|
||||
case Common::PageType::RasterizerCachedMemory:
|
||||
case Common::PageType::Memory:
|
||||
// Don't mess with already non-debug or rasterizer memory.
|
||||
break;
|
||||
case Common::PageType::DebugMemory: {
|
||||
u8* const pointer{GetPointerFromDebugMemory(vaddr & ~PAGE_MASK)};
|
||||
current_page_table->pointers[vaddr >> PAGE_BITS].Store(
|
||||
pointer - (vaddr & ~PAGE_MASK), Common::PageType::Memory);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNREACHABLE();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) {
|
||||
if (vaddr == 0) {
|
||||
return;
|
||||
|
@ -342,6 +410,7 @@ struct Memory::Impl {
|
|||
// It is not necessary for a process to have this region mapped into its address
|
||||
// space, for example, a system module need not have a VRAM mapping.
|
||||
break;
|
||||
case Common::PageType::DebugMemory:
|
||||
case Common::PageType::Memory:
|
||||
current_page_table->pointers[vaddr >> PAGE_BITS].Store(
|
||||
nullptr, Common::PageType::RasterizerCachedMemory);
|
||||
|
@ -360,6 +429,7 @@ struct Memory::Impl {
|
|||
// It is not necessary for a process to have this region mapped into its address
|
||||
// space, for example, a system module need not have a VRAM mapping.
|
||||
break;
|
||||
case Common::PageType::DebugMemory:
|
||||
case Common::PageType::Memory:
|
||||
// There can be more than one GPU region mapped per CPU region, so it's common
|
||||
// that this area is already unmarked as cached.
|
||||
|
@ -460,6 +530,8 @@ struct Memory::Impl {
|
|||
case Common::PageType::Memory:
|
||||
ASSERT_MSG(false, "Mapped memory page without a pointer @ 0x{:016X}", vaddr);
|
||||
return nullptr;
|
||||
case Common::PageType::DebugMemory:
|
||||
return GetPointerFromDebugMemory(vaddr);
|
||||
case Common::PageType::RasterizerCachedMemory: {
|
||||
u8* const host_ptr{GetPointerFromRasterizerCachedMemory(vaddr)};
|
||||
on_rasterizer();
|
||||
|
@ -596,7 +668,8 @@ bool Memory::IsValidVirtualAddress(const VAddr vaddr) const {
|
|||
return false;
|
||||
}
|
||||
const auto [pointer, type] = page_table.pointers[page].PointerType();
|
||||
return pointer != nullptr || type == Common::PageType::RasterizerCachedMemory;
|
||||
return pointer != nullptr || type == Common::PageType::RasterizerCachedMemory ||
|
||||
type == Common::PageType::DebugMemory;
|
||||
}
|
||||
|
||||
bool Memory::IsValidVirtualAddressRange(VAddr base, u64 size) const {
|
||||
|
@ -716,4 +789,8 @@ void Memory::RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached) {
|
|||
impl->RasterizerMarkRegionCached(vaddr, size, cached);
|
||||
}
|
||||
|
||||
void Memory::MarkRegionDebug(VAddr vaddr, u64 size, bool debug) {
|
||||
impl->MarkRegionDebug(vaddr, size, debug);
|
||||
}
|
||||
|
||||
} // namespace Core::Memory
|
||||
|
|
|
@ -447,6 +447,17 @@ public:
|
|||
*/
|
||||
void RasterizerMarkRegionCached(VAddr vaddr, u64 size, bool cached);
|
||||
|
||||
/**
|
||||
* Marks each page within the specified address range as debug or non-debug.
|
||||
* Debug addresses are not accessible from fastmem pointers.
|
||||
*
|
||||
* @param vaddr The virtual address indicating the start of the address range.
|
||||
* @param size The size of the address range in bytes.
|
||||
* @param debug Whether or not any pages within the address range should be
|
||||
* marked as debug or non-debug.
|
||||
*/
|
||||
void MarkRegionDebug(VAddr vaddr, u64 size, bool debug);
|
||||
|
||||
private:
|
||||
Core::System& system;
|
||||
|
||||
|
|
Loading…
Reference in a new issue