From 92a647d0ae2057afaa9a75ffb12e455d42f0b360 Mon Sep 17 00:00:00 2001 From: pineappleEA Date: Mon, 29 Mar 2021 00:03:19 +0200 Subject: [PATCH] early-access version 1536 --- README.md | 2 +- .../xbyak/.github/workflows/main.yml | 11 ++ externals/dynarmic/externals/xbyak/.gitignore | 1 + .../dynarmic/externals/xbyak/CMakeLists.txt | 46 ++++- .../externals/xbyak/cmake/config.cmake.in | 1 + .../externals/xbyak/gen/gen_avx512.cpp | 6 - .../dynarmic/externals/xbyak/gen/gen_code.cpp | 18 ++ externals/dynarmic/externals/xbyak/readme.md | 19 +- externals/dynarmic/externals/xbyak/readme.txt | 8 +- .../externals/xbyak/sample/test_util.cpp | 13 +- .../dynarmic/externals/xbyak/test/make_nm.cpp | 1 + .../dynarmic/externals/xbyak/test/misc.cpp | 28 +++ .../externals/xbyak/test/test_address.sh | 2 +- .../dynarmic/externals/xbyak/test/test_avx.sh | 51 ++--- .../externals/xbyak/test/test_avx512.sh | 31 ++-- .../dynarmic/externals/xbyak/test/test_nm.sh | 71 +++---- .../dynarmic/externals/xbyak/xbyak/xbyak.h | 46 ++++- .../externals/xbyak/xbyak/xbyak_mnemonic.h | 10 +- .../externals/xbyak/xbyak/xbyak_util.h | 46 +++-- externals/dynarmic/src/CMakeLists.txt | 1 + .../src/backend/x64/emit_x64_vector.cpp | 62 +++---- .../src/frontend/A32/decoder/thumb32.inc | 79 ++++---- ...data_processing_plain_binary_immediate.cpp | 24 +++ .../translate/impl/thumb32_load_halfword.cpp | 130 +++++++++++++ .../impl/thumb32_load_store_multiple.cpp | 150 +++++++++++++++ .../A32/translate/impl/thumb32_load_word.cpp | 123 +++++++++++++ .../impl/thumb32_store_single_data_item.cpp | 174 ++++++++++++++++++ .../A32/translate/impl/translate_thumb.h | 41 +++++ .../dynarmic/src/frontend/A64/decoder/a64.inc | 6 + 29 files changed, 1017 insertions(+), 184 deletions(-) create mode 100755 externals/dynarmic/externals/xbyak/.github/workflows/main.yml create mode 100755 externals/dynarmic/externals/xbyak/.gitignore create mode 100755 externals/dynarmic/externals/xbyak/cmake/config.cmake.in create mode 100755 
externals/dynarmic/src/frontend/A32/translate/impl/thumb32_load_store_multiple.cpp diff --git a/README.md b/README.md index 7e0461acb..089c5563c 100755 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ yuzu emulator early access ============= -This is the source code for early-access 1535. +This is the source code for early-access 1536. ## Legal Notice diff --git a/externals/dynarmic/externals/xbyak/.github/workflows/main.yml b/externals/dynarmic/externals/xbyak/.github/workflows/main.yml new file mode 100755 index 000000000..3fad14220 --- /dev/null +++ b/externals/dynarmic/externals/xbyak/.github/workflows/main.yml @@ -0,0 +1,11 @@ +name: test +on: [push] + +jobs: + build: + name: test + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - run: sudo apt install nasm yasm g++-multilib tcsh + - run: make test diff --git a/externals/dynarmic/externals/xbyak/.gitignore b/externals/dynarmic/externals/xbyak/.gitignore new file mode 100755 index 000000000..24b0b1de5 --- /dev/null +++ b/externals/dynarmic/externals/xbyak/.gitignore @@ -0,0 +1 @@ +/build* # cmake diff --git a/externals/dynarmic/externals/xbyak/CMakeLists.txt b/externals/dynarmic/externals/xbyak/CMakeLists.txt index be131b144..f2c54f141 100755 --- a/externals/dynarmic/externals/xbyak/CMakeLists.txt +++ b/externals/dynarmic/externals/xbyak/CMakeLists.txt @@ -1,6 +1,46 @@ -cmake_minimum_required(VERSION 2.6) -project(xbyak) +cmake_minimum_required(VERSION 2.6...3.0.2) + +project(xbyak CXX) file(GLOB headers xbyak/*.h) -install(FILES ${headers} DESTINATION include/xbyak) +if (DEFINED CMAKE_VERSION AND CMAKE_VERSION VERSION_GREATER_EQUAL 3.0.2) + include(GNUInstallDirs) + add_library(${PROJECT_NAME} INTERFACE) + add_library(${PROJECT_NAME}::${PROJECT_NAME} ALIAS ${PROJECT_NAME}) + + target_include_directories( + ${PROJECT_NAME} INTERFACE + "$" + "$" + ) + + install( + TARGETS ${PROJECT_NAME} + EXPORT ${PROJECT_NAME}-targets + ) + + configure_file( + cmake/config.cmake.in + ${PROJECT_NAME}Config.cmake 
+ @ONLY + ) + + install( + FILES ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}Config.cmake + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME} + ) + + install( + EXPORT ${PROJECT_NAME}-targets + NAMESPACE ${PROJECT_NAME}:: + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME} + ) +elseif(NOT DEFINED CMAKE_INSTALL_INCLUDEDIR) + set(CMAKE_INSTALL_INCLUDEDIR "include") +endif() + +install( + FILES ${headers} + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/xbyak +) diff --git a/externals/dynarmic/externals/xbyak/cmake/config.cmake.in b/externals/dynarmic/externals/xbyak/cmake/config.cmake.in new file mode 100755 index 000000000..f40ebfa88 --- /dev/null +++ b/externals/dynarmic/externals/xbyak/cmake/config.cmake.in @@ -0,0 +1 @@ +include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@-targets.cmake") diff --git a/externals/dynarmic/externals/xbyak/gen/gen_avx512.cpp b/externals/dynarmic/externals/xbyak/gen/gen_avx512.cpp index 250c8d4b5..b2b88c3fd 100755 --- a/externals/dynarmic/externals/xbyak/gen/gen_avx512.cpp +++ b/externals/dynarmic/externals/xbyak/gen/gen_avx512.cpp @@ -363,12 +363,6 @@ void putX_X_XM_IMM() { 0x73, "vpshrdvd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false }, { 0x73, "vpshrdvq", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW1 | T_SAE_Z | T_B64, false }, - { 0x50, "vpdpbusd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false }, - { 0x51, "vpdpbusds", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false }, - - { 0x52, "vpdpwssd", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false }, - { 0x53, "vpdpwssds", T_66 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false }, - { 0x72, "vcvtne2ps2bf16", T_F2 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false }, { 0x52, "vdpbf16ps", T_F3 | T_0F38 | T_YMM | T_MUST_EVEX | T_EW0 | T_SAE_Z | T_B32, false }, }; diff --git a/externals/dynarmic/externals/xbyak/gen/gen_code.cpp 
b/externals/dynarmic/externals/xbyak/gen/gen_code.cpp index ba7dbf619..90f296725 100755 --- a/externals/dynarmic/externals/xbyak/gen/gen_code.cpp +++ b/externals/dynarmic/externals/xbyak/gen/gen_code.cpp @@ -1729,6 +1729,24 @@ void put() printf("void %s(const Xmm& x1, const Address& addr, const Xmm& x2) { opGather(x1, addr, x2, T_0F38 | T_66 | T_YMM | T_VSIB | T_W%d, 0x%x, %d); }\n", p.name, p.w, p.code, p.mode); } } + // vnni + { + const struct Tbl { + uint8_t code; + const char *name; + int type; + } tbl[] = { + { 0x50, "vpdpbusd", T_66 | T_0F38 | T_YMM | T_EW0 | T_SAE_Z | T_B32}, + { 0x51, "vpdpbusds", T_66 | T_0F38 | T_YMM | T_EW0 | T_SAE_Z | T_B32}, + { 0x52, "vpdpwssd", T_66 | T_0F38 | T_YMM | T_EW0 | T_SAE_Z | T_B32}, + { 0x53, "vpdpwssds", T_66 | T_0F38 | T_YMM | T_EW0 | T_SAE_Z | T_B32}, + }; + for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { + const Tbl *p = &tbl[i]; + std::string type = type2String(p->type); + printf("void %s(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opVnni(x1, x2, op, %s, 0x%02X, encoding); }\n", p->name, type.c_str(), p->code); + } + } } void put32() diff --git a/externals/dynarmic/externals/xbyak/readme.md b/externals/dynarmic/externals/xbyak/readme.md index 6caaa51a3..d9aaa6aef 100755 --- a/externals/dynarmic/externals/xbyak/readme.md +++ b/externals/dynarmic/externals/xbyak/readme.md @@ -1,11 +1,14 @@ -[![Build Status](https://travis-ci.org/herumi/xbyak.png)](https://travis-ci.org/herumi/xbyak) +[![Build Status](https://github.com/herumi/xbyak/actions/workflows/main.yml/badge.svg)](https://github.com/herumi/xbyak/actions/workflows/main.yml) -# Xbyak 5.97 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++ +# Xbyak 5.991 ; JIT assembler for x86(IA32), x64(AMD64, x86-64) by C++ ## Abstract Xbyak is a C++ header library that enables dynamically to assemble x86(IA32), x64(AMD64, x86-64) mnemonic. +The pronunciation of Xbyak is `kəi-bja-k`. 
+It is named from a Japanese word [開闢](https://translate.google.com/?hl=ja&sl=ja&tl=en&text=%E9%96%8B%E9%97%A2&op=translate), which means the beginning of the world. + ## Feature * header file only * Intel/MASM like syntax @@ -16,6 +19,7 @@ Use `and_()`, `or_()`, ... instead of `and()`, `or()`. If you want to use them, then specify `-fno-operator-names` option to gcc/clang. ### News +- vnni instructions such as vpdpbusd supports vex encoding. - (break backward compatibility) `push(byte, imm)` (resp. `push(word, imm)`) forces to cast `imm` to 8(resp. 16) bit. - (Windows) `#include ` has been removed from xbyak.h, so add it explicitly if you need it. - support exception-less mode see. [Exception-less mode](#exception-less-mode) @@ -154,6 +158,10 @@ vcvtpd2dq xmm19, [eax+32]{1to4} --> vcvtpd2dq(xmm19, yword_b [eax+32]); vfpclassps k5{k3}, zword [rax+64], 5 --> vfpclassps(k5|k3, zword [rax+64], 5); // specify m512 vfpclasspd k5{k3}, [rax+64]{1to2}, 5 --> vfpclasspd(k5|k3, xword_b [rax+64], 5); // broadcast 64-bit to 128-bit vfpclassps k5{k3}, [rax+64]{1to4}, 5 --> vfpclassps(k5|k3, yword_b [rax+64], 5); // broadcast 64-bit to 256-bit + +vpdpbusd(xm0, xm1, xm2); // default encoding is EVEX +vpdpbusd(xm0, xm1, xm2, EvexEncoding); // same as the above +vpdpbusd(xm0, xm1, xm2, VexEncoding); // VEX encoding ``` ### Remark * `k1`, ..., `k7` are opmask registers. @@ -339,9 +347,9 @@ public: ## User allocated memory -You can make jit code on prepaired memory. +You can make jit code on prepared memory. -Call `setProtectModeRE` yourself to change memory mode if using the prepaired memory. +Call `setProtectModeRE` yourself to change memory mode if using the prepared memory. 
``` uint8_t alignas(4096) buf[8192]; // C++11 or later @@ -438,6 +446,9 @@ modified new BSD License http://opensource.org/licenses/BSD-3-Clause ## History +* 2020/Nov/16 ver 5.991 disable constexpr for gcc-5 with -std=c++-14 +* 2020/Oct/19 ver 5.99 support VNNI instructions(Thanks to akharito) +* 2020/Oct/17 ver 5.98 support the form of [scale * reg] * 2020/Sep/08 ver 5.97 replace uint32 with uint32_t etc. * 2020/Aug/28 ver 5.95 some constructors of register classes support constexpr if C++14 or later * 2020/Aug/04 ver 5.941 `CodeGenerator::reset()` calls `ClearError()`. diff --git a/externals/dynarmic/externals/xbyak/readme.txt b/externals/dynarmic/externals/xbyak/readme.txt index bb30d0828..d39a1b9dd 100755 --- a/externals/dynarmic/externals/xbyak/readme.txt +++ b/externals/dynarmic/externals/xbyak/readme.txt @@ -1,5 +1,5 @@ - C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.97 + C++用x86(IA-32), x64(AMD64, x86-64) JITアセンブラ Xbyak 5.991 ----------------------------------------------------------------------------- ◎概要 @@ -163,6 +163,9 @@ vfpclassps k5{k3}, zword [rax+64], 5 --> vfpclassps(k5|k3, zword [rax+64], 5) vfpclasspd k5{k3}, [rax+64]{1to2}, 5 --> vfpclasspd(k5|k3, xword_b [rax+64], 5); // broadcast 64-bit to 128-bit vfpclassps k5{k3}, [rax+64]{1to4}, 5 --> vfpclassps(k5|k3, xword_b [rax+64], 5); // broadcast 64-bit to 256-bit +vpdpbusd(xm0, xm1, xm2); // default encoding is EVEX +vpdpbusd(xm0, xm1, xm2, EvexEncoding); // same as the above +vpdpbusd(xm0, xm1, xm2, VexEncoding); // VEX encoding 注意 * k1, ..., k7 は新しいopmaskレジスタです。 @@ -379,6 +382,9 @@ sample/{echo,hello}.bfは http://www.kmonos.net/alang/etc/brainfuck.php から ----------------------------------------------------------------------------- ◎履歴 +2020/11/16 ver 5.991 g++-5のC++14でconstexpr機能の抑制 +2020/10/19 ver 5.99 VNNI命令サポート(Thanks to akharito) +2020/10/17 ver 5.98 [scale * reg]のサポート 2020/09/08 ver 5.97 uint32などをuint32_tに置換 2020/08/28 ver 5.95 レジスタクラスのコンストラクタがconstexprに対応(C++14以降) 2020/08/04 ver 5.941 
`CodeGenerator::reset()`が`ClearError()`を呼ぶように変更 diff --git a/externals/dynarmic/externals/xbyak/sample/test_util.cpp b/externals/dynarmic/externals/xbyak/sample/test_util.cpp index afb6e5a40..18dcce501 100755 --- a/externals/dynarmic/externals/xbyak/sample/test_util.cpp +++ b/externals/dynarmic/externals/xbyak/sample/test_util.cpp @@ -1,12 +1,13 @@ #include -#define XBYAK_NO_OP_NAMES #include "xbyak/xbyak_util.h" #define NUM_OF_ARRAY(x) (sizeof(x) / sizeof(x[0])) struct PopCountTest : public Xbyak::CodeGenerator { PopCountTest(int n) + : Xbyak::CodeGenerator(4096, Xbyak::DontSetProtectRWE) { +ret(); mov(eax, n); popcnt(eax, eax); ret(); @@ -80,6 +81,10 @@ void putCPUinfo() { Cpu::tAVX512_VPOPCNTDQ, "avx512_vpopcntdq" }, { Cpu::tAVX512_BF16, "avx512_bf16" }, { Cpu::tAVX512_VP2INTERSECT, "avx512_vp2intersect" }, + { Cpu::tAMX_TILE, "amx(tile)" }, + { Cpu::tAMX_INT8, "amx(int8)" }, + { Cpu::tAMX_BF16, "amx(bf16)" }, + { Cpu::tAVX_VNNI, "avx_vnni" }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { if (cpu.has(tbl[i].type)) printf(" %s", tbl[i].str); @@ -88,12 +93,16 @@ void putCPUinfo() if (cpu.has(Cpu::tPOPCNT)) { const int n = 0x12345678; // bitcount = 13 const int ok = 13; - int r = PopCountTest(n).getCode()(); + PopCountTest code(n); + code.setProtectModeRE(); + int (*f)() = code.getCode(); + int r = f(); if (r == ok) { puts("popcnt ok"); } else { printf("popcnt ng %d %d\n", r, ok); } + code.setProtectModeRW(); } /* displayFamily displayModel diff --git a/externals/dynarmic/externals/xbyak/test/make_nm.cpp b/externals/dynarmic/externals/xbyak/test/make_nm.cpp index 47eb0237b..494d8ac52 100755 --- a/externals/dynarmic/externals/xbyak/test/make_nm.cpp +++ b/externals/dynarmic/externals/xbyak/test/make_nm.cpp @@ -643,6 +643,7 @@ class Test { puts(isXbyak_ ? "out_(dx, al); dump();" : "out dx, al"); puts(isXbyak_ ? "out_(dx, ax); dump();" : "out dx, ax"); puts(isXbyak_ ? "out_(dx, eax); dump();" : "out dx, eax"); + puts(isXbyak_ ? 
"lea(eax, ptr[edi + 4 * eax]); dump();" : "lea eax, [edi + 4 * eax]"); } void putJmp() const { diff --git a/externals/dynarmic/externals/xbyak/test/misc.cpp b/externals/dynarmic/externals/xbyak/test/misc.cpp index 2a55ec2e8..2f0413eec 100755 --- a/externals/dynarmic/externals/xbyak/test/misc.cpp +++ b/externals/dynarmic/externals/xbyak/test/misc.cpp @@ -815,4 +815,32 @@ CYBOZU_TEST_AUTO(tileloadd) CYBOZU_TEST_EXCEPTION(c.notSupported(), std::exception); CYBOZU_TEST_EXCEPTION(c.notSupported2(), std::exception); } + +CYBOZU_TEST_AUTO(vnni) +{ + struct Code : Xbyak::CodeGenerator { + Code() + { + // default encoding is EVEX + vpdpbusd(xm0, xm1, xm2); + vpdpbusd(xm0, xm1, xm2, EvexEncoding); // EVEX + vpdpbusd(xm0, xm1, xm2, VexEncoding); // VEX + } + void badVex() + { + vpdpbusd(xm0, xm1, xm31, VexEncoding); + } + } c; + const uint8_t tbl[] = { + 0x62, 0xF2, 0x75, 0x08, 0x50, 0xC2, + 0x62, 0xF2, 0x75, 0x08, 0x50, 0xC2, + 0xC4, 0xE2, 0x71, 0x50, 0xC2, + }; + const size_t n = sizeof(tbl) / sizeof(tbl[0]); + CYBOZU_TEST_EQUAL(c.getSize(), n); + CYBOZU_TEST_EQUAL_ARRAY(c.getCode(), tbl, n); + + CYBOZU_TEST_EXCEPTION(c.badVex(), std::exception); +} + #endif diff --git a/externals/dynarmic/externals/xbyak/test/test_address.sh b/externals/dynarmic/externals/xbyak/test/test_address.sh index d63a4ef55..d283a5f30 100755 --- a/externals/dynarmic/externals/xbyak/test/test_address.sh +++ b/externals/dynarmic/externals/xbyak/test/test_address.sh @@ -12,7 +12,7 @@ g++ $CFLAGS address.cpp -o address ./address $1 > a.asm echo "asm" $EXE -f$OPT3 a.asm -l a.lst -awk '{if (index($3, "-")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = "" }} ' < a.lst | $FILTER > ok.lst +awk '{printf "%s", sub(/-$/, "", $3) ? 
$3 : $3 ORS}' a.lst | $FILTER > ok.lst echo "xbyak" ./address $1 jit > nm.cpp diff --git a/externals/dynarmic/externals/xbyak/test/test_avx.sh b/externals/dynarmic/externals/xbyak/test/test_avx.sh index 236f7aa5e..34dc1e55b 100755 --- a/externals/dynarmic/externals/xbyak/test/test_avx.sh +++ b/externals/dynarmic/externals/xbyak/test/test_avx.sh @@ -1,39 +1,44 @@ -#!/bin/tcsh +#!/bin/sh -set FILTER="grep -v warning" +FILTER="grep -v warning" -if ($1 == "Y") then +case $1 in +Y) echo "yasm(32bit)" - set EXE=yasm - set OPT2="-DUSE_YASM -DXBYAK32" - set OPT3=win32 -else if ($1 == "64") then + EXE=yasm + OPT2="-DUSE_YASM -DXBYAK32" + OPT3=win32 + ;; +64) echo "nasm(64bit)" - set EXE=nasm - set OPT2=-DXBYAK64 - set OPT3=win64 - set FILTER=./normalize_prefix -else if ($1 == "Y64") then + EXE=nasm + OPT2=-DXBYAK64 + OPT3=win64 + FILTER=./normalize_prefix + ;; +Y64) echo "yasm(64bit)" - set EXE=yasm - set OPT2="-DUSE_YASM -DXBYAK64" - set OPT3=win64 - set FILTER=./normalize_prefix -else + EXE=yasm + OPT2="-DUSE_YASM -DXBYAK64" + OPT3=win64 + FILTER=./normalize_prefix + ;; +*) echo "nasm(32bit)" - set EXE=nasm - set OPT2=-DXBYAK32 - set OPT3=win32 -endif + EXE=nasm + OPT2=-DXBYAK32 + OPT3=win32 + ;; +esac -set CFLAGS="-Wall -fno-operator-names -I../ $OPT2 -DUSE_AVX" +CFLAGS="-Wall -fno-operator-names -I../ $OPT2 -DUSE_AVX" echo "compile make_nm.cpp" g++ $CFLAGS make_nm.cpp -o make_nm ./make_nm > a.asm echo "asm" $EXE -f$OPT3 a.asm -l a.lst -awk '{if (index($3, "-")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = "" }} ' < a.lst | $FILTER | grep -v "1+1" > ok.lst +awk '$3 != "1+1" {printf "%s", sub(/-$/, "", $3) ? 
$3 : $3 ORS}' a.lst | $FILTER > ok.lst echo "xbyak" ./make_nm jit > nm.cpp diff --git a/externals/dynarmic/externals/xbyak/test/test_avx512.sh b/externals/dynarmic/externals/xbyak/test/test_avx512.sh index cce5de0cc..17edfeec7 100755 --- a/externals/dynarmic/externals/xbyak/test/test_avx512.sh +++ b/externals/dynarmic/externals/xbyak/test/test_avx512.sh @@ -1,28 +1,31 @@ -#!/bin/tcsh +#!/bin/sh -set FILTER="grep -v warning" +FILTER="grep -v warning" -if ($1 == "64") then +case $1 in +64) echo "nasm(64bit)" - set EXE=nasm - set OPT2=-DXBYAK64 - set OPT3=win64 - set FILTER=./normalize_prefix -else + EXE=nasm + OPT2=-DXBYAK64 + OPT3=win64 + FILTER=./normalize_prefix + ;; +*) echo "nasm(32bit)" - set EXE=nasm - set OPT2=-DXBYAK32 - set OPT3=win32 -endif + EXE=nasm + OPT2=-DXBYAK32 + OPT3=win32 + ;; +esac -set CFLAGS="-Wall -fno-operator-names -I../ $OPT2 -DUSE_AVX512" +CFLAGS="-Wall -fno-operator-names -I../ $OPT2 -DUSE_AVX512" echo "compile make_512.cpp" g++ $CFLAGS make_512.cpp -o make_512 ./make_512 > a.asm echo "asm" $EXE -f$OPT3 a.asm -l a.lst -awk '{if (index($3, "-")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print conti; conti = "" }} ' < a.lst | $FILTER > ok.lst +awk '{printf "%s", sub(/-$/, "", $3) ? 
$3 : $3 ORS}' a.lst | $FILTER > ok.lst echo "xbyak" ./make_512 jit > nm.cpp diff --git a/externals/dynarmic/externals/xbyak/test/test_nm.sh b/externals/dynarmic/externals/xbyak/test/test_nm.sh index 3e328012a..afa2b1eb2 100755 --- a/externals/dynarmic/externals/xbyak/test/test_nm.sh +++ b/externals/dynarmic/externals/xbyak/test/test_nm.sh @@ -1,50 +1,57 @@ -#!/bin/tcsh +#!/bin/sh -set FILTER=cat +FILTER=cat -if ($1 == "Y") then +case $1 in +Y) echo "yasm(32bit)" - set EXE=yasm - set OPT2="-DUSE_YASM -DXBYAK32" - set OPT3=win32 -else if ($1 == "64") then + EXE=yasm + OPT2="-DUSE_YASM -DXBYAK32" + OPT3=win32 + ;; +64) echo "nasm(64bit)" - set EXE=nasm - set OPT2=-DXBYAK64 - set OPT3=win64 - set FILTER=./normalize_prefix -else if ($1 == "Y64") then + EXE=nasm + OPT2=-DXBYAK64 + OPT3=win64 + FILTER=./normalize_prefix + ;; +Y64) echo "yasm(64bit)" - set EXE=yasm - set OPT2="-DUSE_YASM -DXBYAK64" - set OPT3=win64 - set FILTER=./normalize_prefix -else if ($1 == "avx512") then + EXE=yasm + OPT2="-DUSE_YASM -DXBYAK64" + OPT3=win64 + FILTER=./normalize_prefix + ;; +avx512) echo "nasm(64bit) + avx512" - set EXE=nasm - set OPT2="-DXBYAK64 -DUSE_AVX512" - set OPT3=win64 - set FILTER=./normalize_prefix -else if ($1 == "noexcept") then + EXE=nasm + OPT2="-DXBYAK64 -DUSE_AVX512" + OPT3=win64 + FILTER=./normalize_prefix + ;; +noexcept) echo "nasm(32bit) without exception" - set EXE=nasm - set OPT2="-DXBYAK32 -DXBYAK_NO_EXCEPTION" - set OPT3=win32 -else + EXE=nasm + OPT2="-DXBYAK32 -DXBYAK_NO_EXCEPTION" + OPT3=win32 + ;; +*) echo "nasm(32bit)" - set EXE=nasm - set OPT2=-DXBYAK32 - set OPT3=win32 -endif + EXE=nasm + OPT2=-DXBYAK32 + OPT3=win32 + ;; +esac -set CFLAGS="-Wall -fno-operator-names -I../ $OPT2" +CFLAGS="-Wall -fno-operator-names -I../ $OPT2" echo "compile make_nm.cpp with $CFLAGS" g++ $CFLAGS make_nm.cpp -o make_nm ./make_nm > a.asm echo "asm" $EXE -f$OPT3 a.asm -l a.lst -awk '{if (index($3, "-")) { conti=substr($3, 0, length($3) - 1) } else { conti = conti $3; print 
conti; conti = "" }} ' < a.lst | $FILTER | grep -v "1+1" > ok.lst +awk '$3 != "1+1" {printf "%s", sub(/-$/, "", $3) ? $3 : $3 ORS}' a.lst | $FILTER > ok.lst echo "xbyak" ./make_nm jit > nm.cpp diff --git a/externals/dynarmic/externals/xbyak/xbyak/xbyak.h b/externals/dynarmic/externals/xbyak/xbyak/xbyak.h index 4310455b3..bc0d71f5e 100755 --- a/externals/dynarmic/externals/xbyak/xbyak/xbyak.h +++ b/externals/dynarmic/externals/xbyak/xbyak/xbyak.h @@ -108,7 +108,7 @@ #endif #endif -#if (__cplusplus >= 201103) || (_MSC_VER >= 1800) +#if (__cplusplus >= 201103) || (defined(_MSC_VER) && _MSC_VER >= 1800) #undef XBYAK_TLS #define XBYAK_TLS thread_local #define XBYAK_VARIADIC_TEMPLATE @@ -117,8 +117,11 @@ #define XBYAK_NOEXCEPT throw() #endif -#if (__cplusplus >= 201402L) || (_MSC_VER >= 1910) // Visual Studio 2017 version 15.0 - #define XBYAK_CONSTEXPR constexpr // require c++14 or later +// require c++14 or later +// Visual Studio 2017 version 15.0 or later +// g++-6 or later +#if ((__cplusplus >= 201402L) && !(!defined(__clang__) && defined(__GNUC__) && (__GNUC__ <= 5))) || (defined(_MSC_VER) && _MSC_VER >= 1910) + #define XBYAK_CONSTEXPR constexpr #else #define XBYAK_CONSTEXPR #endif @@ -135,7 +138,7 @@ namespace Xbyak { enum { DEFAULT_MAX_CODE_SIZE = 4096, - VERSION = 0x5970 /* 0xABCD = A.BC(D) */ + VERSION = 0x5991 /* 0xABCD = A.BC(D) */ }; #ifndef MIE_INTEGER_TYPE_DEFINED @@ -413,16 +416,16 @@ public: { const size_t alignedSizeM1 = inner::ALIGN_PAGE_SIZE - 1; size = (size + alignedSizeM1) & ~alignedSizeM1; -#if defined(XBYAK_USE_MAP_JIT) +#if defined(MAP_ANONYMOUS) int mode = MAP_PRIVATE | MAP_ANONYMOUS; - const int mojaveVersion = 18; - if (util::getMacOsVersion() >= mojaveVersion) mode |= MAP_JIT; -#elif defined(MAP_ANONYMOUS) - const int mode = MAP_PRIVATE | MAP_ANONYMOUS; #elif defined(MAP_ANON) - const int mode = MAP_PRIVATE | MAP_ANON; + int mode = MAP_PRIVATE | MAP_ANON; #else #error "not supported" +#endif +#if defined(XBYAK_USE_MAP_JIT) + const int 
mojaveVersion = 18; + if (util::getMacOsVersion() >= mojaveVersion) mode |= MAP_JIT; #endif void *p = mmap(NULL, size, PROT_READ | PROT_WRITE, mode, -1, 0); if (p == MAP_FAILED) XBYAK_THROW_RET(ERR_CANT_ALLOC, 0) @@ -924,6 +927,10 @@ inline RegExp operator*(const Reg& r, int scale) { return RegExp(r, scale); } +inline RegExp operator*(int scale, const Reg& r) +{ + return r * scale; +} inline RegExp operator-(const RegExp& e, size_t disp) { RegExp ret = e; @@ -1539,6 +1546,12 @@ inline const uint8_t* Label::getAddress() const return mgr->getCode() + offset; } +typedef enum { + DefaultEncoding, + VexEncoding, + EvexEncoding +} PreferredEncoding; + class CodeGenerator : public CodeArray { public: enum LabelType { @@ -2293,6 +2306,19 @@ private: if (addr.getRegExp().getIndex().getKind() != kind) XBYAK_THROW(ERR_BAD_VSIB_ADDRESSING) opVex(x, 0, addr, type, code); } + void opVnni(const Xmm& x1, const Xmm& x2, const Operand& op, int type, int code0, PreferredEncoding encoding) + { + if (encoding == DefaultEncoding) { + encoding = EvexEncoding; + } + if (encoding == EvexEncoding) { +#ifdef XBYAK_DISABLE_AVX512 + XBYAK_THROW(ERR_EVEX_IS_INVALID) +#endif + type |= T_MUST_EVEX; + } + opAVX_X_X_XM(x1, x2, op, type, code0); + } void opInOut(const Reg& a, const Reg& d, uint8_t code) { if (a.getIdx() == Operand::AL && d.getIdx() == Operand::DX && d.getBit() == 16) { diff --git a/externals/dynarmic/externals/xbyak/xbyak/xbyak_mnemonic.h b/externals/dynarmic/externals/xbyak/xbyak/xbyak_mnemonic.h index 85e8bed5b..5c1ecffbb 100755 --- a/externals/dynarmic/externals/xbyak/xbyak/xbyak_mnemonic.h +++ b/externals/dynarmic/externals/xbyak/xbyak/xbyak_mnemonic.h @@ -1,4 +1,4 @@ -const char *getVersionString() const { return "5.97"; } +const char *getVersionString() const { return "5.991"; } void adc(const Operand& op, uint32_t imm) { opRM_I(op, imm, 0x10, 2); } void adc(const Operand& op1, const Operand& op2) { opRM_RM(op1, op2, 0x10); } void adcx(const Reg32e& reg, const Operand& op) { 
opGen(reg, op, 0xF6, 0x66, isREG32_REG32orMEM, NONE, 0x38); } @@ -1180,6 +1180,10 @@ void vpcmpgtq(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1 void vpcmpgtw(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F | T_YMM, 0x65); } void vpcmpistri(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0x63, imm); } void vpcmpistrm(const Xmm& xm, const Operand& op, uint8_t imm) { opAVX_X_XM_IMM(xm, op, T_66 | T_0F3A, 0x62, imm); } +void vpdpbusd(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opVnni(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32, 0x50, encoding); } +void vpdpbusds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opVnni(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32, 0x51, encoding); } +void vpdpwssd(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opVnni(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32, 0x52, encoding); } +void vpdpwssds(const Xmm& x1, const Xmm& x2, const Operand& op, PreferredEncoding encoding = DefaultEncoding) { opVnni(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_B32, 0x53, encoding); } void vperm2f128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x06, imm); } void vperm2i128(const Ymm& y1, const Ymm& y2, const Operand& op, uint8_t imm) { if (!(y1.isYMM() && y2.isYMM() && op.isYMEM())) XBYAK_THROW(ERR_BAD_COMBINATION) opVex(y1, &y2, op, T_0F3A | T_66 | T_W0 | T_YMM, 0x46, imm); } void vpermd(const Ymm& y1, const Ymm& y2, const Operand& op) { opAVX_X_X_XM(y1, y2, op, T_66 | T_0F38 | T_W0 | T_EW0 | T_YMM | T_EVEX | T_B32, 0x36); } @@ -1883,10 +1887,6 @@ void vpcompressd(const Operand& op, const Xmm& x) { 
opAVX_X_XM_IMM(x, op, T_N4 | void vpcompressq(const Operand& op, const Xmm& x) { opAVX_X_XM_IMM(x, op, T_N8 | T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX, 0x8B); } void vpconflictd(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0xC4); } void vpconflictq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F38 | T_EW1 | T_YMM | T_MUST_EVEX | T_B64, 0xC4); } -void vpdpbusd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x50); } -void vpdpbusds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x51); } -void vpdpwssd(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x52); } -void vpdpwssds(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x53); } void vpermb(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x8D); } void vpermi2b(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX, 0x75); } void vpermi2d(const Xmm& x1, const Xmm& x2, const Operand& op) { opAVX_X_X_XM(x1, x2, op, T_66 | T_0F38 | T_EW0 | T_YMM | T_MUST_EVEX | T_B32, 0x76); } diff --git a/externals/dynarmic/externals/xbyak/xbyak/xbyak_util.h b/externals/dynarmic/externals/xbyak/xbyak/xbyak_util.h index 1516fc33d..6ff9aab83 100755 --- a/externals/dynarmic/externals/xbyak/xbyak/xbyak_util.h +++ b/externals/dynarmic/externals/xbyak/xbyak/xbyak_util.h @@ -368,6 +368,7 @@ public: static const Type tAMX_TILE = uint64_t(1) << 59; static const Type tAMX_INT8 = uint64_t(1) << 60; static const Type tAMX_BF16 = uint64_t(1) << 61; + static const Type 
tAVX_VNNI = uint64_t(1) << 62; Cpu() : type_(NONE) @@ -389,19 +390,35 @@ public: if (ECX == get32bitAsBE(amd)) { type_ |= tAMD; getCpuid(0x80000001, data); - if (EDX & (1U << 31)) type_ |= t3DN; - if (EDX & (1U << 15)) type_ |= tCMOV; - if (EDX & (1U << 30)) type_ |= tE3DN; - if (EDX & (1U << 22)) type_ |= tMMX2; - if (EDX & (1U << 27)) type_ |= tRDTSCP; + if (EDX & (1U << 31)) { + type_ |= t3DN; + // 3DNow! implies support for PREFETCHW on AMD + type_ |= tPREFETCHW; + } + + if (EDX & (1U << 29)) { + // Long mode implies support for PREFETCHW on AMD + type_ |= tPREFETCHW; + } } if (ECX == get32bitAsBE(intel)) { type_ |= tINTEL; + } + + // Extended flags information + getCpuid(0x80000000, data); + if (EAX >= 0x80000001) { getCpuid(0x80000001, data); + + if (EDX & (1U << 31)) type_ |= t3DN; + if (EDX & (1U << 30)) type_ |= tE3DN; if (EDX & (1U << 27)) type_ |= tRDTSCP; + if (EDX & (1U << 22)) type_ |= tMMX2; + if (EDX & (1U << 15)) type_ |= tCMOV; if (ECX & (1U << 5)) type_ |= tLZCNT; if (ECX & (1U << 8)) type_ |= tPREFETCHW; } + getCpuid(1, data); if (ECX & (1U << 0)) type_ |= tSSE3; if (ECX & (1U << 9)) type_ |= tSSSE3; @@ -426,7 +443,11 @@ public: if ((bv & 6) == 6) { if (ECX & (1U << 28)) type_ |= tAVX; if (ECX & (1U << 12)) type_ |= tFMA; - if (((bv >> 5) & 7) == 7) { + // do *not* check AVX-512 state on macOS because it has on-demand AVX-512 support +#if !defined(__APPLE__) + if (((bv >> 5) & 7) == 7) +#endif + { getCpuidEx(7, 0, data); if (EBX & (1U << 16)) type_ |= tAVX512F; if (type_ & tAVX512F) { @@ -449,16 +470,12 @@ public: if (EDX & (1U << 3)) type_ |= tAVX512_4FMAPS; if (EDX & (1U << 8)) type_ |= tAVX512_VP2INTERSECT; } - // EAX=07H, ECX=1 - getCpuidEx(7, 1, data); - if (type_ & tAVX512F) { - if (EAX & (1U << 5)) type_ |= tAVX512_BF16; - } } } } if (maxNum >= 7) { getCpuidEx(7, 0, data); + const uint32_t maxNumSubLeaves = EAX; if (type_ & tAVX && (EBX & (1U << 5))) type_ |= tAVX2; if (EBX & (1U << 3)) type_ |= tBMI1; if (EBX & (1U << 8)) type_ |= tBMI2; 
@@ -474,6 +491,13 @@ public: if (EDX & (1U << 24)) type_ |= tAMX_TILE; if (EDX & (1U << 25)) type_ |= tAMX_INT8; if (EDX & (1U << 22)) type_ |= tAMX_BF16; + if (maxNumSubLeaves >= 1) { + getCpuidEx(7, 1, data); + if (EAX & (1U << 4)) type_ |= tAVX_VNNI; + if (type_ & tAVX512F) { + if (EAX & (1U << 5)) type_ |= tAVX512_BF16; + } + } } setFamily(); setNumCores(); diff --git a/externals/dynarmic/src/CMakeLists.txt b/externals/dynarmic/src/CMakeLists.txt index 83cece7bc..949e50ac8 100755 --- a/externals/dynarmic/src/CMakeLists.txt +++ b/externals/dynarmic/src/CMakeLists.txt @@ -163,6 +163,7 @@ if ("A32" IN_LIST DYNARMIC_FRONTENDS) frontend/A32/translate/impl/thumb32_data_processing_shifted_register.cpp frontend/A32/translate/impl/thumb32_load_byte.cpp frontend/A32/translate/impl/thumb32_load_halfword.cpp + frontend/A32/translate/impl/thumb32_load_store_multiple.cpp frontend/A32/translate/impl/thumb32_load_word.cpp frontend/A32/translate/impl/thumb32_long_multiply.cpp frontend/A32/translate/impl/thumb32_misc.cpp diff --git a/externals/dynarmic/src/backend/x64/emit_x64_vector.cpp b/externals/dynarmic/src/backend/x64/emit_x64_vector.cpp index 07f7f1532..15b3326a6 100755 --- a/externals/dynarmic/src/backend/x64/emit_x64_vector.cpp +++ b/externals/dynarmic/src/backend/x64/emit_x64_vector.cpp @@ -440,13 +440,13 @@ void EmitX64::EmitVectorAnd(EmitContext& ctx, IR::Inst* inst) { static void ArithmeticShiftRightByte(EmitContext& ctx, BlockOfCode& code, const Xbyak::Xmm& result, u8 shift_amount) { if (code.HasAVX512_Icelake()) { - // Do a logical shift right upon the 8x8 bit-matrix, but shift in - // `0x80` bytes into the matrix to repeat the most significant bit. - const u64 zero_extend = ~(0xFFFFFFFFFFFFFFFF << (shift_amount * 8)) & 0x8080808080808080; - const u64 shift_matrix = (0x0102040810204080 >> (shift_amount * 8)) | zero_extend; + const u64 shift_matrix = shift_amount < 8 + ? 
(0x0102040810204080 << (shift_amount * 8)) | (0x8080808080808080 >> (64 - shift_amount * 8)) + : 0x8080808080808080; code.vgf2p8affineqb(result, result, code.MConst(xword_b, shift_matrix), 0); return; } + const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm(); code.punpckhbw(tmp, result); @@ -1465,20 +1465,21 @@ void EmitX64::EmitVectorLogicalShiftLeft8(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); const u8 shift_amount = args[1].GetImmediateU8(); - if (shift_amount == 1) { + if (shift_amount == 0) { + // do nothing + } else if (shift_amount >= 8) { + code.pxor(result, result); + } else if (shift_amount == 1) { code.paddb(result, result); - } else if (shift_amount > 0) { - if (code.HasAVX512_Icelake()) { - // Galois 8x8 identity matrix, bit-shifted by the shift-amount - const u64 shift_matrix = 0x0102040810204080 >> (shift_amount * 8); - code.vgf2p8affineqb(result, result, code.MConst(xword_b, shift_matrix), 0); - } else { - const u64 replicand = (0xFFULL << shift_amount) & 0xFF; - const u64 mask = Common::Replicate(replicand, Common::BitSize()); + } else if (code.HasAVX512_Icelake()) { + const u64 shift_matrix = 0x0102040810204080 >> (shift_amount * 8); + code.vgf2p8affineqb(result, result, code.MConst(xword_b, shift_matrix), 0); + } else { + const u64 replicand = (0xFFULL << shift_amount) & 0xFF; + const u64 mask = Common::Replicate(replicand, Common::BitSize()); - code.psllw(result, shift_amount); - code.pand(result, code.MConst(xword, mask, mask)); - } + code.psllw(result, shift_amount); + code.pand(result, code.MConst(xword, mask, mask)); } ctx.reg_alloc.DefineValue(inst, result); @@ -1523,18 +1524,19 @@ void EmitX64::EmitVectorLogicalShiftRight8(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm result = ctx.reg_alloc.UseScratchXmm(args[0]); const u8 shift_amount = args[1].GetImmediateU8(); - if (shift_amount > 0) { - if (code.HasAVX512_Icelake()) { - // Galois 8x8 identity matrix, bit-shifted by the 
shift-amount - const u64 shift_matrix = 0x0102040810204080 << (shift_amount * 8); - code.vgf2p8affineqb(result, result, code.MConst(xword_b, shift_matrix), 0); - } else { - const u64 replicand = 0xFEULL >> shift_amount; - const u64 mask = Common::Replicate(replicand, Common::BitSize()); + if (shift_amount == 0) { + // Do nothing + } else if (shift_amount >= 8) { + code.pxor(result, result); + } else if (code.HasAVX512_Icelake()) { + const u64 shift_matrix = 0x0102040810204080 << (shift_amount * 8); + code.vgf2p8affineqb(result, result, code.MConst(xword_b, shift_matrix), 0); + } else { + const u64 replicand = 0xFEULL >> shift_amount; + const u64 mask = Common::Replicate(replicand, Common::BitSize()); - code.psrlw(result, shift_amount); - code.pand(result, code.MConst(xword, mask, mask)); - } + code.psrlw(result, shift_amount); + code.pand(result, code.MConst(xword, mask, mask)); } ctx.reg_alloc.DefineValue(inst, result); @@ -2768,12 +2770,8 @@ void EmitX64::EmitVectorReverseBits(EmitContext& ctx, IR::Inst* inst) { const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]); - if (code.HasAVX512_Icelake() && code.HasSSSE3()) { - // GFNI(vgf2p8affineqb) and SSSE3(pshuf) - // Reverse bits within bytes + if (code.HasAVX512_Icelake()) { code.vgf2p8affineqb(data, data, code.MConst(xword_b, 0x8040201008040201), 0); - // Reverse bytes within vector - code.pshufb(data, code.MConst(xword, 0x0001020304050607, 0x08090a0b0c0d0e0f)); } else { const Xbyak::Xmm high_nibble_reg = ctx.reg_alloc.ScratchXmm(); code.movdqa(high_nibble_reg, code.MConst(xword, 0xF0F0F0F0F0F0F0F0, 0xF0F0F0F0F0F0F0F0)); diff --git a/externals/dynarmic/src/frontend/A32/decoder/thumb32.inc b/externals/dynarmic/src/frontend/A32/decoder/thumb32.inc index bd09a0419..e5def4ca3 100755 --- a/externals/dynarmic/src/frontend/A32/decoder/thumb32.inc +++ b/externals/dynarmic/src/frontend/A32/decoder/thumb32.inc @@ -1,12 +1,12 @@ // Load/Store Multiple //INST(thumb32_SRS_1, "SRS", "1110100000-0--------------------") 
//INST(thumb32_RFE_2, "RFE", "1110100000-1--------------------") -//INST(thumb32_STMIA, "STMIA/STMEA", "1110100010-0--------------------") -//INST(thumb32_POP, "POP", "1110100010111101----------------") -//INST(thumb32_LDMIA, "LDMIA/LDMFD", "1110100010-1--------------------") -//INST(thumb32_PUSH, "PUSH", "1110100100101101----------------") -//INST(thumb32_STMDB, "STMDB/STMFD", "1110100100-0--------------------") -//INST(thumb32_LDMDB, "LDMDB/LDMEA", "1110100100-1--------------------") +INST(thumb32_STMIA, "STMIA/STMEA", "1110100010W0nnnn0iiiiiiiiiiiiiii") +INST(thumb32_POP, "POP", "1110100010111101iiiiiiiiiiiiiiii") +INST(thumb32_LDMIA, "LDMIA/LDMFD", "1110100010W1nnnniiiiiiiiiiiiiiii") +INST(thumb32_PUSH, "PUSH", "11101001001011010iiiiiiiiiiiiiii") +INST(thumb32_STMDB, "STMDB/STMFD", "1110100100W0nnnn0iiiiiiiiiiiiiii") +INST(thumb32_LDMDB, "LDMDB/LDMEA", "1110100100W1nnnniiiiiiiiiiiiiiii") //INST(thumb32_SRS_1, "SRS", "1110100110-0--------------------") //INST(thumb32_RFE_2, "RFE", "1110100110-1--------------------") @@ -66,10 +66,10 @@ INST(thumb32_SUB_imm_1, "SUB (imm)", "11110v01101Snnnn0vvvdd INST(thumb32_RSB_imm, "RSB (imm)", "11110v01110Snnnn0vvvddddvvvvvvvv") // Data Processing (Plain Binary Immediate) -//INST(thumb32_ADR, "ADR", "11110-10000011110---------------") +INST(thumb32_ADR_t3, "ADR", "11110i10000011110iiiddddiiiiiiii") INST(thumb32_ADD_imm_2, "ADD (imm)", "11110i10000011010iiiddddiiiiiiii") INST(thumb32_MOVW_imm, "MOVW (imm)", "11110i100100iiii0iiiddddiiiiiiii") -//INST(thumb32_ADR, "ADR", "11110-10101011110---------------") +INST(thumb32_ADR_t2, "ADR", "11110i10101011110iiiddddiiiiiiii") INST(thumb32_SUB_imm_2, "SUB (imm)", "11110i10101011010iiiddddiiiiiiii") INST(thumb32_MOVT, "MOVT", "11110i101100iiii0iiiddddiiiiiiii") INST(thumb32_UDF, "Invalid decoding", "11110011-010----0000----0001----") @@ -124,24 +124,25 @@ INST(thumb32_UDF, "Invalid decoding", "11110-111-------10-0-- INST(thumb32_B_cond, "B (cond)", "11110Sccccvvvvvv10i0ivvvvvvvvvvv") 
// Store Single Data Item -//INST(thumb32_STRB_imm_1, "STRB (imm)", "111110000000--------1--1--------") -//INST(thumb32_STRB_imm_2, "STRB (imm)", "111110000000--------1100--------") -//INST(thumb32_STRB_imm_3, "STRB (imm)", "111110001000--------------------") -//INST(thumb32_STRBT, "STRBT", "111110000000--------1110--------") +INST(thumb32_STRB_imm_1, "STRB (imm)", "111110000000nnnntttt1PU1iiiiiiii") +INST(thumb32_STRB_imm_2, "STRB (imm)", "111110000000nnnntttt1100iiiiiiii") +INST(thumb32_STRB_imm_3, "STRB (imm)", "111110001000nnnnttttiiiiiiiiiiii") +INST(thumb32_STRBT, "STRBT", "111110000000nnnntttt1110iiiiiiii") INST(thumb32_STRB, "STRB (reg)", "111110000000nnnntttt000000iimmmm") -//INST(thumb32_STRH_imm_1, "STRH (imm)", "111110000010--------1--1--------") -//INST(thumb32_STRH_imm_2, "STRH (imm)", "111110000010--------1100--------") -//INST(thumb32_STRH_imm_3, "STRH (imm)", "111110001010--------------------") -//INST(thumb32_STRHT, "STRHT", "111110000010--------1110--------") +INST(thumb32_STRH_imm_1, "STRH (imm)", "111110000010nnnntttt1PU1iiiiiiii") +INST(thumb32_STRH_imm_2, "STRH (imm)", "111110000010nnnntttt1100iiiiiiii") +INST(thumb32_STRH_imm_3, "STRH (imm)", "111110001010nnnnttttiiiiiiiiiiii") +INST(thumb32_STRHT, "STRHT", "111110000010nnnntttt1110iiiiiiii") INST(thumb32_STRH, "STRH (reg)", "111110000010nnnntttt000000iimmmm") -//INST(thumb32_STR_imm_1, "STR (imm)", "111110000100--------1--1--------") -//INST(thumb32_STR_imm_2, "STR (imm)", "111110000100--------1100--------") -//INST(thumb32_STR_imm_3, "STR (imm)", "111110001100--------------------") -//INST(thumb32_STRT, "STRT", "111110000100--------1110--------") +INST(thumb32_STR_imm_1, "STR (imm)", "111110000100nnnntttt1PU1iiiiiiii") +INST(thumb32_STR_imm_2, "STR (imm)", "111110000100nnnntttt1100iiiiiiii") +INST(thumb32_STR_imm_3, "STR (imm)", "111110001100nnnnttttiiiiiiiiiiii") +INST(thumb32_STRT, "STRT", "111110000100nnnntttt1110iiiiiiii") INST(thumb32_STR_reg, "STR (reg)", 
"111110000100nnnntttt000000iimmmm") // Load Byte and Memory Hints INST(thumb32_PLD_lit, "PLD (lit)", "11111000U00111111111iiiiiiiiiiii") +INST(thumb32_PLD_lit, "PLD (lit)", "11111000U01111111111iiiiiiiiiiii") INST(thumb32_PLD_reg, "PLD (reg)", "1111100000W1nnnn1111000000iimmmm") INST(thumb32_PLD_imm8, "PLD (imm8)", "1111100000W1nnnn11111100iiiiiiii") INST(thumb32_PLD_imm12, "PLD (imm12)", "1111100010W1nnnn1111iiiiiiiiiiii") @@ -161,27 +162,27 @@ INST(thumb32_LDRSB_imm8, "LDRSB (imm8)", "111110010001nnnntttt1P INST(thumb32_LDRSB_imm12, "LDRSB (imm12)", "111110011001nnnnttttiiiiiiiiiiii") // Load Halfword and Memory Hints -//INST(thumb32_LDRH_lit, "LDRH (lit)", "11111000-0111111----------------") -//INST(thumb32_LDRH_reg, "LDRH (reg)", "111110000011--------000000------") -//INST(thumb32_LDRHT, "LDRHT", "111110000011--------1110--------") -//INST(thumb32_LDRH_imm8, "LDRH (imm8)", "111110000011--------1-----------") -//INST(thumb32_LDRH_imm12, "LDRH (imm12)", "111110001011--------------------") -//INST(thumb32_LDRSH_lit, "LDRSH (lit)", "11111001-0111111----------------") -//INST(thumb32_LDRSH_reg, "LDRSH (reg)", "111110010011--------000000------") -//INST(thumb32_LDRSHT, "LDRSHT", "111110010011--------1110--------") -//INST(thumb32_LDRSH_imm8, "LDRSH (imm8)", "111110010011--------1-----------") -//INST(thumb32_LDRSH_imm12, "LDRSH (imm12)", "111110011011--------------------") -//INST(thumb32_NOP, "NOP", "111110010011----1111000000------") -//INST(thumb32_NOP, "NOP", "111110010011----11111100--------") -//INST(thumb32_NOP, "NOP", "11111001-01111111111------------") -//INST(thumb32_NOP, "NOP", "111110011011----1111------------") +INST(thumb32_LDRH_lit, "LDRH (lit)", "11111000U0111111ttttiiiiiiiiiiii") +INST(thumb32_LDRH_reg, "LDRH (reg)", "111110000011nnnntttt000000iimmmm") +INST(thumb32_LDRHT, "LDRHT", "111110000011nnnntttt1110iiiiiiii") +INST(thumb32_LDRH_imm8, "LDRH (imm8)", "111110000011nnnntttt1PUWiiiiiiii") +INST(thumb32_LDRH_imm12, "LDRH (imm12)", 
"111110001011nnnnttttiiiiiiiiiiii") +INST(thumb32_NOP, "NOP", "11111001-01111111111------------") +INST(thumb32_LDRSH_lit, "LDRSH (lit)", "11111001U0111111ttttiiiiiiiiiiii") +INST(thumb32_NOP, "NOP", "111110010011----1111000000------") +INST(thumb32_LDRSH_reg, "LDRSH (reg)", "111110010011nnnntttt000000iimmmm") +INST(thumb32_LDRSHT, "LDRSHT", "111110010011nnnntttt1110iiiiiiii") +INST(thumb32_NOP, "NOP", "111110010011----11111100--------") +INST(thumb32_NOP, "NOP", "111110011011----1111------------") +INST(thumb32_LDRSH_imm8, "LDRSH (imm8)", "111110010011nnnntttt1PUWiiiiiiii") +INST(thumb32_LDRSH_imm12, "LDRSH (imm12)", "111110011011nnnnttttiiiiiiiiiiii") // Load Word -//INST(thumb32_LDR_lit, "LDR (lit)", "11111000-1011111----------------") -//INST(thumb32_LDRT, "LDRT", "111110000101--------1110--------") -//INST(thumb32_LDR_reg, "LDR (reg)", "111110000101--------000000------") -//INST(thumb32_LDR_imm8, "LDR (imm8)", "111110000101--------1-----------") -//INST(thumb32_LDR_imm12, "LDR (imm12)", "111110001101--------------------") +INST(thumb32_LDR_lit, "LDR (lit)", "11111000U1011111ttttiiiiiiiiiiii") +INST(thumb32_LDRT, "LDRT", "111110000101nnnntttt1110iiiiiiii") +INST(thumb32_LDR_reg, "LDR (reg)", "111110000101nnnntttt000000iimmmm") +INST(thumb32_LDR_imm8, "LDR (imm8)", "111110000101nnnntttt1PUWiiiiiiii") +INST(thumb32_LDR_imm12, "LDR (imm12)", "111110001101nnnnttttiiiiiiiiiiii") // Data Processing (register) INST(thumb32_LSL_reg, "LSL (reg)", "111110100000mmmm1111dddd0000ssss") diff --git a/externals/dynarmic/src/frontend/A32/translate/impl/thumb32_data_processing_plain_binary_immediate.cpp b/externals/dynarmic/src/frontend/A32/translate/impl/thumb32_data_processing_plain_binary_immediate.cpp index 5094ac310..33f3c52ca 100755 --- a/externals/dynarmic/src/frontend/A32/translate/impl/thumb32_data_processing_plain_binary_immediate.cpp +++ b/externals/dynarmic/src/frontend/A32/translate/impl/thumb32_data_processing_plain_binary_immediate.cpp @@ -52,6 +52,30 @@ static 
bool Saturation16(ThumbTranslatorVisitor& v, Reg n, Reg d, size_t saturat return true; } +bool ThumbTranslatorVisitor::thumb32_ADR_t2(Imm<1> imm1, Imm<3> imm3, Reg d, Imm<8> imm8) { + if (d == Reg::PC) { + return UnpredictableInstruction(); + } + + const auto imm32 = concatenate(imm1, imm3, imm8).ZeroExtend(); + const auto result = ir.AlignPC(4) - imm32; + + ir.SetRegister(d, ir.Imm32(result)); + return true; +} + +bool ThumbTranslatorVisitor::thumb32_ADR_t3(Imm<1> imm1, Imm<3> imm3, Reg d, Imm<8> imm8) { + if (d == Reg::PC) { + return UnpredictableInstruction(); + } + + const auto imm32 = concatenate(imm1, imm3, imm8).ZeroExtend(); + const auto result = ir.AlignPC(4) + imm32; + + ir.SetRegister(d, ir.Imm32(result)); + return true; +} + bool ThumbTranslatorVisitor::thumb32_ADD_imm_2(Imm<1> imm1, Imm<3> imm3, Reg d, Imm<8> imm8) { if (d == Reg::PC) { return UnpredictableInstruction(); diff --git a/externals/dynarmic/src/frontend/A32/translate/impl/thumb32_load_halfword.cpp b/externals/dynarmic/src/frontend/A32/translate/impl/thumb32_load_halfword.cpp index 5f4e4104b..ae41a5892 100755 --- a/externals/dynarmic/src/frontend/A32/translate/impl/thumb32_load_halfword.cpp +++ b/externals/dynarmic/src/frontend/A32/translate/impl/thumb32_load_halfword.cpp @@ -7,5 +7,135 @@ namespace Dynarmic::A32 { +using ExtensionFunction = IR::U32 (IREmitter::*)(const IR::U16&); + +static bool LoadHalfLiteral(ThumbTranslatorVisitor& v, bool U, Reg t, Imm<12> imm12, + ExtensionFunction ext_fn) { + const auto imm32 = imm12.ZeroExtend(); + const auto base = v.ir.AlignPC(4); + const auto address = U ? 
(base + imm32) : (base - imm32); + const auto data = (v.ir.*ext_fn)(v.ir.ReadMemory16(v.ir.Imm32(address))); + + v.ir.SetRegister(t, data); + return true; +} + +static bool LoadHalfRegister(ThumbTranslatorVisitor& v, Reg n, Reg t, Imm<2> imm2, Reg m, + ExtensionFunction ext_fn) { + if (m == Reg::PC) { + return v.UnpredictableInstruction(); + } + + const IR::U32 reg_m = v.ir.GetRegister(m); + const IR::U32 reg_n = v.ir.GetRegister(n); + const IR::U32 offset = v.ir.LogicalShiftLeft(reg_m, v.ir.Imm8(imm2.ZeroExtend())); + const IR::U32 address = v.ir.Add(reg_n, offset); + const IR::U32 data = (v.ir.*ext_fn)(v.ir.ReadMemory16(address)); + + v.ir.SetRegister(t, data); + return true; +} + +static bool LoadHalfImmediate(ThumbTranslatorVisitor& v, Reg n, Reg t, bool P, bool U, bool W, + Imm<12> imm12, ExtensionFunction ext_fn) { + const u32 imm32 = imm12.ZeroExtend(); + const IR::U32 reg_n = v.ir.GetRegister(n); + const IR::U32 offset_address = U ? v.ir.Add(reg_n, v.ir.Imm32(imm32)) + : v.ir.Sub(reg_n, v.ir.Imm32(imm32)); + const IR::U32 address = P ? 
offset_address + : reg_n; + const IR::U32 data = (v.ir.*ext_fn)(v.ir.ReadMemory16(address)); + + if (W) { + v.ir.SetRegister(n, offset_address); + } + + v.ir.SetRegister(t, data); + return true; +} + +bool ThumbTranslatorVisitor::thumb32_LDRH_lit(bool U, Reg t, Imm<12> imm12) { + return LoadHalfLiteral(*this, U, t, imm12, &IREmitter::ZeroExtendHalfToWord); +} + +bool ThumbTranslatorVisitor::thumb32_LDRH_reg(Reg n, Reg t, Imm<2> imm2, Reg m) { + return LoadHalfRegister(*this, n, t, imm2, m, &IREmitter::ZeroExtendHalfToWord); +} + +bool ThumbTranslatorVisitor::thumb32_LDRH_imm8(Reg n, Reg t, bool P, bool U, bool W, Imm<8> imm8) { + if (!P && !W) { + return UndefinedInstruction(); + } + if (t == Reg::PC && W) { + return UnpredictableInstruction(); + } + if (W && n == t) { + return UnpredictableInstruction(); + } + + return LoadHalfImmediate(*this, n, t, P, U, W, Imm<12>{imm8.ZeroExtend()}, + &IREmitter::ZeroExtendHalfToWord); +} + +bool ThumbTranslatorVisitor::thumb32_LDRH_imm12(Reg n, Reg t, Imm<12> imm12) { + return LoadHalfImmediate(*this, n, t, true, true, false, imm12, + &IREmitter::ZeroExtendHalfToWord); +} + +bool ThumbTranslatorVisitor::thumb32_LDRHT(Reg n, Reg t, Imm<8> imm8) { + // TODO: Add an unpredictable instruction path if this + // is executed in hypervisor mode if we ever support + // privileged execution levels. + + if (t == Reg::PC) { + return UnpredictableInstruction(); + } + + // Treat it as a normal LDRH, given we don't support + // execution levels other than EL0 currently. 
+ return thumb32_LDRH_imm8(n, t, true, true, false, imm8); +} + +bool ThumbTranslatorVisitor::thumb32_LDRSH_lit(bool U, Reg t, Imm<12> imm12) { + return LoadHalfLiteral(*this, U, t, imm12, &IREmitter::SignExtendHalfToWord); +} + +bool ThumbTranslatorVisitor::thumb32_LDRSH_reg(Reg n, Reg t, Imm<2> imm2, Reg m) { + return LoadHalfRegister(*this, n, t, imm2, m, &IREmitter::SignExtendHalfToWord); +} + +bool ThumbTranslatorVisitor::thumb32_LDRSH_imm8(Reg n, Reg t, bool P, bool U, bool W, Imm<8> imm8) { + if (!P && !W) { + return UndefinedInstruction(); + } + if (t == Reg::PC && W) { + return UnpredictableInstruction(); + } + if (W && n == t) { + return UnpredictableInstruction(); + } + + return LoadHalfImmediate(*this, n, t, P, U, W, Imm<12>{imm8.ZeroExtend()}, + &IREmitter::SignExtendHalfToWord); +} + +bool ThumbTranslatorVisitor::thumb32_LDRSH_imm12(Reg n, Reg t, Imm<12> imm12) { + return LoadHalfImmediate(*this, n, t, true, true, false, imm12, + &IREmitter::SignExtendHalfToWord); +} + +bool ThumbTranslatorVisitor::thumb32_LDRSHT(Reg n, Reg t, Imm<8> imm8) { + // TODO: Add an unpredictable instruction path if this + // is executed in hypervisor mode if we ever support + // privileged execution levels. + + if (t == Reg::PC) { + return UnpredictableInstruction(); + } + + // Treat it as a normal LDRSH, given we don't support + // execution levels other than EL0 currently. + return thumb32_LDRSH_imm8(n, t, true, true, false, imm8); +} } // namespace Dynarmic::A32 diff --git a/externals/dynarmic/src/frontend/A32/translate/impl/thumb32_load_store_multiple.cpp b/externals/dynarmic/src/frontend/A32/translate/impl/thumb32_load_store_multiple.cpp new file mode 100755 index 000000000..49af9d764 --- /dev/null +++ b/externals/dynarmic/src/frontend/A32/translate/impl/thumb32_load_store_multiple.cpp @@ -0,0 +1,150 @@ +/* This file is part of the dynarmic project. 
+ * Copyright (c) 2021 MerryMage + * SPDX-License-Identifier: 0BSD + */ + +#include "common/bit_util.h" +#include "frontend/A32/translate/impl/translate_thumb.h" + +namespace Dynarmic::A32 { +static bool ITBlockCheck(const A32::IREmitter& ir) { + return ir.current_location.IT().IsInITBlock() && !ir.current_location.IT().IsLastInITBlock(); +} + +static bool LDMHelper(A32::IREmitter& ir, bool W, Reg n, u32 list, + const IR::U32& start_address, const IR::U32& writeback_address) { + auto address = start_address; + for (size_t i = 0; i <= 14; i++) { + if (Common::Bit(i, list)) { + ir.SetRegister(static_cast(i), ir.ReadMemory32(address)); + address = ir.Add(address, ir.Imm32(4)); + } + } + if (W && !Common::Bit(RegNumber(n), list)) { + ir.SetRegister(n, writeback_address); + } + if (Common::Bit<15>(list)) { + ir.UpdateUpperLocationDescriptor(); + ir.LoadWritePC(ir.ReadMemory32(address)); + if (n == Reg::R13) { + ir.SetTerm(IR::Term::PopRSBHint{}); + } else { + ir.SetTerm(IR::Term::FastDispatchHint{}); + } + return false; + } + return true; +} + +static bool STMHelper(A32::IREmitter& ir, bool W, Reg n, u32 list, + const IR::U32& start_address, const IR::U32& writeback_address) { + auto address = start_address; + for (size_t i = 0; i <= 14; i++) { + if (Common::Bit(i, list)) { + ir.WriteMemory32(address, ir.GetRegister(static_cast(i))); + address = ir.Add(address, ir.Imm32(4)); + } + } + if (W) { + ir.SetRegister(n, writeback_address); + } + return true; +} + +bool ThumbTranslatorVisitor::thumb32_LDMDB(bool W, Reg n, Imm<16> reg_list) { + const auto regs_imm = reg_list.ZeroExtend(); + const auto num_regs = static_cast(Common::BitCount(regs_imm)); + + if (n == Reg::PC || num_regs < 2) { + return UnpredictableInstruction(); + } + if (reg_list.Bit<15>() && reg_list.Bit<14>()) { + return UnpredictableInstruction(); + } + if (W && Common::Bit(static_cast(n), regs_imm)) { + return UnpredictableInstruction(); + } + if (reg_list.Bit<13>()) { + return UnpredictableInstruction(); + 
} + if (reg_list.Bit<15>() && ITBlockCheck(ir)) { + return UnpredictableInstruction(); + } + + // Start address is the same as the writeback address. + const IR::U32 start_address = ir.Sub(ir.GetRegister(n), ir.Imm32(4 * num_regs)); + return LDMHelper(ir, W, n, regs_imm, start_address, start_address); +} + +bool ThumbTranslatorVisitor::thumb32_LDMIA(bool W, Reg n, Imm<16> reg_list) { + const auto regs_imm = reg_list.ZeroExtend(); + const auto num_regs = static_cast(Common::BitCount(regs_imm)); + + if (n == Reg::PC || num_regs < 2) { + return UnpredictableInstruction(); + } + if (reg_list.Bit<15>() && reg_list.Bit<14>()) { + return UnpredictableInstruction(); + } + if (W && Common::Bit(static_cast(n), regs_imm)) { + return UnpredictableInstruction(); + } + if (reg_list.Bit<13>()) { + return UnpredictableInstruction(); + } + if (reg_list.Bit<15>() && ITBlockCheck(ir)) { + return UnpredictableInstruction(); + } + + const auto start_address = ir.GetRegister(n); + const auto writeback_address = ir.Add(start_address, ir.Imm32(num_regs * 4)); + return LDMHelper(ir, W, n, regs_imm, start_address, writeback_address); +} + +bool ThumbTranslatorVisitor::thumb32_POP(Imm<16> reg_list) { + return thumb32_LDMIA(true, Reg::SP, reg_list); +} + +bool ThumbTranslatorVisitor::thumb32_PUSH(Imm<15> reg_list) { + return thumb32_STMDB(true, Reg::SP, reg_list); +} + +bool ThumbTranslatorVisitor::thumb32_STMIA(bool W, Reg n, Imm<15> reg_list) { + const auto regs_imm = reg_list.ZeroExtend(); + const auto num_regs = static_cast(Common::BitCount(regs_imm)); + + if (n == Reg::PC || num_regs < 2) { + return UnpredictableInstruction(); + } + if (W && Common::Bit(static_cast(n), regs_imm)) { + return UnpredictableInstruction(); + } + if (reg_list.Bit<13>()) { + return UnpredictableInstruction(); + } + + const auto start_address = ir.GetRegister(n); + const auto writeback_address = ir.Add(start_address, ir.Imm32(num_regs * 4)); + return STMHelper(ir, W, n, regs_imm, start_address, 
writeback_address); +} + +bool ThumbTranslatorVisitor::thumb32_STMDB(bool W, Reg n, Imm<15> reg_list) { + const auto regs_imm = reg_list.ZeroExtend(); + const auto num_regs = static_cast(Common::BitCount(regs_imm)); + + if (n == Reg::PC || num_regs < 2) { + return UnpredictableInstruction(); + } + if (W && Common::Bit(static_cast(n), regs_imm)) { + return UnpredictableInstruction(); + } + if (reg_list.Bit<13>()) { + return UnpredictableInstruction(); + } + + // Start address is the same as the writeback address. + const IR::U32 start_address = ir.Sub(ir.GetRegister(n), ir.Imm32(4 * num_regs)); + return STMHelper(ir, W, n, regs_imm, start_address, start_address); +} + +} // namespace Dynarmic::A32 diff --git a/externals/dynarmic/src/frontend/A32/translate/impl/thumb32_load_word.cpp b/externals/dynarmic/src/frontend/A32/translate/impl/thumb32_load_word.cpp index 5f4e4104b..81dc66b83 100755 --- a/externals/dynarmic/src/frontend/A32/translate/impl/thumb32_load_word.cpp +++ b/externals/dynarmic/src/frontend/A32/translate/impl/thumb32_load_word.cpp @@ -6,6 +6,129 @@ #include "frontend/A32/translate/impl/translate_thumb.h" namespace Dynarmic::A32 { +static bool ITBlockCheck(const A32::IREmitter& ir) { + return ir.current_location.IT().IsInITBlock() && !ir.current_location.IT().IsLastInITBlock(); +} +bool ThumbTranslatorVisitor::thumb32_LDR_lit(bool U, Reg t, Imm<12> imm12) { + if (t == Reg::PC && ITBlockCheck(ir)) { + return UnpredictableInstruction(); + } + + const u32 imm32 = imm12.ZeroExtend(); + const u32 base = ir.AlignPC(4); + const u32 address = U ? 
base + imm32 : base - imm32; + const auto data = ir.ReadMemory32(ir.Imm32(address)); + + if (t == Reg::PC) { + ir.UpdateUpperLocationDescriptor(); + ir.LoadWritePC(data); + ir.SetTerm(IR::Term::FastDispatchHint{}); + return false; + } + + ir.SetRegister(t, data); + return true; +} + +bool ThumbTranslatorVisitor::thumb32_LDR_imm8(Reg n, Reg t, bool P, bool U, bool W, Imm<8> imm8) { + if (!P && !W) { + return UndefinedInstruction(); + } + if (W && n == t) { + return UnpredictableInstruction(); + } + if (t == Reg::PC && ITBlockCheck(ir)) { + return UnpredictableInstruction(); + } + + const u32 imm32 = imm8.ZeroExtend(); + const IR::U32 reg_n = ir.GetRegister(n); + const IR::U32 offset_address = U ? ir.Add(reg_n, ir.Imm32(imm32)) + : ir.Sub(reg_n, ir.Imm32(imm32)); + const IR::U32 address = P ? offset_address + : reg_n; + const IR::U32 data = ir.ReadMemory32(address); + + if (W) { + ir.SetRegister(n, offset_address); + } + + if (t == Reg::PC) { + ir.UpdateUpperLocationDescriptor(); + ir.LoadWritePC(data); + + if (!P && W && n == Reg::R13) { + ir.SetTerm(IR::Term::PopRSBHint{}); + } else { + ir.SetTerm(IR::Term::FastDispatchHint{}); + } + + return false; + } + + ir.SetRegister(t, data); + return true; +} + +bool ThumbTranslatorVisitor::thumb32_LDR_imm12(Reg n, Reg t, Imm<12> imm12) { + if (t == Reg::PC && ITBlockCheck(ir)) { + return UnpredictableInstruction(); + } + + const auto imm32 = imm12.ZeroExtend(); + const auto reg_n = ir.GetRegister(n); + const auto address = ir.Add(reg_n, ir.Imm32(imm32)); + const auto data = ir.ReadMemory32(address); + + if (t == Reg::PC) { + ir.UpdateUpperLocationDescriptor(); + ir.LoadWritePC(data); + ir.SetTerm(IR::Term::FastDispatchHint{}); + return false; + } + + ir.SetRegister(t, data); + return true; +} + +bool ThumbTranslatorVisitor::thumb32_LDR_reg(Reg n, Reg t, Imm<2> imm2, Reg m) { + if (m == Reg::PC) { + return UnpredictableInstruction(); + } + if (t == Reg::PC && ITBlockCheck(ir)) { + return UnpredictableInstruction(); + } + + 
const auto reg_m = ir.GetRegister(m); + const auto reg_n = ir.GetRegister(n); + const auto offset = ir.LogicalShiftLeft(reg_m, ir.Imm8(imm2.ZeroExtend())); + const auto address = ir.Add(reg_n, offset); + const auto data = ir.ReadMemory32(address); + + if (t == Reg::PC) { /* load into PC: write the loaded value as the new PC, set a dispatch terminal and return false */ + ir.UpdateUpperLocationDescriptor(); + ir.LoadWritePC(data); + ir.SetTerm(IR::Term::FastDispatchHint{}); + return false; + } + + ir.SetRegister(t, data); + return true; +} + +bool ThumbTranslatorVisitor::thumb32_LDRT(Reg n, Reg t, Imm<8> imm8) { /* LDRT: rejects a PC destination, then forwards to thumb32_LDR_imm8 with P set, U set and W clear. */ + // TODO: Add an unpredictable instruction path if this + // is executed in hypervisor mode if we ever support + // privileged execution levels. + + if (t == Reg::PC) { + return UnpredictableInstruction(); + } + + // Treat it as a normal LDR, given we don't support + // execution levels other than EL0 currently. + return thumb32_LDR_imm8(n, t, true, true, false, imm8); +} } // namespace Dynarmic::A32 diff --git a/externals/dynarmic/src/frontend/A32/translate/impl/thumb32_store_single_data_item.cpp b/externals/dynarmic/src/frontend/A32/translate/impl/thumb32_store_single_data_item.cpp index 0b776e75b..78c56fc65 100755 --- a/externals/dynarmic/src/frontend/A32/translate/impl/thumb32_store_single_data_item.cpp +++ b/externals/dynarmic/src/frontend/A32/translate/impl/thumb32_store_single_data_item.cpp @@ -29,18 +29,192 @@ static bool StoreRegister(ThumbTranslatorVisitor& v, Reg n, Reg t, Imm<2> imm2, return true; } +using StoreImmFn = void (*)(ThumbTranslatorVisitor&, const IR::U32&, const IR::U32&); /* signature of the width-specific store emitters: visitor, address, data */ + +static void StoreImmByteFn(ThumbTranslatorVisitor& v, const IR::U32& address, const IR::U32& data) { /* writes the least significant byte of data to address */ + v.ir.WriteMemory8(address, v.ir.LeastSignificantByte(data)); +} + +static void StoreImmHalfFn(ThumbTranslatorVisitor& v, const IR::U32& address, const IR::U32& data) { /* writes the least significant halfword of data to address */ + v.ir.WriteMemory16(address, v.ir.LeastSignificantHalf(data)); +} + +static void StoreImmWordFn(ThumbTranslatorVisitor& v, const IR::U32& address, const IR::U32& data) { /* writes data to address as a 32-bit word */ +
v.ir.WriteMemory32(address, data); +} + +static bool StoreImmediate(ThumbTranslatorVisitor& v, Reg n, Reg t, bool P, bool U, bool W, Imm<12> imm12, + StoreImmFn store_fn) { /* Shared body of the immediate-offset stores. Computes Rn plus imm12 when U is set and Rn minus imm12 otherwise, stores Rt through store_fn at that offset address when P is set or at Rn itself when P is clear, and writes the offset address back to Rn when W is set. Always returns true. Callers perform the PC and n == t checks. */ + const auto imm32 = imm12.ZeroExtend(); + const auto reg_n = v.ir.GetRegister(n); + const auto reg_t = v.ir.GetRegister(t); + + const IR::U32 offset_address = U ? v.ir.Add(reg_n, v.ir.Imm32(imm32)) + : v.ir.Sub(reg_n, v.ir.Imm32(imm32)); + const IR::U32 address = P ? offset_address + : reg_n; + + store_fn(v, address, reg_t); /* the store is emitted before the optional writeback */ + if (W) { + v.ir.SetRegister(n, offset_address); + } + + return true; +} + +bool ThumbTranslatorVisitor::thumb32_STRB_imm_1(Reg n, Reg t, bool P, bool U, Imm<8> imm8) { /* Byte store with writeback always enabled (W passed as true); n == PC is undefined, t == PC or n == t is unpredictable. */ + if (n == Reg::PC) { + return UndefinedInstruction(); + } + if (t == Reg::PC || n == t) { + return UnpredictableInstruction(); + } + return StoreImmediate(*this, n, t, P, U, true, Imm<12>{imm8.ZeroExtend()}, StoreImmByteFn); +} + +bool ThumbTranslatorVisitor::thumb32_STRB_imm_2(Reg n, Reg t, Imm<8> imm8) { /* Byte store to Rn minus imm8 (P set, U clear) without writeback. */ + if (n == Reg::PC) { + return UndefinedInstruction(); + } + if (t == Reg::PC) { + return UnpredictableInstruction(); + } + return StoreImmediate(*this, n, t, true, false, false, Imm<12>{imm8.ZeroExtend()}, StoreImmByteFn); +} + +bool ThumbTranslatorVisitor::thumb32_STRB_imm_3(Reg n, Reg t, Imm<12> imm12) { /* Byte store to Rn plus imm12 (P set, U set) without writeback. */ + if (n == Reg::PC) { + return UndefinedInstruction(); + } + if (t == Reg::PC) { + return UnpredictableInstruction(); + } + return StoreImmediate(*this, n, t, true, true, false, imm12, StoreImmByteFn); +} + +bool ThumbTranslatorVisitor::thumb32_STRBT(Reg n, Reg t, Imm<8> imm8) { /* STRBT: emitted as a byte store to Rn plus imm8 without writeback. */ + // TODO: Add an unpredictable instruction path if this + // is executed in hypervisor mode if we ever support + // privileged execution levels. + + if (n == Reg::PC) { + return UndefinedInstruction(); + } + if (t == Reg::PC) { + return UnpredictableInstruction(); + } + + // Treat this as a normal STRB, given we don't support + // execution levels other than EL0 currently.
+ return StoreImmediate(*this, n, t, true, true, false, Imm<12>{imm8.ZeroExtend()}, StoreImmByteFn); +} + bool ThumbTranslatorVisitor::thumb32_STRB(Reg n, Reg t, Imm<2> imm2, Reg m) { return StoreRegister(*this, n, t, imm2, m, [this](const IR::U32& offset_address, const IR::U32& data) { ir.WriteMemory8(offset_address, ir.LeastSignificantByte(data)); }); } +bool ThumbTranslatorVisitor::thumb32_STRH_imm_1(Reg n, Reg t, bool P, bool U, Imm<8> imm8) { /* Halfword store with writeback always enabled (W passed as true); n == PC is undefined, t == PC or n == t is unpredictable. */ + if (n == Reg::PC) { + return UndefinedInstruction(); + } + if (t == Reg::PC || n == t) { + return UnpredictableInstruction(); + } + return StoreImmediate(*this, n, t, P, U, true, Imm<12>{imm8.ZeroExtend()}, StoreImmHalfFn); +} + +bool ThumbTranslatorVisitor::thumb32_STRH_imm_2(Reg n, Reg t, Imm<8> imm8) { /* Halfword store to Rn minus imm8 (P set, U clear) without writeback. */ + if (n == Reg::PC) { + return UndefinedInstruction(); + } + if (t == Reg::PC) { + return UnpredictableInstruction(); + } + return StoreImmediate(*this, n, t, true, false, false, Imm<12>{imm8.ZeroExtend()}, StoreImmHalfFn); +} + +bool ThumbTranslatorVisitor::thumb32_STRH_imm_3(Reg n, Reg t, Imm<12> imm12) { /* Halfword store to Rn plus imm12 (P set, U set) without writeback. */ + if (n == Reg::PC) { + return UndefinedInstruction(); + } + if (t == Reg::PC) { + return UnpredictableInstruction(); + } + return StoreImmediate(*this, n, t, true, true, false, imm12, StoreImmHalfFn); +} + +bool ThumbTranslatorVisitor::thumb32_STRHT(Reg n, Reg t, Imm<8> imm8) { /* STRHT: emitted as a halfword store to Rn plus imm8 without writeback. */ + // TODO: Add an unpredictable instruction path if this + // is executed in hypervisor mode if we ever support + // privileged execution levels. + + if (n == Reg::PC) { + return UndefinedInstruction(); + } + if (t == Reg::PC) { + return UnpredictableInstruction(); + } + + // Treat this as a normal STRH, given we don't support + // execution levels other than EL0 currently.
+ return StoreImmediate(*this, n, t, true, true, false, Imm<12>{imm8.ZeroExtend()}, StoreImmHalfFn); +} + bool ThumbTranslatorVisitor::thumb32_STRH(Reg n, Reg t, Imm<2> imm2, Reg m) { return StoreRegister(*this, n, t, imm2, m, [this](const IR::U32& offset_address, const IR::U32& data) { ir.WriteMemory16(offset_address, ir.LeastSignificantHalf(data)); }); } +bool ThumbTranslatorVisitor::thumb32_STR_imm_1(Reg n, Reg t, bool P, bool U, Imm<8> imm8) { /* Word store with writeback always enabled (W passed as true); n == PC is undefined, t == PC or n == t is unpredictable. */ + if (n == Reg::PC) { + return UndefinedInstruction(); + } + if (t == Reg::PC || n == t) { + return UnpredictableInstruction(); + } + return StoreImmediate(*this, n, t, P, U, true, Imm<12>{imm8.ZeroExtend()}, StoreImmWordFn); +} + +bool ThumbTranslatorVisitor::thumb32_STR_imm_2(Reg n, Reg t, Imm<8> imm8) { /* Word store to Rn minus imm8 (P set, U clear) without writeback. */ + if (n == Reg::PC) { + return UndefinedInstruction(); + } + if (t == Reg::PC) { + return UnpredictableInstruction(); + } + return StoreImmediate(*this, n, t, true, false, false, Imm<12>{imm8.ZeroExtend()}, StoreImmWordFn); +} + +bool ThumbTranslatorVisitor::thumb32_STR_imm_3(Reg n, Reg t, Imm<12> imm12) { /* Word store to Rn plus imm12 (P set, U set) without writeback. */ + if (n == Reg::PC) { + return UndefinedInstruction(); + } + if (t == Reg::PC) { + return UnpredictableInstruction(); + } + return StoreImmediate(*this, n, t, true, true, false, imm12, StoreImmWordFn); +} + +bool ThumbTranslatorVisitor::thumb32_STRT(Reg n, Reg t, Imm<8> imm8) { /* STRT: emitted as a word store to Rn plus imm8 without writeback. */ + // TODO: Add an unpredictable instruction path if this + // is executed in hypervisor mode if we ever support + // privileged execution levels. + + if (n == Reg::PC) { + return UndefinedInstruction(); + } + if (t == Reg::PC) { + return UnpredictableInstruction(); + } + + // Treat this as a normal STR, given we don't support + // execution levels other than EL0 currently.
+ return StoreImmediate(*this, n, t, true, true, false, Imm<12>{imm8.ZeroExtend()}, StoreImmWordFn); +} + bool ThumbTranslatorVisitor::thumb32_STR_reg(Reg n, Reg t, Imm<2> imm2, Reg m) { return StoreRegister(*this, n, t, imm2, m, [this](const IR::U32& offset_address, const IR::U32& data) { ir.WriteMemory32(offset_address, data); diff --git a/externals/dynarmic/src/frontend/A32/translate/impl/translate_thumb.h b/externals/dynarmic/src/frontend/A32/translate/impl/translate_thumb.h index 05ff0fdb6..174f601c4 100755 --- a/externals/dynarmic/src/frontend/A32/translate/impl/translate_thumb.h +++ b/externals/dynarmic/src/frontend/A32/translate/impl/translate_thumb.h @@ -171,6 +171,14 @@ struct ThumbTranslatorVisitor final { bool thumb16_B_t1(Cond cond, Imm<8> imm8); bool thumb16_B_t2(Imm<11> imm11); + // thumb32 load/store multiple instructions + bool thumb32_LDMDB(bool W, Reg n, Imm<16> reg_list); + bool thumb32_LDMIA(bool W, Reg n, Imm<16> reg_list); + bool thumb32_POP(Imm<16> reg_list); + bool thumb32_PUSH(Imm<15> reg_list); + bool thumb32_STMIA(bool W, Reg n, Imm<15> reg_list); + bool thumb32_STMDB(bool W, Reg n, Imm<15> reg_list); /* unpredictable for n == PC, fewer than two listed registers, W with Rn in the list, or bit 13 of reg_list set */ + // thumb32 data processing (shifted register) instructions bool thumb32_TST_reg(Reg n, Imm<3> imm3, Imm<2> imm2, ShiftType type, Reg m); bool thumb32_AND_reg(bool S, Reg n, Imm<3> imm3, Reg d, Imm<2> imm2, ShiftType type, Reg m); @@ -209,6 +217,8 @@ struct ThumbTranslatorVisitor final { bool thumb32_RSB_imm(Imm<1> i, bool S, Reg n, Imm<3> imm3, Reg d, Imm<8> imm8); // thumb32 data processing (plain binary immediate) instructions.
+ bool thumb32_ADR_t2(Imm<1> imm1, Imm<3> imm3, Reg d, Imm<8> imm8); + bool thumb32_ADR_t3(Imm<1> imm1, Imm<3> imm3, Reg d, Imm<8> imm8); bool thumb32_ADD_imm_2(Imm<1> imm1, Imm<3> imm3, Reg d, Imm<8> imm8); bool thumb32_BFC(Imm<3> imm3, Reg d, Imm<2> imm2, Imm<5> msb); bool thumb32_BFI(Reg n, Imm<3> imm3, Reg d, Imm<2> imm2, Imm<5> msb); @@ -243,8 +253,20 @@ struct ThumbTranslatorVisitor final { bool thumb32_B_cond(Imm<1> S, Cond cond, Imm<6> hi, Imm<1> j1, Imm<1> j2, Imm<11> lo); // thumb32 store single data item instructions + bool thumb32_STRB_imm_1(Reg n, Reg t, bool P, bool U, Imm<8> imm8); /* writeback form: P and U come from the encoding, W is always set */ + bool thumb32_STRB_imm_2(Reg n, Reg t, Imm<8> imm8); /* Rn minus imm8, no writeback */ + bool thumb32_STRB_imm_3(Reg n, Reg t, Imm<12> imm12); /* Rn plus imm12, no writeback */ + bool thumb32_STRBT(Reg n, Reg t, Imm<8> imm8); /* currently emitted as a plain store to Rn plus imm8 */ bool thumb32_STRB(Reg n, Reg t, Imm<2> imm2, Reg m); + bool thumb32_STRH_imm_1(Reg n, Reg t, bool P, bool U, Imm<8> imm8); /* writeback form: P and U come from the encoding, W is always set */ + bool thumb32_STRH_imm_2(Reg n, Reg t, Imm<8> imm8); /* Rn minus imm8, no writeback */ + bool thumb32_STRH_imm_3(Reg n, Reg t, Imm<12> imm12); /* Rn plus imm12, no writeback */ + bool thumb32_STRHT(Reg n, Reg t, Imm<8> imm8); /* currently emitted as a plain store to Rn plus imm8 */ bool thumb32_STRH(Reg n, Reg t, Imm<2> imm2, Reg m); + bool thumb32_STR_imm_1(Reg n, Reg t, bool P, bool U, Imm<8> imm8); /* writeback form: P and U come from the encoding, W is always set */ + bool thumb32_STR_imm_2(Reg n, Reg t, Imm<8> imm8); /* Rn minus imm8, no writeback */ + bool thumb32_STR_imm_3(Reg n, Reg t, Imm<12> imm12); /* Rn plus imm12, no writeback */ + bool thumb32_STRT(Reg n, Reg t, Imm<8> imm8); /* currently emitted as a plain store to Rn plus imm8 */ bool thumb32_STR_reg(Reg n, Reg t, Imm<2> imm2, Reg m); // thumb32 load byte and memory hints @@ -267,6 +289,25 @@ struct ThumbTranslatorVisitor final { bool thumb32_LDRSB_imm12(Reg n, Reg t, Imm<12> imm12); bool thumb32_LDRSBT(Reg n, Reg t, Imm<8> imm8); + // thumb32 load halfword instructions + bool thumb32_LDRH_lit(bool U, Reg t, Imm<12> imm12); + bool thumb32_LDRH_reg(Reg n, Reg t, Imm<2> imm2, Reg m); + bool thumb32_LDRH_imm8(Reg n, Reg t, bool P, bool U, bool W, Imm<8> imm8); + bool thumb32_LDRH_imm12(Reg n, Reg t, Imm<12> imm12); + bool thumb32_LDRHT(Reg n, Reg t, Imm<8> imm8); + bool thumb32_LDRSH_lit(bool U, Reg t, Imm<12> imm12); + bool thumb32_LDRSH_reg(Reg
n, Reg t, Imm<2> imm2, Reg m); + bool thumb32_LDRSH_imm8(Reg n, Reg t, bool P, bool U, bool W, Imm<8> imm8); + bool thumb32_LDRSH_imm12(Reg n, Reg t, Imm<12> imm12); + bool thumb32_LDRSHT(Reg n, Reg t, Imm<8> imm8); + + // thumb32 load word instructions + bool thumb32_LDR_lit(bool U, Reg t, Imm<12> imm12); /* address is AlignPC(4) plus or minus imm12, selected by U */ + bool thumb32_LDR_reg(Reg n, Reg t, Imm<2> imm2, Reg m); /* address is Rn plus Rm shifted left by imm2 */ + bool thumb32_LDR_imm8(Reg n, Reg t, bool P, bool U, bool W, Imm<8> imm8); /* undefined when neither P nor W is set */ + bool thumb32_LDR_imm12(Reg n, Reg t, Imm<12> imm12); /* address is Rn plus imm12, no writeback */ + bool thumb32_LDRT(Reg n, Reg t, Imm<8> imm8); /* currently forwards to thumb32_LDR_imm8 with P set, U set, W clear */ + // thumb32 data processing (register) instructions bool thumb32_ASR_reg(Reg m, Reg d, Reg s); bool thumb32_LSL_reg(Reg m, Reg d, Reg s); diff --git a/externals/dynarmic/src/frontend/A64/decoder/a64.inc b/externals/dynarmic/src/frontend/A64/decoder/a64.inc index f32f18e04..23f8b7193 100755 --- a/externals/dynarmic/src/frontend/A64/decoder/a64.inc +++ b/externals/dynarmic/src/frontend/A64/decoder/a64.inc @@ -61,6 +61,8 @@ INST(WFI, "WFI", "11010 INST(SEV, "SEV", "11010101000000110010000010011111") INST(SEVL, "SEVL", "11010101000000110010000010111111") //INST(DGH, "DGH", "11010101000000110010000011011111") // v8.6 +//INST(WFET, "WFET", "110101010000001100010000000ddddd") // v8.7 +//INST(WFIT, "WFIT", "110101010000001100010000001ddddd") // v8.7 //INST(XPAC_1, "XPACD, XPACI, XPACLRI", "110110101100000101000D11111ddddd") //INST(XPAC_2, "XPACD, XPACI, XPACLRI", "11010101000000110010000011111111") //INST(PACIA_1, "PACIA, PACIA1716, PACIASP, PACIAZ, PACIZA", "110110101100000100Z000nnnnnddddd") @@ -267,6 +269,10 @@ INST(LDTRSW, "LDTRSW", "10111 //INST(LDUMIN, "LDUMIN, LDUMINA, LDUMINAL, LDUMINL", "1-111000AR1sssss011100nnnnnttttt") //INST(SWP, "SWP, SWPA, SWPAL, SWPL", "1-111000AR1sssss100000nnnnnttttt") //INST(LDAPR, "LDAPR", "1-11100010111111110000nnnnnttttt") +//INST(LD64B, "LD64B", "1111100000111111110100nnnnnttttt") // v8.7 +//INST(ST64B, "ST64B", "1111100000111111100100nnnnnttttt") // v8.7 +//INST(ST64BV, "ST64BV",
"11111000001sssss101100nnnnnttttt") // v8.7 +//INST(ST64BV0, "ST64BV0", "11111000001sssss101000nnnnnttttt") // v8.7 // Loads and stores - Load/Store register (register offset) INST(STRx_reg, "STRx (register)", "zz111000o01mmmmmxxxS10nnnnnttttt")