// Patch summary: adds the AVX512-FP16 mnemonic vcvttph2uqq in three files.
//
//  1. gen/gen_avx512.cpp, putCvt(): one new table row
//       { 0x78, "vcvttph2uqq", T_66 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 |
//         T_B16 | T_N4 | T_N_VL | T_SAE_X, 3 }
//     It differs from the neighbouring vcvtph2uqq row only in the opcode
//     (0x78 instead of 0x79) and in using T_SAE_X where that row uses T_ER_X.
//
//  2. test/misc.cpp, CYBOZU_TEST_AUTO(vaddph): nine new vcvttph2uqq calls
//     (xmm/ymm/zmm destinations; register, ptr and ptr_b sources; opmask,
//     T_z and T_sae variants) plus nine matching rows in the expected-bytes
//     table `tbl`. The test compares c.getSize() against the table length, so
//     calls and byte rows have to be added together.
//     For the same [rax+0x40] operand the expected disp8 byte is 0x10 (xmm),
//     0x08 (ymm), 0x04 (zmm) and 0x20 for every ptr_b form.
//
//  3. xbyak/xbyak_mnemonic.h: new member vcvttph2uqq(const Xmm&, const Operand&).
//     It raises ERR_BAD_MEM_SIZE through XBYAK_THROW unless `op` is an XMM
//     register or a memory operand, then calls opVex with the same flag set as
//     the generator row and opcode 0x78. vcvttph2udq on the line above uses the
//     same opcode byte but has no T_66 and uses T_N8 / T_SAE_Y.
//
// NOTE(review): xbyak_mnemonic.h is presumably generated output of gen/ - the
// flags match the table row exactly - so confirm it was regenerated rather than
// hand-edited.
// NOTE(review): the line breaks of this patch appear to have been collapsed in
// this copy; re-export it from the repository before trying to apply it.
diff --git a/gen/gen_avx512.cpp b/gen/gen_avx512.cpp index 51bdb7d..86b0b2d 100644 --- a/gen/gen_avx512.cpp +++ b/gen/gen_avx512.cpp @@ -560,6 +560,7 @@ void putCvt() { 0x5A, "vcvtph2pd", T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_N4 | T_N_VL | T_SAE_X, 3 }, { 0x7B, "vcvtph2qq", T_66 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_N4 | T_N_VL | T_ER_X, 3 }, { 0x79, "vcvtph2uqq", T_66 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_N4 | T_N_VL | T_ER_X, 3 }, + { 0x78, "vcvttph2uqq", T_66 | T_MAP5 | T_MUST_EVEX | T_YMM | T_EW0 | T_B16 | T_N4 | T_N_VL | T_SAE_X, 3 }, }; for (size_t i = 0; i < NUM_OF_ARRAY(tbl); i++) { const Tbl& p = tbl[i]; diff --git a/test/misc.cpp b/test/misc.cpp index 9a98d0c..b70f49c 100644 --- a/test/misc.cpp +++ b/test/misc.cpp @@ -1160,6 +1160,16 @@ CYBOZU_TEST_AUTO(vaddph) vcvtph2uqq(zmm1|k5|T_z, ptr [rax+0x40]); vcvtph2uqq(zmm1|k5|T_z, ptr_b [rax+0x40]); + vcvttph2uqq(xmm1, xmm5); + vcvttph2uqq(xmm1, ptr [rax+0x40]); + vcvttph2uqq(xmm1, ptr_b [rax+0x40]); + vcvttph2uqq(ymm1|k2|T_z, xmm5); + vcvttph2uqq(ymm1, ptr [rax+0x40]); + vcvttph2uqq(ymm1, ptr_b [rax+0x40]); + vcvttph2uqq(zmm1|k5|T_z|T_sae, xmm3); + vcvttph2uqq(zmm1|k5|T_z, ptr [rax+0x40]); + vcvttph2uqq(zmm1|k5|T_z, ptr_b [rax+0x40]); + } } c; const uint8_t tbl[] = { @@ -1504,6 +1514,17 @@ CYBOZU_TEST_AUTO(vaddph) 0x62, 0xf5, 0x7d, 0xbd, 0x79, 0xcb, 0x62, 0xf5, 0x7d, 0xcd, 0x79, 0x48, 0x04, 0x62, 0xf5, 0x7d, 0xdd, 0x79, 0x48, 0x20, + + // vcvttph2uqq + 0x62, 0xf5, 0x7d, 0x08, 0x78, 0xcd, + 0x62, 0xf5, 0x7d, 0x08, 0x78, 0x48, 0x10, + 0x62, 0xf5, 0x7d, 0x18, 0x78, 0x48, 0x20, + 0x62, 0xf5, 0x7d, 0xaa, 0x78, 0xcd, + 0x62, 0xf5, 0x7d, 0x28, 0x78, 0x48, 0x08, + 0x62, 0xf5, 0x7d, 0x38, 0x78, 0x48, 0x20, + 0x62, 0xf5, 0x7d, 0x9d, 0x78, 0xcb, + 0x62, 0xf5, 0x7d, 0xcd, 0x78, 0x48, 0x04, + 0x62, 0xf5, 0x7d, 0xdd, 0x78, 0x48, 0x20, }; const size_t n = sizeof(tbl) / sizeof(tbl[0]); CYBOZU_TEST_EQUAL(c.getSize(), n); diff --git a/xbyak/xbyak_mnemonic.h b/xbyak/xbyak_mnemonic.h index 
932515e..3389f5a 100644 --- a/xbyak/xbyak_mnemonic.h +++ b/xbyak/xbyak_mnemonic.h @@ -1924,6 +1924,7 @@ void vcvttpd2udq(const Xmm& x, const Operand& op) { opCvt2(x, op, T_0F | T_EW1 | void vcvttpd2uqq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_66 | T_0F | T_EW1 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B64, 0x78); } void vcvttph2dq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8 | T_N_VL | T_F3 | T_MAP5 | T_EW0 | T_YMM | T_SAE_Y | T_MUST_EVEX | T_B16, 0x5B); } void vcvttph2udq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8 | T_N_VL | T_MAP5 | T_EW0 | T_YMM | T_SAE_Y | T_MUST_EVEX | T_B16, 0x78); } +void vcvttph2uqq(const Xmm& x, const Operand& op) { if (!op.isXMM() && !op.isMEM()) XBYAK_THROW(ERR_BAD_MEM_SIZE) opVex(x, 0, op, T_N4 | T_N_VL | T_66 | T_MAP5 | T_EW0 | T_YMM | T_SAE_X | T_MUST_EVEX | T_B16, 0x78); } void vcvttps2qq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8 | T_N_VL | T_66 | T_0F | T_EW0 | T_YMM | T_SAE_Y | T_MUST_EVEX | T_B32, 0x7A); } void vcvttps2udq(const Xmm& x, const Operand& op) { opAVX_X_XM_IMM(x, op, T_0F | T_EW0 | T_YMM | T_SAE_Z | T_MUST_EVEX | T_B32, 0x78); } void vcvttps2uqq(const Xmm& x, const Operand& op) { checkCvt1(x, op); opVex(x, 0, op, T_N8 | T_N_VL | T_66 | T_0F | T_EW0 | T_YMM | T_SAE_Y | T_MUST_EVEX | T_B32, 0x78); }